Coverage for databooks/data_models/rich_helpers.py: 85%

48 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-10-03 12:27 +0000

1"""Rich helpers functions for rich renderables in data models.""" 

2from html.parser import HTMLParser 

3from typing import Any, List, Optional, Tuple 

4 

5from rich import box 

6from rich.table import Table 

7 

# One HTML attribute as a (name, value) pair; the value is optional (may be None).
# Used as the element type of the `attrs` list in `HtmlTable.handle_starttag`.
8HtmlAttr = Tuple[str, Optional[str]] 

9 

10 

class RichHtmlTableError(Exception):
    """Could not parse HTML table."""

    def __init__(self, msg: str = "", *args: Any):
        """
        Use class docstring as error 'prefix'.

        The final message is the class docstring followed by `msg`, separated
        by a single space. When `msg` is empty only the docstring is used, so
        the message never ends with a dangling space.

        :param msg: Details appended to the docstring prefix
        :param args: Extra positional arguments forwarded to `Exception`
        :raises ValueError: If the class has no docstring (e.g. a subclass that
         does not define one - `__doc__` is not inherited)
        """
        prefix = self.__doc__
        if prefix is None:
            raise ValueError("Exception docstring required - used in error message.")
        # Only join when there is something to append - avoids a trailing space
        message = f"{prefix} {msg}" if msg else prefix
        super().__init__(message, *args)

19 

20 

class HtmlTable(HTMLParser):
    """
    Rich table from HTML string.

    The HTML is parsed eagerly on construction. While parsing, every open tag
    is tracked as a boolean instance attribute named after the tag (`table`,
    `thead`, `tbody`, `tr`, `th`, `td`, ...). Text found inside `th`/`td`
    cells is collected per row; a closing `tr` stores the row as the headers
    (inside `thead`) and/or as a body row (inside `tbody`).
    """

    # Void elements have no closing tag, so their "active" flag would never be
    # cleared and a second occurrence would be reported as a nesting error.
    _VOID_TAGS = frozenset(
        {
            "area",
            "base",
            "br",
            "col",
            "embed",
            "hr",
            "img",
            "input",
            "link",
            "meta",
            "source",
            "track",
            "wbr",
        }
    )

    def __init__(self, html: str, *args: Any, **kwargs: Any) -> None:
        """
        Initialize parser and immediately parse `html`.

        :param html: HTML string to parse
        :param args: Positional arguments forwarded to `HTMLParser`
        :param kwargs: Keyword arguments forwarded to `HTMLParser`
        """
        super().__init__(*args, **kwargs)
        self.table = self.thead = self.tbody = self.body = self.th = self.td = False
        self.headers: List[str] = []
        self.row: List[str] = []  # cells of the row currently being parsed
        self.rows: List[List[str]] = []
        self.feed(html)

    def handle_starttag(self, tag: str, attrs: "List[HtmlAttr]") -> None:
        """
        Active tags are indicated via instance boolean properties.

        :raises RichHtmlTableError: If `tag` is opened while already active
        """
        if tag in self._VOID_TAGS:  # never closed - nothing to track
            return
        if getattr(self, tag, None):
            raise RichHtmlTableError(f"Already in `{tag}`.")
        setattr(self, tag, True)

    def handle_endtag(self, tag: str) -> None:
        """
        Write table properties when closing tags.

        :raises RichHtmlTableError: If `tag` is closed without being open
        """
        if tag in self._VOID_TAGS:
            return
        # Default to `None`: a tag that was never opened has no attribute yet
        if not getattr(self, tag, None):
            raise RichHtmlTableError(f"Cannot end unopened `{tag}`.")

        # If we are ending a row, either set a table header or row
        if tag == "tr":
            if self.thead:
                self.headers = self.row
            if self.tbody:
                self.rows.append(self.row)
            self.row = []  # restart row values
        setattr(self, tag, False)

    def handle_data(self, data: str) -> None:
        """Append data depending on active tags (only text inside table cells)."""
        if self.table and (self.th or self.td):
            self.row.append(data)

    def rich(self, **tbl_kwargs: Any) -> "Optional[Table]":
        """
        Generate `rich` representation of table.

        :param tbl_kwargs: Keyword arguments forwarded to `rich.table.Table`;
         `box` and `row_styles` get defaults when not provided
        :return: The rendered table, or `None` if no table data was parsed
        :raises RichHtmlTableError: If body rows differ in number of columns
        """
        if not self.rows and not self.headers:  # HTML is not a table
            return None

        # A table may have headers only - fall back to the header width
        _ncols = len(self.rows[0]) if self.rows else len(self.headers)
        # Left-pad headers with empty names when they are shorter than the rows
        _headers = [""] * (_ncols - len(self.headers)) + self.headers
        if any(len(row) != _ncols for row in self.rows):
            raise RichHtmlTableError(f"Expected all rows to have {_ncols} columns.")

        _box = tbl_kwargs.pop("box", box.SIMPLE_HEAVY)
        _row_styles = tbl_kwargs.pop("row_styles", ["on bright_black", ""])

        table = Table(*_headers, box=_box, row_styles=_row_styles, **tbl_kwargs)
        for row in self.rows:
            table.add_row(*row)
        return table