Coverage for databooks/data_models/rich_helpers.py: 91%

43 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-11 20:30 +0000

"""Helper functions for rich renderables in data models."""

2from html.parser import HTMLParser 

3from typing import Any, List, Optional, Tuple 

4 

5from rich import box 

6from rich.table import Table 

7 

# An HTML attribute as handed to `handle_starttag`: a `(name, value)` pair whose
# value is `None` when the attribute is written without one (e.g. `<td nowrap>`).
HtmlAttr = Tuple[str, Optional[str]]


class HtmlTable(HTMLParser):
    """Rich table from HTML string.

    The HTML is parsed eagerly in the constructor. Open tags are tracked as
    boolean instance attributes named after the tag (`self.table`, `self.td`,
    ...), which means the parser only understands flat, non-nested markup:
    opening a tag that is already open raises `ValueError`.

    After construction:
      - `headers` holds the cells of the last row closed inside `<thead>`;
      - `rows` holds one list of cell texts per row closed inside `<tbody>`.
    """

    # Void elements never have a closing tag, so tracking them as "open" would
    # make their second occurrence look like an illegal nesting.
    _VOID_TAGS = frozenset(
        {
            "area",
            "base",
            "br",
            "col",
            "embed",
            "hr",
            "img",
            "input",
            "link",
            "meta",
            "source",
            "track",
            "wbr",
        }
    )
    _CELL_TAGS = frozenset({"th", "td"})

    def __init__(self, html: str, *args: Any, **kwargs: Any) -> None:
        """
        Initialize parser and immediately parse `html`.

        :param html: HTML string to extract the table from
        :param args: Positional arguments forwarded to `HTMLParser`
        :param kwargs: Keyword arguments forwarded to `HTMLParser`
        :raises ValueError: If tags are nested in themselves or closed unopened
        """
        super().__init__(*args, **kwargs)
        self.table = self.thead = self.tbody = self.body = self.th = self.td = False
        self.headers: List[str] = []
        self.row: List[str] = []
        self.rows: List[List[str]] = []
        # Text chunks of the cell currently being parsed (joined on cell close)
        self._cell: List[str] = []
        self.feed(html)

    def handle_starttag(self, tag: str, attrs: List[HtmlAttr]) -> None:
        """
        Active tags are indicated via instance boolean properties.

        :param tag: Lower-cased name of the tag being opened
        :param attrs: Attributes of the tag (unused)
        :raises ValueError: If `tag` is already open
        """
        if tag in self._VOID_TAGS:
            return
        if getattr(self, tag, None):
            raise ValueError(f"Already in `{tag}`.")
        setattr(self, tag, True)
        if tag in self._CELL_TAGS:
            self._cell = []  # start collecting text for a new cell

    def handle_endtag(self, tag: str) -> None:
        """
        Write table properties when closing tags.

        Closing a cell appends its text to the current row; closing a row
        stores it as the header and/or as a body row.

        :param tag: Lower-cased name of the tag being closed
        :raises ValueError: If `tag` was not open
        """
        if tag in self._VOID_TAGS:
            return
        # Default is required: tags never seen before have no attribute at all
        if not getattr(self, tag, None):
            raise ValueError(f"Cannot end unopened `{tag}`.")

        if tag in self._CELL_TAGS:
            # One entry per cell (even if empty) keeps columns aligned
            if self.table:
                self.row.append("".join(self._cell))
            self._cell = []
        elif tag == "tr":
            # If we are ending a row, either set a table header or row
            if self.thead:
                self.headers = self.row
            if self.tbody:
                self.rows.append(self.row)
            self.row = []  # restart row values
        setattr(self, tag, False)

    def handle_data(self, data: str) -> None:
        """
        Collect text that belongs to the table cell currently open.

        A single cell may receive several chunks (e.g. around inline markup),
        so chunks are buffered and only joined when the cell is closed.

        :param data: Text found between tags
        """
        if self.table and (self.th or self.td):
            self._cell.append(data)

    def rich(self, **tbl_kwargs: Any) -> "Optional[Table]":
        """
        Generate `rich` representation of table.

        :param tbl_kwargs: Keyword arguments forwarded to `rich.table.Table`;
         `box` defaults to `box.SIMPLE_HEAVY` and `row_styles` to alternating
         `"on bright_black"` / unstyled rows
        :return: The table, or `None` if the HTML had neither headers nor rows
        :raises ValueError: If body rows do not all have the same length
        """
        if not self.rows and not self.headers:  # HTML is not a table
            return None

        # A header-only table has no first row to measure
        _ncols = len(self.rows[0]) if self.rows else len(self.headers)
        if any(len(row) != _ncols for row in self.rows):
            raise ValueError(f"Expected all rows to have {_ncols} columns.")
        # Left-pad headers that are shorter than the body rows
        _headers = [""] * (_ncols - len(self.headers)) + self.headers

        _box = tbl_kwargs.pop("box", box.SIMPLE_HEAVY)
        _row_styles = tbl_kwargs.pop("row_styles", ["on bright_black", ""])

        table = Table(*_headers, box=_box, row_styles=_row_styles, **tbl_kwargs)
        for row in self.rows:
            table.add_row(*row)
        return table