Coverage for databooks/data_models/rich_helpers.py: 85%
48 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-10-03 12:27 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-10-03 12:27 +0000
1"""Rich helpers functions for rich renderables in data models."""
2from html.parser import HTMLParser
3from typing import Any, List, Optional, Tuple
5from rich import box
6from rich.table import Table
8HtmlAttr = Tuple[str, Optional[str]]
class RichHtmlTableError(Exception):
    """Could not parse HTML table."""

    def __init__(self, msg: str = "", *args: Any):
        """Build the error text as '<class docstring> <msg>'.

        The class docstring doubles as the message prefix, so it must be
        present; any extra positional `args` are forwarded to `Exception`.
        """
        prefix = self.__doc__
        if prefix is None:
            raise ValueError("Exception docstring required - used in error message.")
        full_message = prefix + " " + msg
        super().__init__(full_message, *args)
class HtmlTable(HTMLParser):
    """Rich table from HTML string.

    The HTML is parsed as soon as the instance is created. Every tag that is
    currently open is tracked as a boolean instance attribute named after the
    tag (e.g. `self.td`). Text found inside `th`/`td` cells of a `table` is
    accumulated in `self.row`; when a `tr` closes, that row becomes the
    `headers` (inside `thead`) and/or is appended to `rows` (inside `tbody`).
    """

    def __init__(self, html: str, *args: Any, **kwargs: Any) -> None:
        """Initialize parser state and feed `html` to the parser.

        Extra positional and keyword arguments are forwarded to `HTMLParser`.
        """
        super().__init__(*args, **kwargs)
        # Flags for the tags the parser logic reads; other tags get their flag
        # created on the fly by `handle_starttag`
        self.table = self.thead = self.tbody = self.body = self.th = self.td = False
        self.headers: List[str] = []
        self.row: List[str] = []
        self.rows: List[List[str]] = []
        self.feed(html)

    def handle_starttag(self, tag: str, attrs: List[HtmlAttr]) -> None:
        """Mark `tag` as active via the instance boolean property of that name.

        Raises `RichHtmlTableError` if the tag is already marked as active
        (nested elements of the same kind are not supported).
        """
        if getattr(self, tag, None):
            raise RichHtmlTableError(f"Already in `{tag}`.")
        setattr(self, tag, True)

    def handle_endtag(self, tag: str) -> None:
        """Write table properties when closing tags and mark `tag` inactive.

        Raises `RichHtmlTableError` if `tag` is not currently active.
        """
        # Default to `None`: a tag that was never opened has no attribute yet
        # and must be reported as a parsing error, not as an `AttributeError`
        if not getattr(self, tag, None):
            raise RichHtmlTableError(f"Cannot end unopened `{tag}`.")

        # If we are ending a row, either set a table header or row
        if tag == "tr":
            if self.thead:
                self.headers = self.row
            if self.tbody:
                self.rows.append(self.row)
            self.row = []  # restart row values
        setattr(self, tag, False)

    def handle_data(self, data: str) -> None:
        """Append `data` to the current row when inside a table cell."""
        if self.table and (self.th or self.td):
            self.row.append(data)

    def rich(self, **tbl_kwargs: Any) -> Optional[Table]:
        """Generate `rich` representation of table.

        Keyword arguments are passed to `rich.table.Table`; `box` and
        `row_styles` get defaults when not provided. Returns `None` when no
        header or row was parsed. Raises `RichHtmlTableError` when the body
        rows do not all have the same number of columns.
        """
        if not self.rows and not self.headers:  # HTML is not a table
            return None

        # A header-only table has no body row to measure, so fall back to the
        # header width instead of indexing into an empty list
        _ncols = len(self.rows[0]) if self.rows else len(self.headers)
        # Left-pad the headers with empty titles when the rows are wider
        _headers = [""] * (_ncols - len(self.headers)) + self.headers
        if any(len(row) != _ncols for row in self.rows):
            raise RichHtmlTableError(f"Expected all rows to have {_ncols} columns.")

        _box = tbl_kwargs.pop("box", box.SIMPLE_HEAVY)
        _row_styles = tbl_kwargs.pop("row_styles", ["on bright_black", ""])

        table = Table(*_headers, box=_box, row_styles=_row_styles, **tbl_kwargs)
        for row in self.rows:
            table.add_row(*row)
        return table