Coverage for databooks/metadata.py: 94%
31 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-10-03 12:27 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-10-03 12:27 +0000
1"""Metadata wrapper functions for cleaning notebook metadata."""
2from pathlib import Path
3from typing import Any, Callable, List, Optional, Sequence
5from databooks import JupyterNotebook
6from databooks.data_models.cell import BaseCell
7from databooks.logging import get_logger, set_verbose
9logger = get_logger(__file__)
def clear(
    read_path: Path,
    write_path: Optional[Path] = None,
    notebook_metadata_keep: Sequence[str] = (),
    cell_metadata_keep: Sequence[str] = (),
    cell_fields_keep: Sequence[str] = (),
    check: bool = False,
    verbose: bool = False,
    overwrite: bool = False,
    **kwargs: Any,
) -> bool:
    """
    Clear Jupyter Notebook metadata.

    Clear metadata (at notebook and cell level) and write clean
     notebook. By default, remove all metadata. Nothing is written when the
     notebook is already clean or when `check` is set.
    :param read_path: Path of notebook file with metadata to be cleaned
    :param write_path: Path where the cleaned notebook is written (defaults to
     `read_path` when not given)
    :param notebook_metadata_keep: Notebook metadata fields to keep
    :param cell_metadata_keep: Cell metadata fields to keep
    :param cell_fields_keep: Cell fields to keep (the fields declared on
     `BaseCell` are always kept)
    :param check: Don't write any files, check whether there is unwanted metadata
    :param verbose: Log written files
    :param overwrite: Whether to overwrite files (if exists)
    :param kwargs: Additional keyword arguments to pass to
     `databooks.data_models.JupyterNotebook.clear_metadata`
    :return: Whether the cleaned notebook equals the notebook stored at
     `read_path` (i.e. `True` when there was no metadata to remove)
    """
    if verbose:
        set_verbose(logger)

    if write_path is None:
        write_path = read_path
    notebook = JupyterNotebook.parse_file(read_path)

    # Get fields to remove from cells and keep notebook schema
    cell_fields = {field for cell in notebook.cells for field, _ in cell if field}
    # A set gives constant-time membership tests; the `BaseCell` fields are
    #  always kept, on top of whatever the caller asked to keep
    fields_to_keep = {*cell_fields_keep, *BaseCell.__fields__}

    cell_remove_fields = [
        field for field in cell_fields if field not in fields_to_keep
    ]

    notebook.clear_metadata(
        notebook_metadata_keep=notebook_metadata_keep,
        cell_metadata_keep=cell_metadata_keep,
        cell_remove_fields=cell_remove_fields,
        **kwargs,
    )
    # Compare against a fresh parse of the file on disk to find out whether
    #  clearing changed anything
    nb_equals = notebook == JupyterNotebook.parse_file(read_path)

    if nb_equals or check:
        msg = (
            "no metadata to remove."
            if nb_equals
            else "only check (unwanted metadata found)."
        )
        logger.debug(f"No action taken for {read_path} - {msg}")
    else:
        notebook.write(path=write_path, overwrite=overwrite)
        logger.debug(f"Removed metadata from {read_path}, saved as {write_path}")

    return nb_equals
def clear_all(
    read_paths: List[Path],
    write_paths: List[Path],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **clear_kwargs: Any,
) -> List[bool]:
    """
    Clear metadata for multiple notebooks at notebooks and cell level.

    Notebooks are processed one at a time, pairing each read path with the
     write path at the same position.
    :param read_paths: Paths of notebook to remove metadata
    :param write_paths: Paths of where to write cleaned notebooks
    :param progress_callback: Callback function to report progress, called
     once after each notebook is processed
    :param clear_kwargs: Keyword arguments to be passed to `databooks.metadata.clear`
    :return: One value per notebook, in input order, as returned by
     `databooks.metadata.clear` (`True` when the notebook had no unwanted
     metadata, `False` otherwise)
    :raises ValueError: If `read_paths` and `write_paths` differ in length
    """
    # Validate up front: `zip` would otherwise silently drop the extra paths
    if len(read_paths) != len(write_paths):
        raise ValueError(
            "Read and write paths must have same length."
            f" Got {len(read_paths)} and {len(write_paths)}"
        )
    checks = []
    for nb_path, write_path in zip(read_paths, write_paths):
        checks.append(clear(read_path=nb_path, write_path=write_path, **clear_kwargs))
        progress_callback()
    return checks