Coverage for databooks/metadata.py: 94%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Metadata wrapper functions for cleaning notebook metadata."""
2from pathlib import Path
3from typing import Any, Callable, List, Optional, Sequence
5from databooks import JupyterNotebook
6from databooks.common import write_notebook
7from databooks.data_models.notebook import Cell
8from databooks.logging import get_logger, set_verbose
# Module-level logger for this file; `clear` hands it to `set_verbose` when
# called with `verbose=True`.
10logger = get_logger(__file__)
def clear(
    read_path: Path,
    write_path: Optional[Path] = None,
    notebook_metadata_keep: Sequence[str] = (),
    cell_metadata_keep: Sequence[str] = (),
    cell_fields_keep: Sequence[str] = (),
    check: bool = False,
    verbose: bool = False,
    **kwargs: Any,
) -> bool:
    """
    Clear Jupyter Notebook metadata.

    Clear metadata (at notebook and cell level) and write clean
     notebook. By default remove all metadata.
    :param read_path: Path of notebook file with metadata to be cleaned
    :param write_path: Path where the cleaned notebook is written; when `None`
     the notebook at `read_path` is overwritten
    :param notebook_metadata_keep: Notebook metadata fields to keep
    :param cell_metadata_keep: Cell metadata fields to keep
    :param cell_fields_keep: Cell fields to keep (fields declared in
     `Cell.__fields__` are always kept)
    :param check: Don't write any files, check whether there is unwanted metadata
    :param verbose: Log written files
    :param kwargs: Additional keyword arguments to pass to
     `databooks.data_models.JupyterNotebook.clear_metadata`
    :return: Whether the cleaned notebook equals the notebook on disk, i.e.
     `True` when there was no unwanted metadata to remove
    """
    if verbose:
        set_verbose(logger)

    if write_path is None:
        write_path = read_path
    notebook = JupyterNotebook.parse_file(read_path)

    # Every field name found on any cell, minus the ones requested by the caller
    # and the ones declared by the `Cell` model, is scheduled for removal.
    fields_present = {
        field for cell in notebook.cells for field, _ in cell if field
    }
    fields_to_keep = set(cell_fields_keep) | set(Cell.__fields__)
    # Sorted so the removal list does not depend on set iteration order.
    cell_remove_fields = sorted(fields_present - fields_to_keep)

    notebook.clear_metadata(
        notebook_metadata_keep=notebook_metadata_keep,
        cell_metadata_keep=cell_metadata_keep,
        cell_remove_fields=cell_remove_fields,
        **kwargs,
    )
    # The return value of `clear_metadata` is not used: the cleaned `notebook`
    # is compared against a fresh parse of the file on disk.
    nb_equals = notebook == JupyterNotebook.parse_file(read_path)

    if not (nb_equals or check):
        write_notebook(nb=notebook, path=write_path)
        logger.debug(f"Removed metadata from {read_path}, saved as {write_path}")
        return nb_equals

    # Nothing is written: either the notebook is already clean or we only check.
    if nb_equals:
        msg = "no metadata to remove."
    else:
        msg = "only check (unwanted metadata found)."
    logger.debug(f"No action taken for {read_path} - " + msg)
    return nb_equals
def clear_all(
    read_paths: List[Path],
    write_paths: List[Path],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **clear_kwargs: Any,
) -> List[bool]:
    """
    Clear metadata for multiple notebooks, pairing each read path with a write path.

    :param read_paths: Paths of the notebooks to remove metadata from
    :param write_paths: Paths where the cleaned notebooks are written, in the
     same order as `read_paths`
    :param progress_callback: Called with no arguments after each notebook
    :param clear_kwargs: Keyword arguments to be passed to `databooks.metadata.clear`
    :return: The value returned by `clear` for each notebook, in input order
    :raises ValueError: If `read_paths` and `write_paths` differ in length
    """
    n_read, n_write = len(read_paths), len(write_paths)
    if n_read != n_write:
        raise ValueError(
            "Read and write paths must have same length."
            f" Got {n_read} and {n_write}"
        )

    def _clear_one(source: Path, target: Path) -> bool:
        # Report progress only once the notebook has been processed.
        outcome = clear(read_path=source, write_path=target, **clear_kwargs)
        progress_callback()
        return outcome

    return [
        _clear_one(source, target)
        for source, target in zip(read_paths, write_paths)
    ]