Coverage for databooks/metadata.py: 93%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Metadata wrapper functions for cleaning notebook metadata."""
2from pathlib import Path
3from typing import Any, Callable, List, Optional, Sequence
5from databooks import JupyterNotebook
6from databooks.common import get_logger, set_verbose, write_notebook
8logger = get_logger(__file__)
def clear(
    read_path: Path,
    write_path: Optional[Path] = None,
    notebook_metadata_keep: Sequence[str] = (),
    cell_metadata_keep: Sequence[str] = (),
    check: bool = False,
    verbose: bool = False,
    **kwargs: Any,
) -> bool:
    """
    Strip metadata from a single Jupyter notebook.

    The notebook at `read_path` is parsed, its notebook-level and cell-level
    metadata is cleared (except for the fields listed to keep) and the result
    is compared against the file as it is on disk. The cleaned notebook is only
    written when it differs from the original and `check` is not set.

    :param read_path: Path of notebook file with metadata to be cleaned
    :param write_path: Path where the cleaned notebook is written; falls back to
     `read_path` when not given
    :param notebook_metadata_keep: Notebook metadata fields to keep
    :param cell_metadata_keep: Cell metadata fields to keep
    :param check: Don't write any files, check whether there is unwanted metadata
    :param verbose: Log written files
    :param kwargs: Additional keyword arguments forwarded to the notebook's
     `clear_metadata` method
    :return: Whether the cleaned notebook equals the notebook read from
     `read_path` (i.e. there was nothing to remove)
    """
    if verbose:
        set_verbose(logger)

    # Without an explicit destination the notebook is cleaned in place.
    write_path = read_path if write_path is None else write_path

    cleaned = JupyterNotebook.parse_file(read_path)
    cleaned.clear_metadata(
        notebook_metadata_keep=notebook_metadata_keep,
        cell_metadata_keep=cell_metadata_keep,
        **kwargs,
    )
    # Parse the file a second time to get an unmodified reference to compare with.
    pristine = JupyterNotebook.parse_file(read_path)
    nb_equals = cleaned == pristine

    if not (nb_equals or check):
        # There is metadata to remove and we are allowed to write.
        write_notebook(nb=cleaned, path=write_path)
        logger.debug(f"Removed metadata from {read_path}, saved as {write_path}")
        return nb_equals

    # Nothing is written: either the notebook is already clean or we only check.
    if nb_equals:
        msg = "no metadata to remove."
    else:
        msg = "only check (unwanted metadata found)."
    logger.debug(f"No action taken for {read_path} - " + msg)
    return nb_equals
def clear_all(
    read_paths: List[Path],
    write_paths: List[Path],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **clear_kwargs: Any,
) -> List[bool]:
    """
    Clear notebook and cell metadata for a batch of notebooks.

    Read and write paths are matched by position; each pair is handed to
    `databooks.metadata.clear` and `progress_callback` is invoked after every
    notebook has been processed.

    :param read_paths: Paths of the notebooks to remove metadata from
    :param write_paths: Paths of where to write the cleaned notebooks
    :param progress_callback: Callback function (no arguments) to report progress
    :param clear_kwargs: Keyword arguments to be passed to `databooks.metadata.clear`
    :return: The value returned by `databooks.metadata.clear` for each notebook,
     in the same order as `read_paths`
    :raises ValueError: If `read_paths` and `write_paths` differ in length
    """
    if len(read_paths) != len(write_paths):
        raise ValueError(
            "Read and write paths must have same length."
            f" Got {len(read_paths)} and {len(write_paths)}"
        )

    def _clear_and_report(source: Path, target: Path) -> bool:
        """Clean one notebook, then signal progress for it."""
        outcome = clear(read_path=source, write_path=target, **clear_kwargs)
        progress_callback()
        return outcome

    return [
        _clear_and_report(source, target)
        for source, target in zip(read_paths, write_paths)
    ]