Skip to content

Metadata

Metadata wrapper functions for cleaning notebook metadata.

clear(read_path, write_path=None, notebook_metadata_keep=(), cell_metadata_keep=(), cell_fields_keep=[], check=False, verbose=False, **kwargs)

Clear Jupyter Notebook metadata.

Clear metadata (at notebook and cell level) and write clean notebook. By default remove all metadata.

Parameters:

Name Type Description Default
read_path Path

Path of notebook file with metadata to be cleaned

required
write_path Optional[pathlib.Path]

Path where the cleaned notebook is written (defaults to read_path when None)

None
notebook_metadata_keep Sequence[str]

Notebook metadata fields to keep

()
cell_metadata_keep Sequence[str]

Cell metadata fields to keep

()
cell_fields_keep List[str]

Cell fields to keep

[]
check bool

Don't write any files, check whether there is unwanted metadata

False
verbose bool

Log written files

False
kwargs Any

Additional keyword arguments to pass to databooks.data_models.JupyterNotebook.clear_metadata

{}

Returns:

Type Description
bool

Whether the cleaned notebook equals the original, i.e. there was no metadata to remove

Source code in databooks/metadata.py
def clear(
    read_path: Path,
    write_path: Optional[Path] = None,
    notebook_metadata_keep: Sequence[str] = (),
    cell_metadata_keep: Sequence[str] = (),
    cell_fields_keep: List[str] = [],
    check: bool = False,
    verbose: bool = False,
    **kwargs: Any,
) -> bool:
    """
    Clear Jupyter Notebook metadata.

    Clear metadata (at notebook and cell level) and write clean
     notebook. By default remove all metadata.
    :param read_path: Path of notebook file with metadata to be cleaned
    :param write_path: Path where the cleaned notebook is written. Defaults to
     `read_path` (i.e. the notebook is overwritten) when `None`
    :param notebook_metadata_keep: Notebook metadata fields to keep
    :param cell_metadata_keep: Cell metadata fields to keep
    :param cell_fields_keep: Cell fields to keep. The fields declared on `Cell`
     are always kept in addition to these. This argument is never modified
    :param check: Don't write any files, check whether there is unwanted metadata
    :param verbose: Log written files
    :param kwargs: Additional keyword arguments to pass to
     `databooks.data_models.JupyterNotebook.clear_metadata`
    :return: Whether the cleaned notebook equals the notebook stored at
     `read_path`, i.e. `True` when there was no metadata to remove
    """
    if verbose:
        set_verbose(logger)

    if write_path is None:
        write_path = read_path
    notebook = JupyterNotebook.parse_file(read_path)

    # Get fields to remove from cells
    cell_fields = {field for cell in notebook.cells for field, _ in cell if field}
    # Build a fresh collection instead of extending `cell_fields_keep` in place:
    # in-place `+=` would grow the shared default list on every call and would
    # also modify a list owned by the caller.
    keep_fields = set(cell_fields_keep)
    keep_fields.update(Cell.__fields__)  # required field for notebook schema

    cell_remove_fields = [field for field in cell_fields if field not in keep_fields]

    notebook.clear_metadata(
        notebook_metadata_keep=notebook_metadata_keep,
        cell_metadata_keep=cell_metadata_keep,
        cell_remove_fields=cell_remove_fields,
        **kwargs,
    )
    # Compare against a fresh parse of the file on disk to detect any change.
    nb_equals = notebook == JupyterNotebook.parse_file(read_path)

    if nb_equals or check:
        msg = (
            "only check (unwanted metadata found)."
            if not nb_equals
            else "no metadata to remove."
        )
        logger.debug(f"No action taken for {read_path} - " + msg)
    else:
        write_notebook(nb=notebook, path=write_path)
        logger.debug(f"Removed metadata from {read_path}, saved as {write_path}")

    return nb_equals

clear_all(read_paths, write_paths, *, progress_callback=<function <lambda> at 0x7f1f72dc7310>, **clear_kwargs)

Clear metadata for multiple notebooks at notebooks and cell level.

Parameters:

Name Type Description Default
read_paths List[pathlib.Path]

Paths of notebook to remove metadata

required
write_paths List[pathlib.Path]

Paths of where to write cleaned notebooks

required
progress_callback Callable[[], NoneType]

Callback function to report progress

<function <lambda> at 0x7f1f72dc7310>
clear_kwargs Any

Keyword arguments to be passed to databooks.metadata.clear

{}

Returns:

Type Description
List[bool]

For each notebook, whether it was already free of unwanted metadata (the value returned by databooks.metadata.clear)

Source code in databooks/metadata.py
def clear_all(
    read_paths: List[Path],
    write_paths: List[Path],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **clear_kwargs: Any,
) -> List[bool]:
    """
    Clear metadata for multiple notebooks at notebooks and cell level.

    Each read path is paired with the write path at the same position and the
     pair is handed to `databooks.metadata.clear`.
    :param read_paths: Paths of notebook to remove metadata
    :param write_paths: Paths of where to write cleaned notebooks
    :param progress_callback: Callback function to report progress, invoked
     once after each notebook is processed
    :param clear_kwargs: Keyword arguments to be passed to `databooks.metadata.clear`
    :return: One value per notebook, as returned by `databooks.metadata.clear`
    :raises ValueError: If `read_paths` and `write_paths` differ in length
    """
    n_read, n_write = len(read_paths), len(write_paths)
    if n_read != n_write:
        raise ValueError(
            "Read and write paths must have same length."
            f" Got {n_read} and {n_write}"
        )

    def _clear_one(source: Path, target: Path) -> bool:
        # Process a single notebook, then report progress for it.
        outcome = clear(read_path=source, write_path=target, **clear_kwargs)
        progress_callback()
        return outcome

    return [_clear_one(src, dst) for src, dst in zip(read_paths, write_paths)]
Back to top