Coverage for databooks/metadata.py: 93%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

27 statements  

1"""Metadata wrapper functions for cleaning notebook metadata.""" 

2from pathlib import Path 

3from typing import Any, Callable, List, Optional, Sequence 

4 

5from databooks import JupyterNotebook 

6from databooks.common import get_logger, set_verbose, write_notebook 

7 

8logger = get_logger(__file__) 

9 

10 

def clear(
    read_path: Path,
    write_path: Optional[Path] = None,
    notebook_metadata_keep: Sequence[str] = (),
    cell_metadata_keep: Sequence[str] = (),
    check: bool = False,
    verbose: bool = False,
    **kwargs: Any,
) -> bool:
    """
    Clear Jupyter Notebook metadata.

    Clear metadata (at notebook and cell level) and write clean
     notebook. By default remove all metadata.
    :param read_path: Path of notebook file with metadata to be cleaned
    :param write_path: Path where the cleaned notebook is written. Defaults to
     `read_path` when `None`, i.e. the notebook is overwritten in place
    :param notebook_metadata_keep: Notebook metadata fields to keep
    :param cell_metadata_keep: Cell metadata fields to keep
    :param check: Don't write any files, check whether there is unwanted metadata
    :param verbose: Log written files
    :param kwargs: Additional keyword arguments to pass to
     `databooks.data_models.JupyterNotebook.clear_metadata`
    :return: Whether the cleaned notebook equals the notebook stored at `read_path`
     (`True` when there was no metadata to remove)
    """
    if verbose:
        set_verbose(logger)

    if write_path is None:
        write_path = read_path
    notebook = JupyterNotebook.parse_file(read_path)

    notebook.clear_metadata(
        notebook_metadata_keep=notebook_metadata_keep,
        cell_metadata_keep=cell_metadata_keep,
        **kwargs,
    )
    # Parse the file a second time so the cleaned notebook is compared with a
    # fresh copy of what is currently on disk.
    nb_equals = notebook == JupyterNotebook.parse_file(read_path)

    if nb_equals or check:
        # Nothing is written: either there is nothing to clean or we only check.
        msg = (
            "only check (unwanted metadata found)."
            if not nb_equals
            else "no metadata to remove."
        )
        logger.debug(f"No action taken for {read_path} - " + msg)
    else:
        write_notebook(nb=notebook, path=write_path)
        logger.debug(f"Removed metadata from {read_path}, saved as {write_path}")

    return nb_equals

61 

62 

def clear_all(
    read_paths: List[Path],
    write_paths: List[Path],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **clear_kwargs: Any,
) -> List[bool]:
    """
    Clear metadata for multiple notebooks at notebooks and cell level.

    :param read_paths: Paths of notebook to remove metadata
    :param write_paths: Paths of where to write cleaned notebooks
    :param progress_callback: Callback function to report progress; called once
     after each notebook is processed
    :param clear_kwargs: Keyword arguments to be passed to `databooks.metadata.clear`
    :return: One boolean per notebook, in the order of `read_paths`, each being
     the value returned by `databooks.metadata.clear` for that notebook
    :raises ValueError: If `read_paths` and `write_paths` differ in length
    """
    if len(read_paths) != len(write_paths):
        raise ValueError(
            "Read and write paths must have same length."
            f" Got {len(read_paths)} and {len(write_paths)}"
        )
    checks = []
    # Paths are paired positionally: the i-th read path goes to the i-th write path.
    for nb_path, write_path in zip(read_paths, write_paths):
        checks.append(clear(read_path=nb_path, write_path=write_path, **clear_kwargs))
        progress_callback()
    return checks