Coverage for databooks/metadata.py: 94%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

32 statements  

1"""Metadata wrapper functions for cleaning notebook metadata.""" 

2from pathlib import Path 

3from typing import Any, Callable, List, Optional, Sequence 

4 

5from databooks import JupyterNotebook 

6from databooks.common import write_notebook 

7from databooks.data_models.notebook import Cell 

8from databooks.logging import get_logger, set_verbose 

9 

# Module-level logger, obtained from `get_logger` using this file's path.
10logger = get_logger(__file__) 

11 

12 

def clear(
    read_path: Path,
    write_path: Optional[Path] = None,
    notebook_metadata_keep: Sequence[str] = (),
    cell_metadata_keep: Sequence[str] = (),
    cell_fields_keep: Sequence[str] = (),
    check: bool = False,
    verbose: bool = False,
    **kwargs: Any,
) -> bool:
    """
    Clear Jupyter Notebook metadata.

    Clear metadata (at notebook and cell level) and write clean
     notebook. By default remove all metadata.
    :param read_path: Path of notebook file with metadata to be cleaned
    :param write_path: Path where the cleaned notebook is written; when `None`
     the notebook at `read_path` is overwritten
    :param notebook_metadata_keep: Notebook metadata fields to keep
    :param cell_metadata_keep: Cell metadata fields to keep
    :param cell_fields_keep: Cell fields to keep, in addition to the fields
     declared on `Cell` (which are always kept). This argument is never modified
    :param check: Don't write any files, check whether there is unwanted metadata
    :param verbose: Log written files
    :param kwargs: Additional keyword arguments to pass to
     `databooks.data_models.JupyterNotebook.clear_metadata`
    :return: Whether the cleaned notebook equals the notebook read from
     `read_path` (i.e. `True` when there was nothing to remove)
    """
    if verbose:
        set_verbose(logger)

    if write_path is None:
        write_path = read_path
    notebook = JupyterNotebook.parse_file(read_path)

    # Fields declared on `Cell` are required by the notebook schema and must
    # never be removed. Build a fresh set rather than extending the argument in
    # place: the previous `+=` mutated the caller's list (and the shared default),
    # so the keep-list grew on every call.
    keep_fields = {*cell_fields_keep, *Cell.__fields__}

    # Every field name that appears on any cell of this notebook
    cell_fields = {field for cell in notebook.cells for field, _ in cell if field}
    cell_remove_fields = list(cell_fields - keep_fields)

    notebook.clear_metadata(
        notebook_metadata_keep=notebook_metadata_keep,
        cell_metadata_keep=cell_metadata_keep,
        cell_remove_fields=cell_remove_fields,
        **kwargs,
    )
    # Compare against a fresh parse of the source file to detect any change
    nb_equals = notebook == JupyterNotebook.parse_file(read_path)

    if nb_equals or check:
        msg = (
            "only check (unwanted metadata found)."
            if not nb_equals
            else "no metadata to remove."
        )
        logger.debug(f"No action taken for {read_path} - " + msg)
    else:
        write_notebook(nb=notebook, path=write_path)
        logger.debug(f"Removed metadata from {read_path}, saved as {write_path}")

    return nb_equals

74 

75 

def clear_all(
    read_paths: List[Path],
    write_paths: List[Path],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **clear_kwargs: Any,
) -> List[bool]:
    """
    Clear metadata of several notebooks, one read/write path pair at a time.

    :param read_paths: Paths of the notebooks to clean
    :param write_paths: Destination paths, matched to `read_paths` by position
    :param progress_callback: Called with no arguments after each notebook
    :param clear_kwargs: Keyword arguments forwarded to `databooks.metadata.clear`
    :return: The value returned by `databooks.metadata.clear` for each pair, in
     input order
    :raises ValueError: If `read_paths` and `write_paths` differ in length
    """
    n_read, n_write = len(read_paths), len(write_paths)
    if n_read != n_write:
        raise ValueError(
            "Read and write paths must have same length."
            f" Got {n_read} and {n_write}"
        )

    def _clear_one(source: Path, target: Path) -> bool:
        # Report progress only once the notebook has been processed
        outcome = clear(read_path=source, write_path=target, **clear_kwargs)
        progress_callback()
        return outcome

    return [_clear_one(src, dst) for src, dst in zip(read_paths, write_paths)]