Coverage for databooks/conflicts.py: 92%

38 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-04 16:41 +0000

1"""Functions to resolve any git conflicts between notebooks.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6from typing import Any, Callable, List, Optional, Sequence 

7 

8from git import Repo 

9 

10from databooks.common import find_common_parent 

11from databooks.data_models.notebook import JupyterNotebook 

12from databooks.git_utils import ConflictFile, get_conflict_blobs, get_repo 

13from databooks.logging import get_logger, set_verbose 

14 

15logger = get_logger(__file__) 

16 

17 

def path2conflicts(
    nb_paths: List[Path], repo: Optional[Repo] = None
) -> List[ConflictFile]:
    """
    Collect the conflicted files of a git repo that match the given paths.

    :param nb_paths: Paths to look for among the conflicted files - every path must
     either have no suffix or the `.ipynb` suffix
    :param repo: The git repo to look for conflicts - when `None`, the repo is
     obtained by calling `get_repo` on the common parent of `nb_paths`
    :return: List with every `databooks.git_utils.ConflictFile` whose filename
     matches the name (last path component) of at least one of `nb_paths`
    :raises ValueError: If a path has a suffix other than `""` or `".ipynb"`
    """
    for nb_path in nb_paths:
        if nb_path.suffix not in ("", ".ipynb"):
            raise ValueError(
                "Expected either notebook files, a directory or glob expression."
            )

    # The common parent is computed even when a repo is supplied by the caller.
    common_parent = find_common_parent(nb_paths)
    if repo is None:
        repo = get_repo(common_parent)

    # Only the final component of each path is used as the match pattern.
    wanted_names = [str(nb_path.name) for nb_path in nb_paths]
    return [
        conflict
        for conflict in get_conflict_blobs(repo=repo)
        if any(conflict.filename.match(name) for name in wanted_names)
    ]

39 

40 

def conflict2nb(
    conflict_file: ConflictFile,
    *,
    meta_first: bool = True,
    cells_first: Optional[bool] = None,
    cell_fields_ignore: Sequence[str] = ("id", "execution_count"),
    ignore_none: bool = True,
    verbose: bool = False,
) -> JupyterNotebook:
    """
    Merge diffs from conflicts and return a valid notebook.

    :param conflict_file: A `databooks.git_utils.ConflictFile` with conflicts
    :param meta_first: Whether to keep the metadata of the first or last notebook
    :param cells_first: Whether to keep the cells of the first or last notebook
     (forwarded unchanged, including `None`, to the diff's `resolve` method)
    :param cell_fields_ignore: Fields to remove before comparing notebooks - i.e.: cell
     IDs or execution counts may not want to be considered
    :param ignore_none: Keep all metadata fields even if it's included in only one
     notebook
    :param verbose: Log written files and metadata conflicts
    :return: Resolved conflicts as a `databooks.data_models.notebook.JupyterNotebook`
     model
    :raises RuntimeError: If resolving the diff does not yield a `JupyterNotebook`
    """
    if verbose:
        set_verbose(logger)

    nb_1 = JupyterNotebook.parse_raw(conflict_file.first_contents)
    nb_2 = JupyterNotebook.parse_raw(conflict_file.last_contents)
    if nb_1.metadata != nb_2.metadata:
        # The conditional must be parenthesised: `+` binds tighter than
        # `if ... else`, so without the parentheses the entire message would be
        # reduced to just "last." whenever `meta_first` is false.
        msg = (
            f"Notebook metadata conflict for {conflict_file.filename}. Keeping "
            + ("first." if meta_first else "last.")
        )
        logger.debug(msg)

    if cell_fields_ignore:
        # Strip the ignored fields from both versions before they are diffed.
        for cells in (nb_1.cells, nb_2.cells):
            for cell in cells:
                cell.clear_fields(
                    cell_metadata_remove=[], cell_remove_fields=cell_fields_ignore
                )

    diff_nb = nb_1 - nb_2
    nb = diff_nb.resolve(
        ignore_none=ignore_none,
        keep_first=meta_first,
        keep_first_cells=cells_first,
        first_id=conflict_file.first_log,
        last_id=conflict_file.last_log,
    )
    if not isinstance(nb, JupyterNotebook):
        raise RuntimeError(f"Expected `databooks.JupyterNotebook`, got {type(nb)}.")

    logger.debug(f"Resolved conflicts in {conflict_file.filename}.")
    return nb

98 

99 

def conflicts2nbs(
    conflict_files: List[ConflictFile],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **conflict2nb_kwargs: Any,
) -> None:
    """
    Resolve every conflict file and write the resulting notebook back to disk.

    Each entry is resolved with `databooks.conflicts.conflict2nb` and the resolved
    notebook is written to the conflict file's own `filename`, overwriting it.
    :param conflict_files: Files with source conflict files and one-liner git logs
    :param progress_callback: Callback function to report progress - called with no
     arguments once after each notebook is written
    :param conflict2nb_kwargs: Keyword arguments to be passed to
     `databooks.conflicts.conflict2nb`
    :return: Nothing - the notebooks are written as a side effect
    """
    for conflict_file in conflict_files:
        conflict2nb(conflict_file, **conflict2nb_kwargs).write(
            path=conflict_file.filename, overwrite=True
        )
        progress_callback()