Coverage for databooks/conflicts.py: 90%

40 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-09 13:11 +0000

1"""Functions to resolve any git conflicts between notebooks.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6from typing import Any, Callable, List, Optional, Sequence 

7 

8from git import Repo 

9 

10from databooks.common import find_common_parent 

11from databooks.data_models.notebook import JupyterNotebook 

12from databooks.git_utils import ConflictFile, get_conflict_blobs, get_repo 

13from databooks.logging import get_logger, set_verbose 

14 

15logger = get_logger(__file__) 

16 

17 

def path2conflicts(
    nb_paths: List[Path], repo: Optional[Repo] = None
) -> List[ConflictFile]:
    """
    Collect the conflicting files in a git repo that correspond to the given paths.

    :param nb_paths: Paths to look for conflicts in - each must either have no
     suffix or the `.ipynb` suffix
    :param repo: The git repo to look for conflicts; when omitted it is looked up
     from the common parent of `nb_paths`
    :return: List of `databooks.git_utils.ConflictFile`s whose filename matches the
     name of at least one of `nb_paths`
    :raises ValueError: If a path has a suffix other than `.ipynb`, or if no repo
     is available
    """
    accepted_suffixes = ("", ".ipynb")
    for nb_path in nb_paths:
        if nb_path.suffix not in accepted_suffixes:
            raise ValueError(
                "Expected either notebook files, a directory or glob expression."
            )

    # The common parent is always computed, even when a repo is passed explicitly.
    common_parent = find_common_parent(nb_paths)
    if repo is None:
        repo = get_repo(common_parent)
    if repo is None:
        raise ValueError("No repo found - cannot compute conflict blobs.")

    matching_conflicts = []
    for conflict in get_conflict_blobs(repo=repo):
        # Keep a conflict as soon as one requested path name matches its filename.
        for nb_path in nb_paths:
            if conflict.filename.match(str(nb_path.name)):
                matching_conflicts.append(conflict)
                break
    return matching_conflicts

41 

42 

def conflict2nb(
    conflict_file: ConflictFile,
    *,
    meta_first: bool = True,
    cells_first: Optional[bool] = None,
    cell_fields_ignore: Sequence[str] = ("id", "execution_count"),
    ignore_none: bool = True,
    verbose: bool = False,
) -> JupyterNotebook:
    """
    Merge diffs from conflicts and return a valid notebook.

    Both sides of the conflict are parsed as notebooks, the ignored cell fields are
    cleared from each of them, and the difference between the two is resolved.

    :param conflict_file: A `databooks.git_utils.ConflictFile` with conflicts
    :param meta_first: Whether to keep the metadata of the first or last notebook
    :param cells_first: Whether to keep the cells of the first or last notebook
     (forwarded to the diff resolution as `keep_first_cells`)
    :param cell_fields_ignore: Fields to remove before comparing notebooks - i.e.: cell
     IDs or execution counts may not want to be considered
    :param ignore_none: Keep all metadata fields even if it's included in only one
     notebook
    :param verbose: Log written files and metadata conflicts
    :return: Resolved conflicts as a `databooks.data_models.notebook.JupyterNotebook`
     model
    :raises RuntimeError: If resolving the diff does not yield a `JupyterNotebook`
    """
    if verbose:
        set_verbose(logger)

    nb_1 = JupyterNotebook.parse_raw(conflict_file.first_contents)
    nb_2 = JupyterNotebook.parse_raw(conflict_file.last_contents)
    if nb_1.metadata != nb_2.metadata:
        # Choose the word first: a conditional expression binds looser than `+`,
        # so `prefix + "first." if meta_first else "last."` would log only
        # "last." (dropping the filename) whenever `meta_first` is False.
        kept = "first" if meta_first else "last"
        logger.debug(
            f"Notebook metadata conflict for {conflict_file.filename}. "
            f"Keeping {kept}."
        )

    if cell_fields_ignore:
        # Drop the ignored fields from every cell of both notebooks before diffing.
        for cells in (nb_1.cells, nb_2.cells):
            for cell in cells:
                cell.clear_fields(
                    cell_metadata_remove=[], cell_remove_fields=cell_fields_ignore
                )

    diff_nb = nb_1 - nb_2
    nb = diff_nb.resolve(
        ignore_none=ignore_none,
        keep_first=meta_first,
        keep_first_cells=cells_first,
        first_id=conflict_file.first_log,
        last_id=conflict_file.last_log,
    )
    if not isinstance(nb, JupyterNotebook):
        raise RuntimeError(f"Expected `databooks.JupyterNotebook`, got {type(nb)}.")

    logger.debug(f"Resolved conflicts in {conflict_file.filename}.")
    return nb

100 

101 

def conflicts2nbs(
    conflict_files: List[ConflictFile],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **conflict2nb_kwargs: Any,
) -> None:
    """
    Resolve each conflict file and write the resulting notebook in its place.

    Wrap `databooks.conflicts.conflict2nb`: every entry is resolved into a notebook,
    which is then written to the conflict's own filename (overwriting it).
    :param conflict_files: Files with source conflict files and one-liner git logs
    :param progress_callback: Callback function to report progress, called once
     after each notebook is written
    :param conflict2nb_kwargs: Keyword arguments to be passed to
     `databooks.conflicts.conflict2nb`
    :return:
    """
    for conflict_file in conflict_files:
        resolved_nb = conflict2nb(conflict_file, **conflict2nb_kwargs)
        destination = conflict_file.filename
        resolved_nb.write(path=destination, overwrite=True)
        progress_callback()