Coverage for databooks/conflicts.py: 90%
40 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-10-03 12:27 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-10-03 12:27 +0000
1"""Functions to resolve any git conflicts between notebooks."""
3from __future__ import annotations
5from pathlib import Path
6from typing import Any, Callable, List, Optional, Sequence
8from git import Repo
10from databooks.common import find_common_parent
11from databooks.data_models.notebook import JupyterNotebook
12from databooks.git_utils import ConflictFile, get_conflict_blobs, get_repo
13from databooks.logging import get_logger, set_verbose
# Module-level logger, obtained from `databooks.logging.get_logger` for this file.
15logger = get_logger(__file__)
def path2conflicts(
    nb_paths: List[Path], repo: Optional[Repo] = None
) -> List[ConflictFile]:
    """
    Collect the git conflict files that correspond to the given notebook paths.

    :param nb_paths: Paths to look up conflicts for; every path must either have
     no suffix or the `.ipynb` suffix
    :param repo: The git repo to look for conflicts; when omitted it is looked up
     with `get_repo` from the common parent of `nb_paths`
    :return: The conflict files (as returned by `get_conflict_blobs`) whose
     filename matches the name of at least one path in `nb_paths`
    :raises ValueError: If a path has a suffix other than `.ipynb`, or if no repo
     is available
    """
    for nb_path in nb_paths:
        if nb_path.suffix not in ("", ".ipynb"):
            raise ValueError(
                "Expected either notebook files, a directory or glob expression."
            )

    common_parent = find_common_parent(nb_paths)
    if repo is None:
        repo = get_repo(common_parent)
    if repo is None:
        raise ValueError("No repo found - cannot compute conflict blobs.")

    # Only the final path component of each requested path is used for matching.
    wanted_names = [str(nb_path.name) for nb_path in nb_paths]
    matching_conflicts = []
    for conflict_file in get_conflict_blobs(repo=repo):
        if any(conflict_file.filename.match(name) for name in wanted_names):
            matching_conflicts.append(conflict_file)
    return matching_conflicts
def conflict2nb(
    conflict_file: ConflictFile,
    *,
    meta_first: bool = True,
    cells_first: Optional[bool] = None,
    cell_fields_ignore: Sequence[str] = ("id", "execution_count"),
    ignore_none: bool = True,
    verbose: bool = False,
) -> JupyterNotebook:
    """
    Merge diffs from conflicts and return a valid notebook.

    :param conflict_file: A `databooks.git_utils.ConflictFile` with conflicts
    :param meta_first: Whether to keep the metadata of the first or last notebook
    :param cells_first: Whether to keep the cells of the first or last notebook
    :param cell_fields_ignore: Fields to remove before comparing notebooks - i.e.:
     cell IDs or execution counts may not want to be considered
    :param ignore_none: Keep all metadata fields even if they are included in only
     one notebook
    :param verbose: Log written files and metadata conflicts
    :return: Resolved conflicts as a `databooks.data_models.notebook.JupyterNotebook`
     model
    :raises RuntimeError: If resolving the diff does not yield a `JupyterNotebook`
    """
    if verbose:
        set_verbose(logger)

    nb_1 = JupyterNotebook.parse_raw(conflict_file.first_contents)
    nb_2 = JupyterNotebook.parse_raw(conflict_file.last_contents)
    if nb_1.metadata != nb_2.metadata:
        # Pick the suffix first: a conditional expression binds looser than `+`,
        # so concatenating inline would reduce the whole message to "last."
        # whenever `meta_first` is False.
        kept = "first." if meta_first else "last."
        msg = (
            f"Notebook metadata conflict for {conflict_file.filename}. Keeping "
            + kept
        )
        logger.debug(msg)

    if cell_fields_ignore:
        # Strip the ignored fields from both sides so they do not show up as
        # differences when the notebooks are compared below.
        for cells in (nb_1.cells, nb_2.cells):
            for cell in cells:
                cell.clear_fields(
                    cell_metadata_remove=[], cell_remove_fields=cell_fields_ignore
                )

    diff_nb = nb_1 - nb_2
    nb = diff_nb.resolve(
        ignore_none=ignore_none,
        keep_first=meta_first,
        keep_first_cells=cells_first,
        first_id=conflict_file.first_log,
        last_id=conflict_file.last_log,
    )
    if not isinstance(nb, JupyterNotebook):
        raise RuntimeError(f"Expected `databooks.JupyterNotebook`, got {type(nb)}.")

    logger.debug(f"Resolved conflicts in {conflict_file.filename}.")
    return nb
def conflicts2nbs(
    conflict_files: List[ConflictFile],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **conflict2nb_kwargs: Any,
) -> None:
    """
    Resolve each conflict file and write the resulting notebook back to its path.

    Wraps `databooks.conflicts.conflict2nb`: every entry of `conflict_files` is
    resolved and the result is written (overwriting) to that entry's `filename`.

    :param conflict_files: Files with source conflict files and one-liner git logs
    :param progress_callback: Callback function to report progress; called once
     after each notebook is written
    :param conflict2nb_kwargs: Keyword arguments to be passed to
     `databooks.conflicts.conflict2nb`
    :return: None
    """
    for conflict_file in conflict_files:
        conflict2nb(conflict_file, **conflict2nb_kwargs).write(
            path=conflict_file.filename, overwrite=True
        )
        progress_callback()