Coverage for databooks/cli.py: 92%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Main CLI application."""
2from itertools import compress
3from pathlib import Path
4from typing import List, Optional
6import tomli
7from rich.progress import (
8 BarColumn,
9 Progress,
10 SpinnerColumn,
11 TextColumn,
12 TimeElapsedColumn,
13)
14from rich.prompt import Confirm
15from typer import Argument, BadParameter, Context, Exit, Option, Typer, echo
17from databooks.affirm import affirm_all
18from databooks.common import expand_paths
19from databooks.config import TOML_CONFIG_FILE, get_config
20from databooks.conflicts import conflicts2nbs, path2conflicts
21from databooks.logging import get_logger
22from databooks.metadata import clear_all
23from databooks.recipes import Recipe
24from databooks.version import __version__
26logger = get_logger(__file__)
28app = Typer()
31def _version_callback(show_version: bool) -> None:
32 """Return application version."""
33 if show_version:
34 echo("databooks version: " + __version__)
35 raise Exit()
def _help_callback(ctx: Context, show_help: Optional[bool]) -> None:
    """
    Echo the help of the current command and stop the CLI when requested.

    Commands are registered with `add_help_option=False` and declare their own eager
    `--help` option that points to this callback.

    :param ctx: Context of the command being invoked
    :param show_help: Value of the `--help` flag
    :raises Exit: After echoing the help message
    """
    if not show_help:
        return
    echo(ctx.command.get_help(ctx))
    raise Exit()
def _config_callback(ctx: Context, config_path: Optional[Path]) -> Optional[Path]:
    """
    Locate the configuration file and use its values as defaults for the command.

    When no configuration file is passed, one is looked up (via `get_config`) based
    on the `paths` already parsed into the context. The values found under
    `tool.databooks.<command name>` are merged into `ctx.default_map`, taking
    precedence over any default already present there.

    :param ctx: Context of the command being invoked
    :param config_path: Path passed via `--config`, if any
    :return: Path of the configuration file that was used, `None` if there is none
    """
    cli_paths = [Path(p) for p in ctx.params.get("paths", ())]
    target_paths = expand_paths(paths=cli_paths, rglob="*")

    # Only search for a config file when the user did not provide one explicitly
    if config_path is None and target_paths:
        config_path = get_config(
            target_paths=target_paths,
            config_filename=TOML_CONFIG_FILE,
        )
    logger.debug(f"Loading config file from: {config_path}")

    if config_path is None:  # config may not be specified
        return None

    with config_path.open("rb") as toml_file:
        toml_content = tomli.load(toml_file)
    command_conf = (
        toml_content.get("tool", {}).get("databooks", {}).get(ctx.command.name, {})
    )

    # Merge configuration - keys are written with dashes, parameters with underscores
    merged_defaults = dict(ctx.default_map or {})
    merged_defaults.update(
        (key.replace("-", "_"), value) for key, value in command_conf.items()
    )
    ctx.default_map = merged_defaults
    return config_path
def _check_paths(paths: List[Path], ignore: List[str]) -> List[Path]:
    """
    Validate the paths passed to the CLI and expand them into notebook paths.

    :param paths: Notebook files, directories or glob expressions
    :param ignore: Glob expressions of files to ignore, forwarded to `expand_paths`
    :return: Non-empty list of paths, as returned by `expand_paths`
    :raises BadParameter: If a path that is not an existing directory has a suffix
     other than `.ipynb`
    :raises Exit: If no notebooks are found (nothing to do)
    """
    # A directory may legitimately have a dot in its name (i.e.: `my.project/`), in
    #  which case `Path.suffix` is not empty - only judge by suffix the paths that
    #  are not existing directories.
    if any(
        path.suffix not in ("", ".ipynb") and not path.is_dir() for path in paths
    ):
        raise BadParameter(
            "Expected either notebook files, a directory or glob expression."
        )
    nb_paths = expand_paths(paths=paths, ignore=ignore)
    if not nb_paths:
        logger.info(f"No notebooks found in {paths}. Nothing to do.")
        raise Exit()
    return nb_paths
@app.callback()
def callback(  # noqa: D103
    # Eager option: the version is echoed (and the CLI exits) before any command runs
    version: Optional[bool] = Option(
        None,
        "--version",
        is_eager=True,
        callback=_version_callback,
    )
) -> None:
    """CLI tool to resolve git conflicts and remove metadata in notebooks."""
@app.command(add_help_option=False)
def meta(
    paths: List[Path] = Argument(..., is_eager=True, help="Path(s) of notebook files"),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    prefix: str = Option("", help="Prefix to add to filepath when writing files"),
    suffix: str = Option("", help="Suffix to add to filepath when writing files"),
    rm_outs: bool = Option(False, help="Whether to remove cell outputs"),
    rm_exec: bool = Option(True, help="Whether to remove the cell execution counts"),
    nb_meta_keep: List[str] = Option((), help="Notebook metadata fields to keep"),
    cell_meta_keep: List[str] = Option((), help="Cells metadata fields to keep"),
    cell_fields_keep: List[str] = Option(
        (),
        help="Other (excluding `execution_counts` and `outputs`) cell fields to keep",
    ),
    overwrite: bool = Option(False, "--yes", "-y", help="Confirm overwrite of files"),
    check: bool = Option(
        False,
        "--check",
        help="Don't write files but check whether there is unwanted metadata",
    ),
    verbose: bool = Option(
        False, "--verbose", "-v", help="Log processed files in console"
    ),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """Clear both notebook and cell metadata."""
    # NOTE: the docstring above is what Typer displays as the command help, keep it
    #  short and user-facing.
    nb_paths = _check_paths(paths=paths, ignore=ignore)
    n_notebooks = len(nb_paths)

    # Without prefix or suffix the notebooks are written onto themselves, so the user
    #  must confirm (either via `--yes` or the prompt) unless we are only checking.
    if not (prefix or suffix or check):
        if not overwrite:
            overwrite = Confirm.ask(
                f"{n_notebooks} files will be overwritten"
                " (no prefix nor suffix was passed). Continue?"
            )
        if not overwrite:
            raise Exit()
        logger.warning(f"{n_notebooks} files will be overwritten")

    write_paths = [
        nb_path.parent / f"{prefix}{nb_path.stem}{suffix}{nb_path.suffix}"
        for nb_path in nb_paths
    ]

    # Cell fields that are not flagged for removal are kept, alongside the ones that
    #  were explicitly requested
    removal_flags = {"outputs": rm_outs, "execution_count": rm_exec}
    fields_keep = [field for field, remove in removal_flags.items() if not remove]
    fields_keep.extend(cell_fields_keep)

    progress_columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    )
    with Progress(*progress_columns) as progress:
        metadata = progress.add_task("[yellow]Removing metadata", total=n_notebooks)

        are_equal = clear_all(
            read_paths=nb_paths,
            write_paths=write_paths,
            progress_callback=lambda: progress.update(metadata, advance=1),
            notebook_metadata_keep=nb_meta_keep,
            cell_metadata_keep=cell_meta_keep,
            cell_fields_keep=fields_keep,
            check=check,
            verbose=verbose,
            overwrite=overwrite,
        )

    # A falsy entry flags a notebook that holds (or held) unwanted metadata
    n_changed = sum(not eq for eq in are_equal)
    n_total = len(are_equal)
    if not check:
        logger.info(
            f"The metadata of {n_changed} out of {n_total} notebooks were removed!"
        )
        return
    if n_changed == 0:
        logger.info("No unwanted metadata!")
        return
    logger.info(f"Found unwanted metadata in {n_changed} out of {n_total} files.")
    # Non-zero exit code so that the check can be used in CI or git hooks
    raise Exit(code=1)
@app.command("assert", add_help_option=False)
def affirm_meta(
    paths: List[Path] = Argument(..., is_eager=True, help="Path(s) of notebook files"),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    expr: List[str] = Option(
        (), "--expr", "-x", help="Expressions to assert on notebooks"
    ),
    recipe: List[Recipe] = Option(
        (),
        "--recipe",
        "-r",
        help="Common recipes of expressions - see"
        " https://databooks.dev/0.1.15/usage/overview/#recipes",
    ),
    verbose: bool = Option(
        False, "--verbose", "-v", help="Log processed files in console"
    ),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """
    Assert notebook metadata has desired values.

    Pass one (or multiple) strings or recipes. The available variables in scope include
    `nb` (notebook), `raw_cells` (notebook cells of `raw` type), `md_cells` (notebook
    cells of `markdown` type), `code_cells` (notebook cells of `code` type) and
    `exec_cells` (notebook cells of `code` type that were executed - have an `execution
    count` value). Recipes can be found on `databooks.recipes.CookBook`.
    """
    # NOTE: the docstring above is what Typer displays as the command help
    nb_paths = _check_paths(paths=paths, ignore=ignore)

    # Recipes come first, followed by the user-provided expressions
    exprs = [*(r.name for r in recipe), *expr]
    if not exprs:
        raise BadParameter("Must specify at least one of `expr` or `recipe`.")

    progress_columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    )
    with Progress(*progress_columns) as progress:
        assert_checks = progress.add_task(
            "[yellow]Running assert checks", total=len(nb_paths)
        )

        are_ok = affirm_all(
            nb_paths=nb_paths,
            progress_callback=lambda: progress.update(assert_checks, advance=1),
            exprs=exprs,
            verbose=verbose,
        )

    n_failed = sum(not ok for ok in are_ok)
    if n_failed:
        logger.info(
            f"Found issues in notebook metadata for {n_failed} out"
            f" of {len(are_ok)} notebooks."
        )
        # Non-zero exit code so that the assertion can be used in CI or git hooks
        raise Exit(code=1)
    logger.info("All notebooks comply with the desired metadata!")
@app.command(add_help_option=False)
def fix(
    paths: List[Path] = Argument(
        ..., is_eager=True, help="Path(s) of notebook files with conflicts"
    ),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    metadata_head: bool = Option(
        True, help="Whether or not to keep the metadata from the head/current notebook"
    ),
    cells_head: Optional[bool] = Option(
        None,
        help="Whether to keep the cells from the head/base notebook. Omit to keep both",
    ),
    cell_fields_ignore: List[str] = Option(
        [
            "id",
            "execution_count",
        ],
        help="Cell fields to remove before comparing cells",
    ),
    interactive: bool = Option(
        False,
        "--interactive",
        "-i",
        help="Interactively resolve the conflicts (not implemented)",
    ),
    # `-v` added for consistency with the other commands; the long names (including
    #  the auto-generated `--no-verbose`) are kept so existing invocations still work
    verbose: bool = Option(
        False,
        "--verbose/--no-verbose",
        "-v",
        help="Log processed files in console",
    ),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """
    Fix git conflicts for notebooks.

    Perform by getting the unmerged blobs from git index, comparing them and returning
    a valid notebook summarizing the differences - see
    [git docs](https://git-scm.com/docs/git-ls-files).
    """
    # NOTE: the docstring above is what Typer displays as the command help
    filepaths = expand_paths(paths=paths, ignore=ignore)
    conflict_files = path2conflicts(nb_paths=filepaths)
    if not conflict_files:
        raise BadParameter(
            f"No conflicts found at {', '.join([str(p) for p in filepaths])}."
        )
    if interactive:
        raise NotImplementedError

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    ) as progress:
        conflicts = progress.add_task(
            "[yellow]Resolving conflicts", total=len(conflict_files)
        )
        conflicts2nbs(
            conflict_files=conflict_files,
            meta_first=metadata_head,
            cells_first=cells_head,
            cell_fields_ignore=cell_fields_ignore,
            verbose=verbose,
            progress_callback=lambda: progress.update(conflicts, advance=1),
        )
    # The message used to end at the count ("... conflicts of 3!") - name what was fixed
    logger.info(f"Resolved the conflicts of {len(conflict_files)} files!")
@app.command()
def diff() -> None:
    """Show differences between notebooks (not implemented)."""
    # Placeholder command: registered in the CLI, but there is no implementation yet
    raise NotImplementedError()