Coverage for databooks/cli.py: 92%

1"""Main CLI application."""

2from itertools import compress

3from pathlib import Path

4from typing import List, Optional

6import tomli

7from rich.progress import (

8 BarColumn,

9 Progress,

10 SpinnerColumn,

11 TextColumn,

12 TimeElapsedColumn,

13)

14from rich.prompt import Confirm

15from typer import Argument, BadParameter, Context, Exit, Option, Typer, echo

17from databooks.affirm import affirm_all

18from databooks.common import expand_paths

19from databooks.config import TOML_CONFIG_FILE, get_config

20from databooks.conflicts import conflicts2nbs, path2conflicts

21from databooks.logging import get_logger

22from databooks.metadata import clear_all

23from databooks.recipes import Recipe

24from databooks.version import __version__

# Module-level logger, obtained from the project's `get_logger` helper.
logger = get_logger(__file__)

# Typer application object; the commands in this module are registered on it.
app = Typer()

31def _version_callback(show_version: bool) -> None:

32 """Return application version."""

33 if show_version:

34 echo("databooks version: " + __version__)

35 raise Exit()

def _help_callback(ctx: Context, show_help: Optional[bool]) -> None:
    """
    Echo the command's help text and exit when `--help` is passed.

    Used as the callback of the hand-written, eager `--help` options on the commands
    that are registered with `add_help_option=False`.

    :param ctx: Context of the command being invoked
    :param show_help: Value of the `--help` option
    :raises Exit: After echoing the help text (only when `show_help` is truthy)
    """
    if not show_help:
        return
    help_text = ctx.command.get_help(ctx)
    echo(help_text)
    raise Exit()

def _config_callback(ctx: Context, config_path: Optional[Path]) -> Optional[Path]:
    """
    Resolve the configuration file and merge its values into the context defaults.

    :param ctx: Context of the command being invoked; its `default_map` is updated
    :param config_path: Value of the `--config` option, `None` when not passed
    :return: Path of the configuration file that was used, `None` if there is none
    """
    cli_paths = [Path(p) for p in ctx.params.get("paths", ())]
    target_paths = expand_paths(paths=cli_paths, rglob="*")

    # Only ask `get_config` for a file when none was passed explicitly and there is
    # at least one target path to base the lookup on
    if config_path is None and target_paths:
        config_path = get_config(
            target_paths=target_paths,
            config_filename=TOML_CONFIG_FILE,
        )
    logger.debug(f"Loading config file from: {config_path}")

    if config_path is None:  # config may not be specified
        return None

    with config_path.open("rb") as f:
        toml_doc = tomli.load(f)
    # Values for a command live in the `tool.databooks.<command name>` table
    conf = toml_doc.get("tool", {}).get("databooks", {}).get(ctx.command.name, {})

    # Config keys may use dashes; parameter names use underscores
    overrides = {key.replace("-", "_"): value for key, value in conf.items()}
    # Merge configuration - config file values win over existing defaults
    ctx.default_map = {**(ctx.default_map or {}), **overrides}
    return config_path

def _check_paths(paths: List[Path], ignore: List[str]) -> List[Path]:
    """
    Validate the paths passed in the CLI and expand them with `expand_paths`.

    :param paths: Paths as passed by the user
    :param ignore: Glob expression(s) forwarded to `expand_paths` as `ignore`
    :return: Non-empty result of `expand_paths`
    :raises BadParameter: If a path has a suffix other than `""` or `".ipynb"`
    :raises Exit: If the expansion yields no paths
    """
    allowed_suffixes = ("", ".ipynb")
    for path in paths:
        if path.suffix not in allowed_suffixes:
            raise BadParameter(
                "Expected either notebook files, a directory or glob expression."
            )

    nb_paths = expand_paths(paths=paths, ignore=ignore)
    if nb_paths:
        return nb_paths

    logger.info(f"No notebooks found in {paths}. Nothing to do.")
    raise Exit()

# Application-level callback: it only declares the eager `--version` option, whose
# handling is delegated to `_version_callback`. The function body is intentionally
# just the docstring.
@app.callback()
def callback(  # noqa: D103
    version: Optional[bool] = Option(
        None, "--version", callback=_version_callback, is_eager=True
    )
) -> None:
    """CLI tool to resolve git conflicts and remove metadata in notebooks."""

@app.command(add_help_option=False)
def meta(
    paths: List[Path] = Argument(..., is_eager=True, help="Path(s) of notebook files"),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    prefix: str = Option("", help="Prefix to add to filepath when writing files"),
    suffix: str = Option("", help="Suffix to add to filepath when writing files"),
    rm_outs: bool = Option(False, help="Whether to remove cell outputs"),
    rm_exec: bool = Option(True, help="Whether to remove the cell execution counts"),
    nb_meta_keep: List[str] = Option((), help="Notebook metadata fields to keep"),
    cell_meta_keep: List[str] = Option((), help="Cells metadata fields to keep"),
    cell_fields_keep: List[str] = Option(
        (),
        help="Other (excluding `execution_counts` and `outputs`) cell fields to keep",
    ),
    overwrite: bool = Option(False, "--yes", "-y", help="Confirm overwrite of files"),
    check: bool = Option(
        False,
        "--check",
        help="Don't write files but check whether there is unwanted metadata",
    ),
    verbose: bool = Option(
        False, "--verbose", "-v", help="Log processed files in console"
    ),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """Clear both notebook and cell metadata."""
    nb_paths = _check_paths(paths=paths, ignore=ignore)

    # Without prefix/suffix the output name equals the input name, so (unless we are
    # only checking) the notebooks would be written in place - get confirmation first
    writes_in_place = not (prefix or suffix)
    if writes_in_place and not check:
        if not overwrite:
            overwrite = Confirm.ask(
                f"{len(nb_paths)} files will be overwritten"
                " (no prefix nor suffix was passed). Continue?"
            )
        if not overwrite:
            raise Exit()
        logger.warning(f"{len(nb_paths)} files will be overwritten")

    write_paths = [
        src.parent / f"{prefix}{src.stem}{suffix}{src.suffix}" for src in nb_paths
    ]

    # `outputs`/`execution_count` are kept exactly when their removal flag is off
    removal_flags = (rm_outs, rm_exec)
    kept_by_flags = compress(
        ("outputs", "execution_count"), (not flag for flag in removal_flags)
    )
    cell_fields_keep = [*kept_by_flags, *cell_fields_keep]

    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    )
    with Progress(*columns) as progress:
        metadata = progress.add_task("[yellow]Removing metadata", total=len(nb_paths))

        are_equal = clear_all(
            read_paths=nb_paths,
            write_paths=write_paths,
            progress_callback=lambda: progress.update(metadata, advance=1),
            notebook_metadata_keep=nb_meta_keep,
            cell_metadata_keep=cell_meta_keep,
            cell_fields_keep=cell_fields_keep,
            check=check,
            verbose=verbose,
            overwrite=overwrite,
        )

    # Number of notebooks reported as "not equal" by `clear_all`
    n_changed = sum(not eq for eq in are_equal)
    if not check:
        logger.info(
            f"The metadata of {n_changed} out of {len(are_equal)}"
            " notebooks were removed!"
        )
        return

    if n_changed:
        logger.info(
            f"Found unwanted metadata in {n_changed} out of"
            f" {len(are_equal)} files."
        )
        raise Exit(code=1)
    logger.info("No unwanted metadata!")

193

194

@app.command("assert", add_help_option=False)
def affirm_meta(
    paths: List[Path] = Argument(..., is_eager=True, help="Path(s) of notebook files"),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    expr: List[str] = Option(
        (), "--expr", "-x", help="Expressions to assert on notebooks"
    ),
    recipe: List[Recipe] = Option(
        (),
        "--recipe",
        "-r",
        help="Common recipes of expressions - see"
        " https://databooks.dev/0.1.15/usage/overview/#recipes",
    ),
    verbose: bool = Option(
        False, "--verbose", "-v", help="Log processed files in console"
    ),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """
    Assert notebook metadata has desired values.

    Pass one (or multiple) strings or recipes. The available variables in scope include
    `nb` (notebook), `raw_cells` (notebook cells of `raw` type), `md_cells` (notebook
    cells of `markdown` type), `code_cells` (notebook cells of `code` type) and
    `exec_cells` (notebook cells of `code` type that were executed - have an `execution
    count` value). Recipes can be found on `databooks.recipes.CookBook`.
    """
    nb_paths = _check_paths(paths=paths, ignore=ignore)

    # Recipes come first, followed by the user-provided expressions
    exprs = [*(r.name for r in recipe), *expr]
    if not exprs:
        raise BadParameter("Must specify at least one of `expr` or `recipe`.")

    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    )
    with Progress(*columns) as progress:
        assert_checks = progress.add_task(
            "[yellow]Running assert checks", total=len(nb_paths)
        )

        are_ok = affirm_all(
            nb_paths=nb_paths,
            progress_callback=lambda: progress.update(assert_checks, advance=1),
            exprs=exprs,
            verbose=verbose,
        )

    # Exit with a non-zero code as soon as a single notebook does not comply
    n_failed = sum(not ok for ok in are_ok)
    if n_failed:
        logger.info(
            f"Found issues in notebook metadata for {n_failed} out"
            f" of {len(are_ok)} notebooks."
        )
        raise Exit(code=1)
    logger.info("All notebooks comply with the desired metadata!")

270

271

@app.command(add_help_option=False)
def fix(
    paths: List[Path] = Argument(
        ..., is_eager=True, help="Path(s) of notebook files with conflicts"
    ),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    metadata_head: bool = Option(
        True, help="Whether or not to keep the metadata from the head/current notebook"
    ),
    cells_head: Optional[bool] = Option(
        None,
        help="Whether to keep the cells from the head/base notebook. Omit to keep both",
    ),
    cell_fields_ignore: List[str] = Option(
        [
            "id",
            "execution_count",
        ],
        help="Cell fields to remove before comparing cells",
    ),
    interactive: bool = Option(
        False,
        "--interactive",
        "-i",
        help="Interactively resolve the conflicts (not implemented)",
    ),
    verbose: bool = Option(False, help="Log processed files in console"),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """
    Fix git conflicts for notebooks.

    Perform by getting the unmerged blobs from git index, comparing them and returning
    a valid notebook summarizing the differences - see
    [git docs](https://git-scm.com/docs/git-ls-files).
    """
    # Expand the user-provided paths, then keep only what `path2conflicts` reports
    filepaths = expand_paths(paths=paths, ignore=ignore)
    conflict_files = path2conflicts(nb_paths=filepaths)
    if not conflict_files:
        raise BadParameter(
            f"No conflicts found at {', '.join(str(p) for p in filepaths)}."
        )
    if interactive:
        # The `--interactive` flag is exposed in the CLI but has no implementation
        raise NotImplementedError

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    ) as progress:
        conflicts = progress.add_task(
            "[yellow]Resolving conflicts", total=len(conflict_files)
        )
        conflicts2nbs(
            conflict_files=conflict_files,
            meta_first=metadata_head,
            cells_first=cells_head,
            cell_fields_ignore=cell_fields_ignore,
            verbose=verbose,
            # Advance the bar once per callback invocation
            progress_callback=lambda: progress.update(conflicts, advance=1),
        )
    # State what was counted - the message used to end right after the number
    logger.info(f"Resolved the conflicts of {len(conflict_files)} file(s)!")

352

353

# Placeholder command: it is registered on the app but always raises.
@app.command()
def diff() -> None:
    """Show differences between notebooks (not implemented)."""
    raise NotImplementedError