Coverage for databooks/cli.py: 92%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

91 statements  

1"""Main CLI application.""" 

2from itertools import compress 

3from pathlib import Path 

4from typing import List, Optional 

5 

6import tomli 

7from rich.progress import ( 

8 BarColumn, 

9 Progress, 

10 SpinnerColumn, 

11 TextColumn, 

12 TimeElapsedColumn, 

13) 

14from rich.prompt import Confirm 

15from typer import Argument, BadParameter, Context, Exit, Option, Typer, echo 

16 

17from databooks.affirm import affirm_all 

18from databooks.common import expand_paths 

19from databooks.config import TOML_CONFIG_FILE, get_config 

20from databooks.conflicts import conflicts2nbs, path2conflicts 

21from databooks.logging import get_logger 

22from databooks.metadata import clear_all 

23from databooks.recipes import Recipe 

24from databooks.version import __version__ 

25 

# Module-level logger, obtained from the project's logging helper
logger = get_logger(__file__)

# Typer application on which the callback and the commands below are registered
app = Typer()

29 

30 

def _version_callback(show_version: bool) -> None:
    """Echo the `databooks` version and exit, when the version was requested.

    :param show_version: Value received for the `--version` option
    :raises Exit: Right after echoing the version
    """
    if not show_version:
        return
    version_message = "databooks version: " + __version__
    echo(version_message)
    raise Exit()

36 

37 

def _help_callback(ctx: Context, show_help: Optional[bool]) -> None:
    """Echo the help text of the current command and exit, when help was requested.

    Used as the callback of the `--help` option of the commands registered with
    `add_help_option=False`.

    :param ctx: Context of the command being invoked
    :param show_help: Value received for the `--help` option
    :raises Exit: Right after echoing the help text
    """
    if not show_help:
        return
    help_text = ctx.command.get_help(ctx)
    echo(help_text)
    raise Exit()

43 

44 

def _config_callback(ctx: Context, config_path: Optional[Path]) -> Optional[Path]:
    """Find the configuration file and use its values as defaults for the command.

    :param ctx: Context of the command being invoked (its `default_map` is updated)
    :param config_path: Configuration file passed explicitly, if any
    :return: Path of the configuration file that was used, `None` if there is none
    """
    raw_paths = ctx.params.get("paths", ())
    target_paths = expand_paths(paths=[Path(p) for p in raw_paths], rglob="*")

    if config_path is None and target_paths:
        # No explicit file was given: look one up from the target paths
        config_path = get_config(
            target_paths=target_paths,
            config_filename=TOML_CONFIG_FILE,
        )
    logger.debug(f"Loading config file from: {config_path}")

    if config_path is None:  # config may not be specified
        return config_path

    with config_path.open("rb") as f:
        toml_content = tomli.load(f)
    # Only the `[tool.databooks.<command name>]` table is relevant here
    databooks_conf = toml_content.get("tool", {}).get("databooks", {})
    command_conf = databooks_conf.get(ctx.command.name, {})

    # Merge configuration: values from the file take precedence over existing ones
    merged_defaults = dict(ctx.default_map or {})
    for key, value in command_conf.items():
        merged_defaults[key.replace("-", "_")] = value
    ctx.default_map = merged_defaults
    return config_path

74 

75 

def _check_paths(paths: List[Path], ignore: List[str]) -> List[Path]:
    """Validate the paths received from the CLI and expand them.

    :param paths: Paths as passed to the command
    :param ignore: Glob expression(s) of files to ignore
    :raises BadParameter: If a path has a suffix other than `.ipynb` (or no suffix)
    :raises Exit: If expanding the paths yields nothing
    :return: Result of `expand_paths` for the given paths
    """
    allowed_suffixes = ("", ".ipynb")
    for path in paths:
        if path.suffix not in allowed_suffixes:
            raise BadParameter(
                "Expected either notebook files, a directory or glob expression."
            )

    nb_paths = expand_paths(paths=paths, ignore=ignore)
    if nb_paths:
        return nb_paths

    logger.info(f"No notebooks found in {paths}. Nothing to do.")
    raise Exit()

86 

87 

@app.callback()
def callback(  # noqa: D103
    # `--version` is an eager option whose handling is delegated to its callback
    version: Optional[bool] = Option(
        None,
        "--version",
        is_eager=True,
        callback=_version_callback,
    )
) -> None:
    """CLI tool to resolve git conflicts and remove metadata in notebooks."""

95 

96 

@app.command(add_help_option=False)
def meta(
    paths: List[Path] = Argument(..., is_eager=True, help="Path(s) of notebook files"),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    prefix: str = Option("", help="Prefix to add to filepath when writing files"),
    suffix: str = Option("", help="Suffix to add to filepath when writing files"),
    rm_outs: bool = Option(False, help="Whether to remove cell outputs"),
    rm_exec: bool = Option(True, help="Whether to remove the cell execution counts"),
    nb_meta_keep: List[str] = Option((), help="Notebook metadata fields to keep"),
    cell_meta_keep: List[str] = Option((), help="Cells metadata fields to keep"),
    cell_fields_keep: List[str] = Option(
        (),
        help="Other (excluding `execution_counts` and `outputs`) cell fields to keep",
    ),
    overwrite: bool = Option(False, "--yes", "-y", help="Confirm overwrite of files"),
    check: bool = Option(
        False,
        "--check",
        help="Don't write files but check whether there is unwanted metadata",
    ),
    verbose: bool = Option(
        False, "--verbose", "-v", help="Log processed files in console"
    ),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """Clear both notebook and cell metadata."""
    notebooks = _check_paths(paths=paths, ignore=ignore)

    # Without prefix and suffix the write paths are the same as the read paths, so
    # (unless only checking) the user must agree to overwriting the files
    if not (check or prefix + suffix):
        if not overwrite:
            overwrite = Confirm.ask(
                f"{len(notebooks)} files will be overwritten"
                " (no prefix nor suffix was passed). Continue?"
            )
        if not overwrite:
            raise Exit()
        logger.warning(f"{len(notebooks)} files will be overwritten")

    targets = [nb.parent / f"{prefix}{nb.stem}{suffix}{nb.suffix}" for nb in notebooks]

    # A field flagged for removal is not kept; user-specified fields come after
    removal_flags = {"outputs": rm_outs, "execution_count": rm_exec}
    fields_to_keep = [name for name, remove in removal_flags.items() if not remove]
    fields_to_keep += list(cell_fields_keep)

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    ) as progress:
        task_id = progress.add_task("[yellow]Removing metadata", total=len(notebooks))

        def _advance() -> None:
            """Move the progress bar one step forward."""
            progress.update(task_id, advance=1)

        are_equal = clear_all(
            read_paths=notebooks,
            write_paths=targets,
            progress_callback=_advance,
            notebook_metadata_keep=nb_meta_keep,
            cell_metadata_keep=cell_meta_keep,
            cell_fields_keep=fields_to_keep,
            check=check,
            verbose=verbose,
            overwrite=overwrite,
        )

    if check and all(are_equal):
        logger.info("No unwanted metadata!")
        return

    n_different = sum(not eq for eq in are_equal)
    n_total = len(are_equal)
    if check:
        # Checking found notebooks with unwanted metadata: exit with code 1
        logger.info(
            f"Found unwanted metadata in {n_different} out of {n_total} files."
        )
        raise Exit(code=1)
    logger.info(
        f"The metadata of {n_different} out of {n_total} notebooks were removed!"
    )

193 

194 

@app.command("assert", add_help_option=False)
def affirm_meta(
    paths: List[Path] = Argument(..., is_eager=True, help="Path(s) of notebook files"),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    expr: List[str] = Option(
        (), "--expr", "-x", help="Expressions to assert on notebooks"
    ),
    recipe: List[Recipe] = Option(
        (),
        "--recipe",
        "-r",
        help="Common recipes of expressions - see"
        " https://databooks.dev/0.1.15/usage/overview/#recipes",
    ),
    verbose: bool = Option(
        False, "--verbose", "-v", help="Log processed files in console"
    ),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """
    Assert notebook metadata has desired values.

    Pass one (or multiple) strings or recipes. The available variables in scope include
    `nb` (notebook), `raw_cells` (notebook cells of `raw` type), `md_cells` (notebook
    cells of `markdown` type), `code_cells` (notebook cells of `code` type) and
    `exec_cells` (notebook cells of `code` type that were executed - have an `execution
    count` value). Recipes can be found on `databooks.recipes.CookBook`.
    """
    notebooks = _check_paths(paths=paths, ignore=ignore)

    # Recipes are passed on by name, ahead of the free-form expressions
    exprs = [*(r.name for r in recipe), *expr]
    if not exprs:
        raise BadParameter("Must specify at least one of `expr` or `recipe`.")

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    ) as progress:
        task_id = progress.add_task(
            "[yellow]Running assert checks", total=len(notebooks)
        )

        def _advance() -> None:
            """Move the progress bar one step forward."""
            progress.update(task_id, advance=1)

        are_ok = affirm_all(
            nb_paths=notebooks,
            progress_callback=_advance,
            exprs=exprs,
            verbose=verbose,
        )

    if all(are_ok):
        logger.info("All notebooks comply with the desired metadata!")
        return

    # At least one notebook failed: report how many and exit with code 1
    n_failed = sum(not ok for ok in are_ok)
    logger.info(
        f"Found issues in notebook metadata for {n_failed} out"
        f" of {len(are_ok)} notebooks."
    )
    raise Exit(code=1)

270 

271 

@app.command(add_help_option=False)
def fix(
    paths: List[Path] = Argument(
        ..., is_eager=True, help="Path(s) of notebook files with conflicts"
    ),
    ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
    metadata_head: bool = Option(
        True, help="Whether or not to keep the metadata from the head/current notebook"
    ),
    cells_head: Optional[bool] = Option(
        None,
        help="Whether to keep the cells from the head/base notebook. Omit to keep both",
    ),
    cell_fields_ignore: List[str] = Option(
        [
            "id",
            "execution_count",
        ],
        help="Cell fields to remove before comparing cells",
    ),
    interactive: bool = Option(
        False,
        "--interactive",
        "-i",
        help="Interactively resolve the conflicts (not implemented)",
    ),
    verbose: bool = Option(False, help="Log processed files in console"),
    config: Optional[Path] = Option(
        None,
        "--config",
        "-c",
        is_eager=True,
        callback=_config_callback,
        resolve_path=True,
        exists=True,
        help="Get CLI options from configuration file",
    ),
    help: Optional[bool] = Option(
        None,
        "--help",
        is_eager=True,
        callback=_help_callback,
        help="Show this message and exit",
    ),
) -> None:
    """
    Fix git conflicts for notebooks.

    Perform by getting the unmerged blobs from git index, comparing them and returning
    a valid notebook summarizing the differences - see
    [git docs](https://git-scm.com/docs/git-ls-files).
    """
    filepaths = expand_paths(paths=paths, ignore=ignore)
    conflict_files = path2conflicts(nb_paths=filepaths)
    if not conflict_files:
        searched = ", ".join(map(str, filepaths))
        raise BadParameter(f"No conflicts found at {searched}.")
    if interactive:
        # The option exists in the interface but has no implementation
        raise NotImplementedError

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
    ) as progress:
        task_id = progress.add_task(
            "[yellow]Resolving conflicts", total=len(conflict_files)
        )

        def _advance() -> None:
            """Move the progress bar one step forward."""
            progress.update(task_id, advance=1)

        conflicts2nbs(
            conflict_files=conflict_files,
            meta_first=metadata_head,
            cells_first=cells_head,
            cell_fields_ignore=cell_fields_ignore,
            verbose=verbose,
            progress_callback=_advance,
        )
    logger.info(f"Resolved the conflicts of {len(conflict_files)}!")

352 

353 

@app.command()
def diff() -> None:
    """Show differences between notebooks (not implemented)."""
    # Placeholder command: registered on the app but without an implementation
    raise NotImplementedError()