Coverage for databooks/common.py: 94%
33 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-04 16:41 +0000
1"""Common set of miscellaneous functions."""
2from itertools import chain
3from pathlib import Path
4from typing import Iterable, List, Optional, Sequence
6from databooks.logging import get_logger
8logger = get_logger(__file__)
def expand_paths(
    paths: List[Path], *, ignore: Sequence[str] = ("!*",), rglob: str = "*.ipynb"
) -> List[Path]:
    """
    Get paths of existing files from a list of directory or file paths.

    Directories are resolved and searched recursively with `rglob`. Paths that are not
    directories are kept exactly as given and are not matched against `rglob`. Ignore
    patterns are expanded recursively from the current working directory.

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore
    :param rglob: Glob expression for expanding directory paths (i.e.: to retrieve
     only notebooks)
    :return: List of existing file paths (in no particular order)
    """
    filepaths = set(
        chain.from_iterable(
            path.resolve().rglob(rglob) if path.is_dir() else [path]
            for path in paths
        )
    )
    ignored = {
        match.resolve()
        for match in chain.from_iterable(Path.cwd().rglob(i) for i in ignore)
    }
    logger.debug(
        f"{len(ignored)} files will be ignored from {len(filepaths)} file paths."
    )
    # `ignored` holds resolved paths, whereas explicitly listed files may be relative.
    # Compare on the resolved form so ignore patterns also apply to those files, but
    # return the paths in the form they were collected.
    valid_filepaths = [
        p for p in filepaths if p.resolve() not in ignored and p.is_file()
    ]

    if not valid_filepaths:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return valid_filepaths
def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths (includes current path).

    :param paths: Paths to compare; any iterable is accepted and is consumed once
    :return: Deepest resolved path that is an ancestor of (or equal to) every path
    :raises ValueError: If `paths` is empty
    """
    # Materialise first: a generator or iterator is always truthy, so `not paths`
    # alone cannot detect an empty iterable. Each path is also resolved only once.
    resolved = [p.resolve() for p in paths]
    if not resolved:
        raise ValueError(f"Expected non-empty `paths`, got {paths}.")
    # The shared ancestors form a single chain, each one a prefix of the next, so the
    # greatest path under `Path` ordering is the deepest one.
    return max(set.intersection(*({r, *r.parents} for r in resolved)))
def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Recursively find file along directory path, from the end (child) directory to start.

    :param obj_name: File (or directory) name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) path; if it is not a directory its parent is used
    :param is_dir: Whether object is a directory or a file
    :return: Path of the first match found walking up from `finish`, or `None` when
     `start` is not a parent of `finish` or nothing is found up to `start`
    :raises ValueError: If `start` is not a directory
    """
    finish = finish if finish.is_dir() else finish.parent
    logger.debug(f"Searching for {obj_name} between {start} and {finish}.")
    if not start.is_dir():
        raise ValueError("Parameter `start` must be a directory.")

    # Compare resolved paths on both sides: `finish` may be relative, in which case it
    # would never equal the resolved `start` even when both are the same directory.
    resolved_finish = finish.resolve()
    if start.resolve() not in (resolved_finish, *resolved_finish.parents):
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    candidate = finish / obj_name
    found = candidate.is_dir() if is_dir else candidate.is_file()
    if found:
        return candidate
    if finish.samefile(start):
        logger.debug(f"{obj_name} not found between {start} and {finish}.")
        return None
    # Not found at this level: search again one directory up (parent of `finish`).
    return find_obj(
        obj_name=obj_name, start=start, finish=finish.parent, is_dir=is_dir
    )