Coverage for databooks/common.py: 89%
36 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-10-03 12:27 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-10-03 12:27 +0000
1"""Common set of miscellaneous functions."""
2from itertools import chain
3from pathlib import Path
4from typing import Iterable, List, Optional, Sequence
6from databooks.logging import get_logger
8logger = get_logger(__file__)
def expand_paths(
    paths: List[Path], *, ignore: Sequence[str] = ("!*",), rglob: str = "*.ipynb"
) -> Optional[List[Path]]:
    """
    Get paths of existing files from a list of directory or file paths.

    Directories are resolved and searched recursively with `rglob`; paths that are
    not directories are kept as given. Anything matching one of the `ignore` glob
    expressions (searched recursively from the common parent of `paths`) is dropped,
    as is anything that is not an existing file.

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore
    :param rglob: Glob expression for expanding directory paths (i.e.: to retrieve
     only notebooks)
    :return: List of existing file paths (in no particular order), or `None` if
     `paths` is empty
    """
    if not paths:
        return None
    filepaths = set(
        chain.from_iterable(
            path.resolve().rglob(rglob) if path.is_dir() else [path]
            for path in paths
        )
    )
    common_path = find_common_parent(paths=paths)
    ignored = {
        p.resolve() for p in chain.from_iterable(common_path.rglob(i) for i in ignore)
    }
    logger.debug(
        f"{len(ignored)} files will be ignored from {len(filepaths)} file paths."
    )
    # `ignored` holds resolved paths, whereas file paths passed in directly are kept
    # as given (possibly relative): compare on the resolved form so that those are
    # ignored as well.
    valid_filepaths = [
        p for p in filepaths if p.resolve() not in ignored and p.is_file()
    ]

    if not valid_filepaths:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return valid_filepaths
def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths (includes current path).

    :param paths: Paths to consider (each one is resolved before comparison)
    :return: Deepest path that is equal to, or a parent of, every given path
    :raises ValueError: If `paths` is empty
    """
    # Materialize first: a lazy iterable (e.g. a generator) is always truthy, so it
    # cannot be checked for emptiness directly. This also resolves each path once.
    resolved = [p.resolve() for p in paths]
    if not resolved:
        raise ValueError(f"Expected non-empty `paths`, got {paths}.")
    common = set.intersection(*({*p.parents, p} for p in resolved))
    # All candidates lie on a single ancestor chain, so the deepest one is the one
    # with the most path components.
    return max(common, key=lambda p: len(p.parts))
def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Recursively find file along directory path, from the end (child) directory to start.

    The search begins in `finish` (or its parent, when `finish` is not a directory)
    and moves one directory up at a time until the object is found or `start` has
    been checked.

    :param obj_name: File name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) path
    :param is_dir: Whether object is a directory or a file
    :return: Path of the object, or `None` if it was not found or `start` is not a
     parent of `finish`
    :raises ValueError: If `start` is not a directory
    """
    finish = finish if finish.is_dir() else finish.parent
    logger.debug(f"Searching for {obj_name} between {start} and {finish}.")
    if not start.is_dir():
        raise ValueError("Parameter `start` must be a directory.")

    # Compare resolved paths on both sides; otherwise a relative `finish` that points
    # at `start` itself would not be recognized as lying within `start`.
    resolved_finish = finish.resolve()
    if start.resolve() not in (resolved_finish, *resolved_finish.parents):
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    candidate = finish / obj_name
    is_obj = candidate.is_dir() if is_dir else candidate.is_file()
    if is_obj:
        return candidate
    if finish.samefile(start):
        # Reached the top of the search range without a match.
        logger.debug(f"{obj_name} not found between {start} and {finish}.")
        return None
    return find_obj(
        obj_name=obj_name, start=start, finish=finish.parent, is_dir=is_dir
    )