Common utils

Common set of miscellaneous functions.

expand_paths(paths, *, ignore=('!*',), rglob='*.ipynb')

Get the paths of existing files from a list of directory or file paths.


Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| paths | List[pathlib.Path] | Paths to consider (can be directories or files) | required |
| ignore | Sequence[str] | Glob expressions of files to ignore | ('!*',) |
| rglob | str | Glob expression for expanding directory paths and filtering out existing file paths (i.e.: to retrieve only notebooks) | '*.ipynb' |



Returns:

| Type | Description |
|------|-------------|
| List[pathlib.Path] | List of existing file paths |

Source code in databooks/
def expand_paths(
    paths: List[Path], *, ignore: Sequence[str] = ("!*",), rglob: str = "*.ipynb"
) -> List[Path]:
    """
    Get the paths of existing files from a list of directory or file paths.

    Directories are resolved and expanded recursively with `rglob`; paths that are
    not directories are kept as given. Ignore globs are expanded recursively from
    the current working directory.

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore
    :param rglob: Glob expression used to expand directory paths (i.e.: to retrieve
     only notebooks)
    :return: List of existing file paths (duplicates removed, in no particular order)
    """
    filepaths = set(
        chain.from_iterable(
            path.resolve().rglob(rglob) if path.is_dir() else [path]
            for path in paths
        )
    )
    ignored = {
        p.resolve()
        for p in chain.from_iterable(Path.cwd().rglob(i) for i in ignore)
    }
    logger.debug(
        f"{len(ignored)} files will be ignored from {len(filepaths)} file paths."
    )
    # `ignored` holds resolved paths, so compare against the resolved form of each
    # candidate: otherwise files passed as relative paths would never be ignored.
    valid_filepaths = [
        p for p in filepaths if p.resolve() not in ignored and p.is_file()
    ]

    if not valid_filepaths:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return valid_filepaths


find_common_parent(paths)

Find common parent amongst several file paths (includes current path).

Source code in databooks/
def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths (includes current path).

    :param paths: Paths to compare (each one is resolved before comparison)
    :return: Deepest path that is either an ancestor of, or equal to, every given path
    :raises ValueError: If `paths` is empty
    """
    # Materialize first: a generator is always truthy and can only be consumed once,
    # so the emptiness check must run on a concrete list.
    resolved = [p.resolve() for p in paths]
    if not resolved:
        raise ValueError(f"Expected non-empty `paths`, got {paths}.")
    shared = set.intersection(*({*p.parents, p} for p in resolved))
    # The shared ancestors all lie on one chain from the root, so the greatest path
    # (paths compare part by part) is the deepest one.
    return max(shared)

find_obj(obj_name, start, finish, is_dir=False)

Recursively search for a file (or directory) along a directory path, walking from the finish (child) directory up to the start (parent) directory.


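Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| obj_name | str | File name to locate | required |
| start | Path | Start (parent) directory | required |
| finish | Path | Finish (child) path | required |
| is_dir | bool | Whether object is a directory or a file | False |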



Returns:

| Type | Description |
|------|-------------|
| Optional[Path] | File path |

Source code in databooks/
def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Recursively find file along directory path, from the end (child) directory to start.

    :param obj_name: File name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) path; if it is not a directory, its parent is used
    :param is_dir: Whether object is a directory or a file
    :return: File path, or `None` if the object is not found or if `start` is not a
     parent directory of `finish`
    :raises ValueError: If `start` is not a directory
    """
    finish = finish if finish.is_dir() else finish.parent
    logger.debug(f"Searching for {obj_name} between {start} and {finish}.")
    if not start.is_dir():
        raise ValueError("Parameter `start` must be a directory.")

    # Resolve `finish` as well: comparing the resolved `start` against an unresolved
    # `finish` wrongly rejects a relative `finish` that points at `start` itself.
    resolved_finish = finish.resolve()
    if start.resolve() not in [resolved_finish, *resolved_finish.parents]:
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    candidate = finish / obj_name
    is_obj = candidate.is_dir() if is_dir else candidate.is_file()
    if is_obj:
        return candidate
    if finish.samefile(start):
        logger.debug(f"{obj_name} not found between {start} and {finish}.")
        return None

    parent = finish.parent
    if parent == finish:
        # A relative path such as `Path(".")` is its own parent; switch to the
        # resolved parent so the search keeps moving up instead of recursing forever.
        parent = resolved_finish.parent
    return find_obj(obj_name=obj_name, start=start, finish=parent, is_dir=is_dir)
