Skip to content

Common utils

Common set of miscellaneous functions.

expand_paths(paths, *, ignore=('!*',), rglob='*.ipynb')

Get the paths of existing files from a list of directory or file paths.

Parameters:

Name Type Description Default
paths List[pathlib.Path]

Paths to consider (can be directories or files)

required
ignore Sequence[str]

Glob expressions of files to ignore

('!*',)
rglob str

Glob expression for expanding directory paths and filtering out existing file paths (e.g. to retrieve only notebooks)

'*.ipynb'

Returns:

Type Description
Optional[List[pathlib.Path]]

List of existing file paths

Source code in databooks/common.py
def expand_paths(
    paths: List[Path], *, ignore: Sequence[str] = ("!*",), rglob: str = "*.ipynb"
) -> Optional[List[Path]]:
    """
    Get paths of existing files from a list of directory or file paths.

    Directories are expanded recursively with `rglob`; file paths are kept as given.
    Paths matching any `ignore` glob (searched recursively from the common parent
    directory of `paths`) are dropped. Matching against ignored paths is done on
    resolved paths, so relative or symlinked inputs are compared correctly.

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore
    :param rglob: Glob expression used to expand directory paths (i.e.: to retrieve
     only notebooks)
    :return: Sorted list of existing file paths (`None` if `paths` is empty)
    """
    if not paths:
        return None

    expanded = chain.from_iterable(
        path.resolve().rglob(rglob) if path.is_dir() else [path] for path in paths
    )
    # Key candidates by their resolved location: this removes duplicates that only
    # differ in spelling (relative vs. absolute) and makes them comparable with the
    # resolved ignored paths below, while still returning the path as it was found
    filepaths = {}
    for candidate in expanded:
        filepaths.setdefault(candidate.resolve(), candidate)

    common_path = find_common_parent(paths=paths)
    if not common_path.is_dir():
        # A single file is its own "common parent"; globbing from a file yields
        # nothing, so search for ignored files from its directory instead
        common_path = common_path.parent
    ignored = {p.resolve() for i in ignore for p in common_path.rglob(i)}
    logger.debug(
        f"{len(ignored)} files will be ignored from {len(filepaths)} file paths."
    )
    # Sort so that the result does not depend on set/dict iteration order
    valid_filepaths = sorted(
        path
        for resolved, path in filepaths.items()
        if resolved not in ignored and path.is_file()
    )

    if not valid_filepaths:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return valid_filepaths

find_common_parent(paths)

Find common parent amongst several file paths (includes current path).

Source code in databooks/common.py
def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths (includes current path).

    :param paths: Paths to inspect (any iterable, including one-shot generators)
    :return: Deepest resolved path that is an ancestor of (or equal to) every path
    :raises ValueError: If `paths` is empty or the paths share no common parent
    """
    # Materialize first: a generator is always truthy and can only be consumed once
    resolved = [p.resolve() for p in paths]
    if not resolved:
        raise ValueError(f"Expected non-empty `paths`, got {paths}.")

    shared = set.intersection(*({path, *path.parents} for path in resolved))
    if not shared:
        raise ValueError(f"Expected `paths` with a common parent, got {paths}.")
    # All shared ancestors lie on a single chain, so the deepest has the most parts
    return max(shared, key=lambda path: len(path.parts))

find_obj(obj_name, start, finish, is_dir=False)

Recursively find a file (or directory) along a directory path, searching from the end (child) directory up to the start (parent) directory.

Parameters:

Name Type Description Default
obj_name str

File name to locate

required
start Path

Start (parent) directory

required
finish Path

Finish (child) path

required
is_dir bool

Whether object is a directory or a file

False

Returns:

Type Description
Optional[pathlib.Path]

File path

Source code in databooks/common.py
def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Find file along directory path, from the end (child) directory up to start.

    Both `start` and `finish` are resolved before searching, so relative paths and
    symlinks are handled consistently and the walk always terminates at `start`.

    :param obj_name: File name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) path
    :param is_dir: Whether object is a directory or a file
    :return: Resolved path of the closest match to `finish`, or `None` if not found
     or if `start` is not a parent of `finish`
    :raises ValueError: If `start` is not a directory
    """
    finish = finish if finish.is_dir() else finish.parent
    logger.debug(f"Searching for {obj_name} between {start} and {finish}.")
    if not start.is_dir():
        raise ValueError("Parameter `start` must be a directory.")

    start_dir = start.resolve()
    finish_dir = finish.resolve()
    # Ordered from the child directory upwards; resolving `finish` on both sides of
    # the comparison ensures `start == finish` is recognised for relative paths too
    search_dirs = [finish_dir, *finish_dir.parents]
    if start_dir not in search_dirs:
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    for directory in search_dirs:
        candidate = directory / obj_name
        is_obj = candidate.is_dir() if is_dir else candidate.is_file()
        if is_obj:
            return candidate
        if directory == start_dir:
            # Never look above `start`
            break

    logger.debug(f"{obj_name} not found between {start} and {finish}.")
    return None