Common utils
Common set of miscellaneous functions.
expand_paths(paths, *, ignore=['!*'], rglob='*.ipynb')
Get the paths of existing files from a list of directory or file paths.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
paths |
List[pathlib.Path] |
Paths to consider (can be directories or files) |
required |
ignore |
List[str] |
Glob expressions of files to ignore |
['!*'] |
rglob |
str |
Glob expression used both to expand directory paths recursively and to filter explicitly given file paths (e.g. to retrieve only notebooks) |
'*.ipynb' |
Returns:
Type | Description |
---|---|
List[pathlib.Path] |
List of existing file paths |
Source code in databooks/common.py
def expand_paths(
    paths: List[Path], *, ignore: List[str] = ["!*"], rglob: str = "*.ipynb"
) -> List[Path]:
    """
    Collect the existing files reachable from a mix of directory and file paths.

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore
    :param rglob: Glob expression used to expand directories recursively and to
     filter every candidate path (e.g. to retrieve only notebooks)
    :return: List of existing file paths
    """
    # Gather every candidate first: directories are searched recursively, anything
    #  else is taken as given and validated below
    candidates: List[Path] = []
    for path in paths:
        if path.is_dir():
            candidates.extend(path.rglob(rglob))
        else:
            candidates.append(path)

    selected: List[Path] = []
    for candidate in candidates:
        if any(candidate.match(pattern) for pattern in ignore):
            continue
        # Explicitly passed paths may not exist or may not match the glob
        if candidate.is_file() and candidate.match(rglob):
            selected.append(candidate)

    if not selected:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return selected
find_common_parent(paths)
Find common parent amongst several file paths.
Source code in databooks/common.py
def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths.

    :param paths: File paths to compare (any iterable, including one-shot iterators)
    :return: Deepest directory that is an ancestor of every resolved path
    :raises ValueError: If `paths` yields no items
    """
    # Materialize first: a generator is always truthy, so checking the raw iterable
    #  would let an empty one slip through to `set.intersection`
    path_list = list(paths)
    if not path_list:
        raise ValueError(f"Expected non-empty `paths`, got {path_list}.")
    shared_parents = set.intersection(
        *(set(p.resolve().parents) for p in path_list)
    )
    # The shared ancestors lie on a single chain from the root, each one a prefix of
    #  the next, so the greatest path is the deepest one
    return max(shared_parents)
find_obj(obj_name, start, finish, is_dir=False)
Recursively find a file (or directory) along a directory path, searching from the finish (child) directory up to the start (parent) directory.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
obj_name |
str |
Name of the file (or directory, when `is_dir` is set) to locate |
required |
start |
Path |
Start (parent) directory |
required |
finish |
Path |
Finish (child) directory |
required |
is_dir |
bool |
Whether object is a directory or a file |
False |
Returns:
Type | Description |
---|---|
Optional[pathlib.Path] |
Path of the located object, or `None` if it was not found |
Source code in databooks/common.py
def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Find file along directory path, from the end (child) directory up to start.

    Both directories are resolved before searching, so relative paths and symlinks
     are handled and the returned path is absolute.

    :param obj_name: Name of the file (or directory) to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) directory
    :param is_dir: Whether object is a directory or a file
    :return: Path of the closest match to `finish`, or `None` if `start` is not an
     ancestor of (or equal to) `finish`, or if nothing was found
    :raises ValueError: If `start` or `finish` is not a directory
    """
    if not start.is_dir() or not finish.is_dir():
        raise ValueError("Parameters `start` and `finish` must be directories.")

    # Resolve both ends once: comparing a resolved `start` with an unresolved `finish`
    #  wrongly rejects relative paths, and walking `.parent` on an unresolved path
    #  never terminates for `Path(".")` or at the filesystem root
    start_dir = start.resolve()
    finish_dir = finish.resolve()
    lineage = [finish_dir, *finish_dir.parents]  # child first, root last
    if start_dir not in lineage:
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    # Walk upwards from `finish` and stop at (and include) `start`
    for directory in lineage[: lineage.index(start_dir) + 1]:
        candidate = directory / obj_name
        found = candidate.is_dir() if is_dir else candidate.is_file()
        if found:
            return candidate

    logger.debug(f"{obj_name} not found between {start} and {finish}.")
    return None
write_notebook(nb, path)
Write notebook to a path.
Source code in databooks/common.py
def write_notebook(nb: JupyterNotebook, path: Path) -> None:
    """
    Save a notebook as JSON at the given location.

    :param nb: Notebook to serialize (its `dict()` representation is written)
    :param path: Destination file, opened in write mode
    :return: Nothing, the notebook is written to disk
    """
    with path.open(mode="w") as nb_file:
        contents = nb.dict()
        json.dump(contents, nb_file, indent=2)