Coverage for databooks/common.py: 94%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

33 statements  

1"""Common set of miscellaneous functions.""" 

2import json 

3from itertools import chain 

4from pathlib import Path 

5from typing import Iterable, List, Optional 

6 

7from databooks import JupyterNotebook 

8from databooks.logging import get_logger 

9 

# Module-level logger shared by the helpers below; it is obtained from the
#  project's `get_logger` factory, which is given this module's file path.
logger = get_logger(__file__)

11 

12 

def write_notebook(nb: JupyterNotebook, path: Path) -> None:
    """
    Serialize a notebook as JSON into a file.

    :param nb: Notebook whose `dict()` representation is written
    :param path: Destination file, opened in text write mode (existing content is
     overwritten)
    :return: None
    """
    with path.open(mode="w") as stream:
        contents = nb.dict()
        json.dump(contents, stream, indent=2)

17 

18 

def expand_paths(
    paths: List[Path], *, ignore: Optional[List[str]] = None, rglob: str = "*.ipynb"
) -> List[Path]:
    """
    Get paths of existing file from list of directory or file paths.

    Directories are expanded recursively with `rglob`; paths that are not
     directories are taken as given. Every candidate must then be an existing
     file, match `rglob` and match none of the `ignore` expressions.

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore (defaults to `["!*"]` when
     omitted or `None`)
    :param rglob: Glob expression for expanding directory paths and filtering out
     existing file paths (i.e.: to retrieve only notebooks)
    :return: List of existing file paths
    """
    # Build the default per call instead of sharing one mutable list object
    #  across every invocation of the function.
    if ignore is None:
        ignore = ["!*"]

    # Lazily flatten: directories contribute their recursive matches, anything
    #  else contributes itself (filtered below).
    filepaths = chain.from_iterable(
        path.rglob(rglob) if path.is_dir() else [path] for path in paths
    )
    valid_filepaths = [
        p
        for p in filepaths
        if not any(p.match(i) for i in ignore) and p.is_file() and p.match(rglob)
    ]

    if not valid_filepaths:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return valid_filepaths

47 

48 

def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths.

    :param paths: Paths to consider (any iterable, including one-shot iterators)
    :return: Deepest directory that is a parent of every resolved path
    :raises ValueError: If `paths` yields no items
    """
    # Materialize first: an iterator/generator is always truthy, so testing
    #  `paths` itself would let an empty one slip through to `set.intersection`.
    resolved = [p.resolve() for p in paths]
    if not resolved:
        raise ValueError(f"Expected non-empty `paths`, got {paths}.")

    shared_parents = set.intersection(*(set(p.parents) for p in resolved))
    # The shared parents form a single chain starting at the root, so the one
    #  with the most components is the deepest common directory.
    return max(shared_parents, key=lambda parent: len(parent.parts))

54 

55 

def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Find file along directory path, from the end (child) directory to start.

    Both directories are resolved once and the search walks the resolved
     ancestors of `finish` up to (and including) `start`, so relative or
     symlinked inputs are compared consistently and the walk always terminates.

    :param obj_name: File name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) directory
    :param is_dir: Whether object is a directory or a file
    :return: Path of the first match (built from the resolved `finish`), or `None`
     if `start` is not `finish` or one of its parents, or nothing was found
    :raises ValueError: If `start` or `finish` is not a directory
    """
    if not start.is_dir() or not finish.is_dir():
        raise ValueError("Parameters `start` and `finish` must be directories.")

    # Resolve both sides: comparing a resolved `start` with an unresolved
    #  `finish` rejects equal relative paths (e.g. `.` and `.`).
    start_dir = start.resolve()
    finish_dir = finish.resolve()
    lineage = [finish_dir, *finish_dir.parents]  # child first, root last

    if start_dir not in lineage:
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    for directory in lineage:
        candidate = directory / obj_name
        is_obj = candidate.is_dir() if is_dir else candidate.is_file()
        if is_obj:
            return candidate
        if directory == start_dir:
            # Reached the upper bound of the search without a match
            break

    logger.debug(f"{obj_name} not found between {start} and {finish}.")
    return None