Coverage for databooks/common.py: 94%

33 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-04 16:41 +0000

1"""Common set of miscellaneous functions.""" 

2from itertools import chain 

3from pathlib import Path 

4from typing import Iterable, List, Optional, Sequence 

5 

6from databooks.logging import get_logger 

7 

8logger = get_logger(__file__) 

9 

10 

def expand_paths(
    paths: List[Path], *, ignore: Sequence[str] = ("!*",), rglob: str = "*.ipynb"
) -> List[Path]:
    """
    Get paths of existing files from a list of directory or file paths.

    Directories are expanded recursively with `rglob`. Paths that are not
    directories are kept without being matched against `rglob`. Every given path
    is resolved before use so that it can be compared with the (resolved) ignored
    paths; otherwise a relative file path could never be ignored and could show up
    twice (once via its directory, once as given).

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore, expanded recursively from
     the current working directory
    :param rglob: Glob expression for expanding directory paths (i.e.: to retrieve
     only notebooks)
    :return: Sorted list of existing file paths
    """
    filepaths = set()
    for path in paths:
        resolved = path.resolve()
        if resolved.is_dir():
            filepaths.update(resolved.rglob(rglob))
        else:
            filepaths.add(resolved)

    ignored = {
        match.resolve() for pattern in ignore for match in Path.cwd().rglob(pattern)
    }
    logger.debug(
        f"{len(ignored)} files will be ignored from {len(filepaths)} file paths."
    )
    # Sorting makes the result deterministic; set iteration order is arbitrary.
    valid_filepaths = sorted(p for p in filepaths - ignored if p.is_file())

    if not valid_filepaths:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return valid_filepaths

41 

42 

def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths (includes current path).

    :param paths: Paths to compare; any iterable is accepted and is consumed once
    :return: Deepest resolved path that is equal to, or an ancestor of, every path
    :raises ValueError: If `paths` is empty
    """
    # Materialise first: a generator is always truthy, so testing `paths` directly
    # would let an empty one through to `set.intersection()` with no arguments.
    resolved = [p.resolve() for p in paths]
    if not resolved:
        raise ValueError(f"Expected non-empty `paths`, got {paths}.")

    shared = set.intersection(*({p, *p.parents} for p in resolved))
    # The shared ancestors all lie on one chain, so the longest one is the deepest.
    return max(shared, key=lambda p: len(p.parts))

48 

49 

def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Find file along directory path, from the end (child) directory up to start.

    Both ends are resolved before comparing and walking, so relative paths behave
    the same as absolute ones and the returned path is always absolute.

    :param obj_name: File name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) path; if it is not a directory its parent is used
    :param is_dir: Whether object is a directory or a file
    :return: Path of the closest match to `finish`, or `None` if nothing is found
     or `start` is not a parent of `finish`
    :raises ValueError: If `start` is not a directory
    """
    finish = finish if finish.is_dir() else finish.parent
    logger.debug(f"Searching for {obj_name} between {start} and {finish}.")
    if not start.is_dir():
        raise ValueError("Parameter `start` must be a directory.")

    top = start.resolve()
    bottom = finish.resolve()
    # Ordered from the child directory up to the filesystem root.
    lineage = [bottom, *bottom.parents]
    if top not in lineage:
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    for directory in lineage:
        candidate = directory / obj_name
        found = candidate.is_dir() if is_dir else candidate.is_file()
        if found:
            return candidate
        if directory == top:
            # `start` itself has been checked; never search above it.
            break

    logger.debug(f"{obj_name} not found between {start} and {finish}.")
    return None