Coverage for databooks/common.py: 89%

36 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-10-03 12:27 +0000

1"""Common set of miscellaneous functions.""" 

2from itertools import chain 

3from pathlib import Path 

4from typing import Iterable, List, Optional, Sequence 

5 

6from databooks.logging import get_logger 

7 

8logger = get_logger(__file__) 

9 

10 

def expand_paths(
    paths: List[Path], *, ignore: Sequence[str] = ("!*",), rglob: str = "*.ipynb"
) -> Optional[List[Path]]:
    """
    Get paths of existing file from list of directory or file paths.

    Directories are expanded recursively with `rglob`; paths that are not
    directories are taken as given (they are not matched against `rglob`). Files
    matching any of the `ignore` globs, searched recursively from the common parent
    of `paths`, are removed from the result.

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore
    :param rglob: Glob expression for expanding directory paths (i.e.: to retrieve
     only notebooks)
    :return: Sorted list of existing file paths, an empty list if nothing matches or
     `None` if `paths` is empty
    """
    if not paths:
        return None

    expanded = chain.from_iterable(
        path.resolve().rglob(rglob) if path.is_dir() else [path] for path in paths
    )
    # Key each candidate by its resolved location: the ignored set below is made of
    # resolved paths, so comparing unresolved (relative or symlinked) paths against
    # it would never match. Keying by resolved path also de-duplicates a file that
    # was passed explicitly and found again through a directory expansion.
    candidates = {found.resolve(): found for found in expanded}

    common_path = find_common_parent(paths=paths)
    ignored = {
        match.resolve()
        for match in chain.from_iterable(common_path.rglob(i) for i in ignore)
    }
    logger.debug(
        f"{len(ignored)} files will be ignored from {len(candidates)} file paths."
    )
    # Sort so that the result does not depend on set/dict iteration order
    valid_filepaths = sorted(
        found
        for resolved, found in candidates.items()
        if resolved not in ignored and found.is_file()
    )

    if not valid_filepaths:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return valid_filepaths

44 

45 

def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths (includes current path).

    :param paths: Paths to compare (any iterable, including one-shot iterators)
    :return: Deepest resolved path that is equal to, or a parent of, every path
    :raises ValueError: If `paths` is empty or the paths share no common parent
    """
    # Materialise first: an iterator/generator is always truthy, so the emptiness
    # check below would otherwise never trigger for it
    paths = list(paths)
    if not paths:
        raise ValueError(f"Expected non-empty `paths`, got {paths}.")

    resolved_paths = [p.resolve() for p in paths]
    # Each path contributes itself and all of its ancestors; what remains after the
    # intersection is the chain of ancestors shared by every path
    shared = set.intersection(*({r, *r.parents} for r in resolved_paths))
    if not shared:
        raise ValueError(f"No common parent found for {paths}.")
    # All shared ancestors lie on a single chain, so the one with most parts is the
    # deepest (closest) common parent
    return max(shared, key=lambda p: len(p.parts))

51 

52 

def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Find object along directory path, from the end (child) directory up to start.

    Both ends are resolved before searching, so relative paths and symlinks are
    handled and the returned path is absolute.

    :param obj_name: File (or directory) name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) path; if it is not a directory its parent is used
    :param is_dir: Whether object is a directory or a file
    :return: Path of the closest match to `finish`, or `None` if the object is not
     found or `start` is not a parent of `finish`
    :raises ValueError: If `start` is not a directory
    """
    finish = finish if finish.is_dir() else finish.parent
    logger.debug(f"Searching for {obj_name} between {start} and {finish}.")
    if not start.is_dir():
        raise ValueError("Parameter `start` must be a directory.")

    # Resolve both ends once: comparing a resolved `start` against an unresolved
    # `finish` rejects valid relative paths, and walking `.parent` on a relative
    # path never gets past `.`
    start_dir = start.resolve()
    finish_dir = finish.resolve()
    lineage = [finish_dir, *finish_dir.parents]  # child first, filesystem root last

    if start_dir not in lineage:
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    for directory in lineage:
        candidate = directory / obj_name
        if candidate.is_dir() if is_dir else candidate.is_file():
            return candidate
        if directory == start_dir:
            # Reached the upper bound of the search without a match
            break

    logger.debug(f"{obj_name} not found between {start} and {finish}.")
    return None