Coverage for databooks/common.py: 93%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

28 statements  

1"""Common set of miscellaneous functions.""" 

2from itertools import chain 

3from pathlib import Path 

4from typing import Iterable, List, Optional 

5 

6from databooks.logging import get_logger 

7 

8logger = get_logger(__file__) 

9 

10 

def expand_paths(
    paths: List[Path], *, ignore: List[str] = ["!*"], rglob: str = "*.ipynb"
) -> List[Path]:
    """
    Collect the existing files designated by a mix of directory and file paths.

    Directories are searched recursively with `rglob`; any other path is taken
     as-is. Every candidate is then kept only if it matches none of the `ignore`
     globs, is an existing file and matches `rglob`.

    :param paths: Paths to consider (can be directories or files)
    :param ignore: Glob expressions of files to ignore
    :param rglob: Glob expression used both to expand directories and to filter
     the resulting file paths (i.e.: to retrieve only notebooks)
    :return: List of existing file paths
    """
    candidates: List[Path] = []
    for entry in paths:
        if entry.is_dir():
            candidates.extend(entry.rglob(rglob))
        else:
            candidates.append(entry)

    def _is_wanted(candidate: Path) -> bool:
        """Tell whether a candidate survives the ignore and glob filters."""
        if any(candidate.match(pattern) for pattern in ignore):
            return False
        return candidate.is_file() and candidate.match(rglob)

    selected = [candidate for candidate in candidates if _is_wanted(candidate)]

    if not selected:
        logger.debug(
            f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
        )
    return selected

39 

40 

def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find common parent amongst several file paths.

    :param paths: Paths to inspect; any iterable is accepted, including one-shot
     iterators and generators
    :return: Deepest directory that is an ancestor of every (resolved) path
    :raises ValueError: If `paths` yields no items
    """
    # Materialize first: iterators and generators are always truthy, so testing
    # `paths` itself would let an empty one through to `set.intersection`, which
    # then fails with an unrelated `TypeError`.
    path_list = list(paths)
    if not path_list:
        raise ValueError(f"Expected non-empty `paths`, got {path_list}.")
    shared_parents = set.intersection(*(set(p.resolve().parents) for p in path_list))
    # The shared ancestors all lie on one chain from the root, and a path sorts
    # after each of its own ancestors, so the maximum is the deepest of them.
    return max(shared_parents)

46 

47 

def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Find file along directory path, from the end (child) directory up to start.

    The search begins in `finish` and moves one directory up at a time until the
     object is found or `start` has been inspected.

    :param obj_name: File name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) directory
    :param is_dir: Whether object is a directory or a file
    :return: Path of the first match, or `None` if `start` is not `finish` or one
     of its parents, or if nothing is found. A match located directly in `finish`
     is expressed relative to `finish` as passed in; a match located further up is
     expressed as a resolved path.
    :raises ValueError: If `start` or `finish` is not a directory
    """
    if not start.is_dir() or not finish.is_dir():
        raise ValueError("Parameters `start` and `finish` must be directories.")

    # Compare resolved paths on both sides; comparing against the raw `finish`
    # wrongly rejects relative inputs such as `start == finish == Path(".")`.
    start_resolved = start.resolve()
    finish_resolved = finish.resolve()
    if (
        start_resolved != finish_resolved
        and start_resolved not in finish_resolved.parents
    ):
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    current = finish
    while True:
        candidate = current / obj_name
        if candidate.is_dir() if is_dir else candidate.is_file():
            return candidate
        if current.samefile(start):
            logger.debug(f"{obj_name} not found between {start} and {finish}.")
            return None
        # Climb via the resolved path: the lexical parent of a relative path
        # stalls at `Path(".")`, which would keep the walk from ever reaching
        # `start`. `start` is a verified ancestor, so this loop terminates.
        current = current.resolve().parent

79 )