Skip to content

Commit 4bf0918

Browse files
authored
Startup: Check File Structure (#6)
### Description <!---Why are we making this change? What does it do?---> Adds a check of the directory structure when starting up. Validates that the (not yet implemented) `metadata.json` file exists, and that the directory structure adheres to the expected layout. ### Testing <!---How was this tested?---> Not tested yet; going to take a look once we are somewhat feature complete. ### Impact <!---What components does this impact? How can it affect prod?---> ### Other <!---What else can this impact? What special considerations are needed when reviewing the PR?--->
2 parents 8ade777 + 19b653b commit 4bf0918

File tree

5 files changed

+137
-4
lines changed

5 files changed

+137
-4
lines changed

file_structure.md

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
```
2+
lens/
3+
├── avax
4+
│   ├── depth
5+
│   │   ├── 2025-02-21.csv
6+
│   │   ├── 2025-02-22.csv
7+
│   │   ├── 2025-02-23.csv
8+
│   │   ├── 2025-02-24.csv
9+
│   │   ├── 2025-02-25.csv
10+
│   │   ├── 2025-02-26.csv
11+
│   │   ├── 2025-02-27.csv
12+
│   │   ├── 2025-02-28.csv
13+
│   │   ├── 2025-03-01.csv
14+
│   │   └── 2025-03-02.csv
15+
│   └── trades
16+
│   ├── 2025-02-21.csv
17+
│   ├── 2025-02-22.csv
18+
│   ├── 2025-02-23.csv
19+
│   ├── 2025-02-24.csv
20+
│   ├── 2025-02-25.csv
21+
│   ├── 2025-02-26.csv
22+
│   ├── 2025-02-27.csv
23+
│   ├── 2025-02-28.csv
24+
│   ├── 2025-03-01.csv
25+
│   └── 2025-03-02.csv
26+
├── btc
27+
│   ├── depth
28+
│   │   ├── 2025-02-21.csv
29+
│   │   ├── 2025-02-22.csv
30+
│   │   ├── 2025-02-23.csv
31+
│   │   ├── 2025-02-24.csv
32+
│   │   ├── 2025-02-25.csv
33+
│   │   ├── 2025-02-26.csv
34+
│   │   ├── 2025-02-27.csv
35+
│   │   ├── 2025-02-28.csv
36+
│   │   ├── 2025-03-01.csv
37+
│   │   └── 2025-03-02.csv
38+
│   └── trades
39+
│   ├── 2025-02-21.csv
40+
│   ├── 2025-02-22.csv
41+
│   ├── 2025-02-23.csv
42+
│   ├── 2025-02-24.csv
43+
│   ├── 2025-02-25.csv
44+
│   ├── 2025-02-26.csv
45+
│   ├── 2025-02-27.csv
46+
│   ├── 2025-02-28.csv
47+
│   ├── 2025-03-01.csv
48+
│   └── 2025-03-02.csv
49+
└── metadata.json
50+
```

lens/core.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,23 @@
1+
from pathlib import Path
2+
13
from loguru import logger
24

35
from lens.util.hostname import check_valid_lens_hostname, get_hostname
6+
from lens.util.paths import check_file_structure_correct, check_metadata_exists
47

58

69
class Lens:
    """Top-level application object; validates the environment on startup."""

    def __init__(self) -> None:
        pass

    def startup(self, data_root: Path) -> None:
        """Run pre-flight checks against the host and the on-disk data layout.

        Args:
            data_root: Root directory of the lens data tree.

        Raises:
            RuntimeError: If any startup check fails.
        """
        # Each entry: (check callable, failure message, success log line).
        # Checks run in order and stop at the first failure, matching the
        # original sequential if/raise structure.
        startup_checks = [
            (
                lambda: check_valid_lens_hostname(get_hostname()),
                "Cannot start lens on invalid host.",
                "Host check succeeded.",
            ),
            (
                lambda: check_metadata_exists(data_root),
                "Failed to find metadata.json file in the data directory.",
                "Metadata found.",
            ),
            (
                lambda: check_file_structure_correct(data_root),
                "File structure does not match expected directory structure.",
                "File structure is correct.",
            ),
        ]
        for check, failure_message, success_message in startup_checks:
            if not check():
                raise RuntimeError(failure_message)
            logger.info(success_message)
        logger.info("Lens startup succeeded.")

lens/descriptor.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from dataclasses import dataclass
2+
from pathlib import Path
3+
4+
# Names of the per-coin subdirectories holding each data category.
TRADE_DIRNAME = "trades"
DEPTH_DIRNAME = "depth"


@dataclass
class DataPathContainer:
    """Paths to the raw data files for a single coin."""

    trade_paths: list[Path]  # per-day trade CSV files
    depth_paths: list[Path]  # per-day depth CSV files


@dataclass
class DataDescriptor:
    """Description of the on-disk data layout rooted at ``root``."""

    root: Path
    data_paths: dict[str, DataPathContainer]  # coin name -> its data files


def load_data_descriptor(base_path: Path) -> DataDescriptor:
    """Scan ``base_path`` and build a :class:`DataDescriptor` of its contents.

    Every immediate subdirectory of ``base_path`` is treated as a coin
    directory that must contain ``trades`` and ``depth`` subdirectories.
    Plain files directly under ``base_path`` (e.g. ``metadata.json``) are
    ignored.

    Args:
        base_path: Root of the data tree to scan.

    Returns:
        A descriptor mapping each coin name to its data file paths.

    Raises:
        FileNotFoundError: If a coin directory is missing either expected
            subdirectory (propagated from ``iterdir``; callers rely on this).
    """
    data_paths: dict[str, DataPathContainer] = {}

    # sorted() makes the result deterministic; iterdir() order is
    # filesystem-dependent.
    for coin_path in sorted(p for p in base_path.iterdir() if p.is_dir()):
        trade_files = sorted((coin_path / TRADE_DIRNAME).iterdir())
        depth_files = sorted((coin_path / DEPTH_DIRNAME).iterdir())
        data_paths[coin_path.name] = DataPathContainer(trade_files, depth_files)

    return DataDescriptor(base_path, data_paths)

lens/main.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,28 @@
1+
import argparse
2+
from pathlib import Path
3+
from typing import Any
4+
15
from loguru import logger
26

37
from lens.core import Lens
48

59

10+
def parse_arguments(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse command-line arguments for the lens entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when
            None, preserving the original call-with-no-args behavior.

    Returns:
        Namespace with a ``directory`` attribute (the data root path).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--directory",
        type=str,
        required=True,
        help="Path to the root of the lens data directory.",
    )
    return parser.parse_args(argv)
16+
17+
618
@logger.catch
def main(data_root: Path) -> None:
    """Run the lens startup sequence against ``data_root``."""
    application = Lens()
    logger.info("Starting lens initialization check.")
    application.startup(data_root)
1123

1224

1325
if __name__ == "__main__":
    # Parse the CLI arguments and hand the data root straight to main().
    cli_args = parse_arguments()
    main(Path(cli_args.directory))

lens/util/paths.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from pathlib import Path
2+
3+
from loguru import logger
4+
5+
from lens.descriptor import load_data_descriptor
6+
7+
# Name of the required metadata file at the root of the data directory.
METADATA_FILENAME = "metadata.json"


def check_metadata_exists(base_path: Path) -> bool:
    """Return True if the metadata file is present directly under ``base_path``."""
    metadata_path = base_path / METADATA_FILENAME
    return metadata_path.exists()
12+
13+
14+
def check_file_structure_correct(base_path: Path) -> bool:
    """Return True if the data tree under ``base_path`` matches the expected layout.

    The structure is considered correct when a data descriptor can be built
    (every coin directory has its ``trades`` and ``depth`` subdirectories)
    and every trades/depth directory contains the same number of files.

    Args:
        base_path: Root of the data tree to validate.

    Returns:
        True if the layout is valid, False otherwise (a warning is logged).
    """
    try:
        descriptor = load_data_descriptor(base_path)
    except FileNotFoundError:
        logger.warning("Failed to build data descriptor - missing files.")
        return False

    # Collect the distinct per-directory file counts; a correct tree
    # collapses to exactly one value.
    data_file_counts = {
        count
        for container in descriptor.data_paths.values()
        for count in (len(container.trade_paths), len(container.depth_paths))
    }

    if len(data_file_counts) != 1:
        # Also hit when base_path contains no coin directories at all
        # (empty set), which is likewise treated as an invalid layout.
        logger.warning("Data files are not uniform in counts.")
        return False
    return True

0 commit comments

Comments
 (0)