Skip to content

Commit 9f1b6db

Browse files
authored
FileIO Refactor -- Make More Dynamic/Robust (#46)
Now, there are three structures here: FileRWInterface, an abstract class declaring load, write and append (in some cases a base implementation is provided); JSONFile, CSVFile & NDJSONFile, which implement the FileRWInterface; and FileIO, which is the "user interface". This class allows the user to specify an encoding and path to do all the file operations with any of the three file implementations. It provides convenience methods write_XXX, load_XXX and append_XXX for each of the existing filetypes.
1 parent 2606417 commit 9f1b6db

File tree

5 files changed

+406
-270
lines changed

5 files changed

+406
-270
lines changed

dune_client/file.py

Lines changed: 0 additions & 227 deletions
This file was deleted.

dune_client/file/__init__.py

Whitespace-only changes.

dune_client/file/base.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
"""File Reader and Writer for DuneRecords"""
2+
from __future__ import annotations
3+
4+
import csv
5+
import json
6+
import logging
7+
import os.path
8+
from abc import ABC, abstractmethod
9+
from pathlib import Path
10+
from typing import TextIO, List, Tuple
11+
12+
# ndjson missing types: https://github.com/rhgrant10/ndjson/issues/10
13+
import ndjson # type: ignore
14+
15+
from dune_client.types import DuneRecord
16+
17+
logger = logging.getLogger(__name__)
18+
logging.basicConfig(format="%(asctime)s %(levelname)s %(name)s %(message)s")
19+
20+
21+
class FileRWInterface(ABC):
    """Interface for File Read, Write and Append functionality (specific to Dune Query Results)"""

    def __init__(self, path: Path | str, name: str, encoding: str = "utf-8"):
        # Target directory, file name and text encoding shared by every
        # file operation on this instance.
        self.path = path
        self.filename = name
        self.encoding = encoding

    @property
    def filepath(self) -> str:
        """Internal method for building absolute path."""
        return os.path.join(self.path, self.filename)

    @abstractmethod
    def _assert_matching_keys(self, keys: Tuple[str, ...]) -> None:
        """Used as validation for append"""

    @abstractmethod
    def load(self, file: TextIO) -> list[DuneRecord]:
        """Loads DuneRecords from `file`"""

    @abstractmethod
    def write(
        self, out_file: TextIO, data: list[DuneRecord], skip_headers: bool = False
    ) -> None:
        """Writes `data` to `out_file`"""

    def append(self, data: List[DuneRecord]) -> None:
        """Appends `data` to file with `name`"""
        # Empty payloads are a no-op: nothing is validated and the file is
        # never opened (so it is not created as a side effect).
        if not data:
            return None
        self._assert_matching_keys(tuple(data[0].keys()))
        with open(self.filepath, "a+", encoding=self.encoding) as out_file:
            return self.write(out_file, data, skip_headers=True)
54+
55+
56+
class CSVFile(FileRWInterface):
    """File Read/Writer for CSV format"""

    def _assert_matching_keys(self, keys: Tuple[str, ...]) -> None:
        """Validates that `keys` match the header row of the file on disk."""
        with open(self.filepath, "r", encoding=self.encoding) as file:
            # The first line of a CSV written by this class is the header row.
            headers = file.readline()
        existing_keys = headers.strip().split(",")

        key_tuple = tuple(existing_keys)
        assert keys == key_tuple, f"{keys} != {key_tuple}"

    def load(self, file: TextIO) -> list[DuneRecord]:
        """Loads DuneRecords from `file`"""
        return list(csv.DictReader(file))

    def write(
        self, out_file: TextIO, data: list[DuneRecord], skip_headers: bool = False
    ) -> None:
        """Writes `data` to `out_file`.

        The header row (taken from the first record's keys) is emitted unless
        `skip_headers` is set (used by append so headers aren't duplicated).
        """
        if len(data) == 0:
            logger.warning(
                "Writing an empty CSV file with headers -- will not work with append later."
            )
            return
        headers = data[0].keys()
        dict_writer = csv.DictWriter(out_file, headers, lineterminator="\n")
        if not skip_headers:
            dict_writer.writeheader()
        # Write rows via the same DictWriter so values are aligned to the
        # header columns by key. The previous implementation fed
        # tuple(rec.values()) to a plain csv.writer, which silently misaligned
        # columns whenever a record ordered its keys differently from the
        # first record.
        dict_writer.writerows(data)
88+
89+
90+
class JSONFile(FileRWInterface):
    """File Read/Writer for JSON format"""

    def _assert_matching_keys(self, keys: Tuple[str, ...]) -> None:
        """Validates that `keys` match those of the first record on disk."""
        with open(self.filepath, "r", encoding=self.encoding) as file:
            # Parse the whole document instead of only the first physical
            # line: the old `json.loads(file.readline())` broke on any
            # pretty-printed (multi-line) JSON file, while json.load gives an
            # identical result for the single-line files this class writes.
            single_object = json.load(file)[0]
        existing_keys = single_object.keys()

        key_tuple = tuple(existing_keys)
        assert keys == key_tuple, f"{keys} != {key_tuple}"

    def load(self, file: TextIO) -> list[DuneRecord]:
        """Loads DuneRecords from `file`"""
        # json.load reads and parses the stream directly (equivalent to
        # json.loads(file.read()) without the intermediate string).
        loaded_file: list[DuneRecord] = json.load(file)
        return loaded_file

    def write(
        self, out_file: TextIO, data: list[DuneRecord], skip_headers: bool = False
    ) -> None:
        """Writes `data` to `out_file` as a single JSON array."""
        out_file.write(json.dumps(data))

    def append(self, data: List[DuneRecord]) -> None:
        """Appends `data` to file with `name`.

        A JSON array cannot be appended to in place, so the existing file is
        loaded fully, merged with `data`, and rewritten. Raises
        FileNotFoundError if the file does not exist yet.
        """
        if len(data) > 0:
            self._assert_matching_keys(tuple(data[0].keys()))
        with open(self.filepath, "r", encoding=self.encoding) as existing_file:
            existing_data = self.load(existing_file)
        with open(self.filepath, "w", encoding=self.encoding) as existing_file:
            self.write(existing_file, existing_data + data)
120+
121+
122+
class NDJSONFile(FileRWInterface):
    """File Read/Writer for NDJSON format"""

    def _assert_matching_keys(self, keys: Tuple[str, ...]) -> None:
        # NDJSON stores one JSON object per line, so the first line alone
        # reveals the record schema already on disk.
        with open(self.filepath, "r", encoding=self.encoding) as existing:
            first_record = json.loads(existing.readline())
        existing_keys = first_record.keys()

        key_tuple = tuple(existing_keys)
        assert keys == key_tuple, f"{keys} != {key_tuple}"

    def load(self, file: TextIO) -> list[DuneRecord]:
        """Loads DuneRecords from `file`"""
        records: list[DuneRecord] = list(ndjson.reader(file))
        return records

    def write(
        self, out_file: TextIO, data: list[DuneRecord], skip_headers: bool = False
    ) -> None:
        """Writes `data` to `out_file`, one JSON object per line."""
        nd_writer = ndjson.writer(out_file, ensure_ascii=False)
        for record in data:
            nd_writer.writerow(record)

0 commit comments

Comments
 (0)