Skip to content

Commit 90b832b

Browse files
committed
Store Module Enhancements: Update CSV, Pandas, PyArrow, and native Python store implementations with tests
1 parent 7df90dc commit 90b832b

File tree

13 files changed

+1089
-50
lines changed

13 files changed

+1089
-50
lines changed

cosmotech/coal/store/__init__.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
2+
# This document and all information contained herein is the exclusive property -
3+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
4+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
5+
# etc., to any person is prohibited unless it has been previously and
6+
# specifically authorized by written means by Cosmo Tech.
7+
8+
"""
9+
Store module.
10+
11+
This module provides functions for working with the Store,
12+
including loading and converting data.
13+
"""
14+
15+
# Re-export the Store class
16+
from cosmotech.coal.store.store import Store
17+
18+
# Re-export functions from the csv module
19+
from cosmotech.coal.store.csv import (
20+
store_csv_file,
21+
convert_store_table_to_csv,
22+
)
23+
24+
# Re-export functions from the native_python module
25+
from cosmotech.coal.store.native_python import (
26+
store_pylist,
27+
convert_table_as_pylist,
28+
)
29+
30+
# Re-export functions from the pandas module (if available)
31+
32+
from cosmotech.coal.store.pandas import (
33+
store_dataframe,
34+
convert_store_table_to_dataframe as convert_store_table_to_pandas_dataframe,
35+
)
36+
37+
# Re-export functions from the pyarrow module (if available)
38+
39+
from cosmotech.coal.store.pyarrow import (
40+
store_table,
41+
convert_store_table_to_dataframe as convert_store_table_to_pyarrow_table,
42+
)

cosmotech/coal/store/csv.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
2+
# This document and all information contained herein is the exclusive property -
3+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
4+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
5+
# etc., to any person is prohibited unless it has been previously and
6+
# specifically authorized by written means by Cosmo Tech.
7+
18
import pathlib
29

310
import pyarrow.csv as pc
@@ -27,11 +34,7 @@ def convert_store_table_to_csv(
2734
replace_existsing_file: bool = False,
2835
store=Store(),
2936
):
30-
if (
31-
csv_path.name.endswith(".csv")
32-
and csv_path.exists()
33-
and not replace_existsing_file
34-
):
37+
if csv_path.name.endswith(".csv") and csv_path.exists() and not replace_existsing_file:
3538
raise FileExistsError(f"File {csv_path} already exists")
3639
if not csv_path.name.endswith(".csv"):
3740
csv_path = csv_path / f"{table_name}.csv"

cosmotech/coal/store/native_python.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
2+
# This document and all information contained herein is the exclusive property -
3+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
4+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
5+
# etc., to any person is prohibited unless it has been previously and
6+
# specifically authorized by written means by Cosmo Tech.
7+
18
import pyarrow as pa
29

310
from cosmotech.coal.store.store import Store

cosmotech/coal/store/pandas.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
1+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
2+
# This document and all information contained herein is the exclusive property -
3+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
4+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
5+
# etc., to any person is prohibited unless it has been previously and
6+
# specifically authorized by written means by Cosmo Tech.
7+
18
import pyarrow
29

310
from cosmotech.coal.store.store import Store
11+
import pandas as pd
412

5-
try:
6-
import pandas as pd
713

8-
def store_dataframe(
9-
table_name: str,
10-
dataframe: pd.DataFrame,
11-
replace_existsing_file: bool = False,
12-
store=Store(),
13-
):
14-
data = pyarrow.Table.from_pandas(dataframe)
14+
def store_dataframe(
15+
table_name: str,
16+
dataframe: pd.DataFrame,
17+
replace_existsing_file: bool = False,
18+
store=Store(),
19+
):
20+
data = pyarrow.Table.from_pandas(dataframe)
1521

16-
store.add_table(
17-
table_name=table_name, data=data, replace=replace_existsing_file
18-
)
22+
store.add_table(table_name=table_name, data=data, replace=replace_existsing_file)
1923

20-
def convert_store_table_to_dataframe(
21-
table_name: str, store=Store()
22-
) -> pd.DataFrame:
23-
return store.get_table(table_name).to_pandas()
2424

25-
except ModuleNotFoundError:
26-
pass
25+
def convert_store_table_to_dataframe(table_name: str, store=Store()) -> pd.DataFrame:
26+
return store.get_table(table_name).to_pandas()

cosmotech/coal/store/pyarrow.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
1+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
2+
# This document and all information contained herein is the exclusive property -
3+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
4+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
5+
# etc., to any person is prohibited unless it has been previously and
6+
# specifically authorized by written means by Cosmo Tech.
7+
18
from cosmotech.coal.store.store import Store
29

3-
try:
4-
import pyarrow as pa
10+
import pyarrow as pa
11+
512

6-
def store_table(
7-
table_name: str,
8-
data: pa.Table,
9-
replace_existsing_file: bool = False,
10-
store=Store(),
11-
):
12-
store.add_table(
13-
table_name=table_name, data=data, replace=replace_existsing_file
14-
)
13+
def store_table(
14+
table_name: str,
15+
data: pa.Table,
16+
replace_existsing_file: bool = False,
17+
store=Store(),
18+
):
19+
store.add_table(table_name=table_name, data=data, replace=replace_existsing_file)
1520

16-
def convert_store_table_to_dataframe(table_name: str, store=Store()) -> pa.Table:
17-
return store.get_table(table_name)
1821

19-
except ModuleNotFoundError:
20-
pass
22+
def convert_store_table_to_dataframe(table_name: str, store=Store()) -> pa.Table:
23+
return store.get_table(table_name)

cosmotech/coal/store/store.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
1+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
2+
# This document and all information contained herein is the exclusive property -
3+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
4+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
5+
# etc., to any person is prohibited unless it has been previously and
6+
# specifically authorized by written means by Cosmo Tech.
7+
18
import os
29
import pathlib
310

411
import pyarrow
512
from adbc_driver_sqlite import dbapi
613

714
from cosmotech.coal.utils.logger import LOGGER
15+
from cosmotech.orchestrator.utils.translate import T
816

917

1018
class Store:
@@ -15,9 +23,7 @@ def sanitize_column(column_name: str) -> str:
1523
def __init__(
1624
self,
1725
reset=False,
18-
store_location: pathlib.Path = pathlib.Path(
19-
os.environ.get("CSM_PARAMETERS_ABSOLUTE_PATH", ".")
20-
),
26+
store_location: pathlib.Path = pathlib.Path(os.environ.get("CSM_PARAMETERS_ABSOLUTE_PATH", ".")),
2127
):
2228
self.store_location = pathlib.Path(store_location) / ".coal/store"
2329
self.store_location.mkdir(parents=True, exist_ok=True)
@@ -33,25 +39,23 @@ def reset(self):
3339

3440
def get_table(self, table_name: str) -> pyarrow.Table:
3541
if not self.table_exists(table_name):
36-
raise ValueError(f"No table with name {table_name} exists")
42+
raise ValueError(T("coal.errors.data.no_table").format(table_name=table_name))
3743
return self.execute_query(f"select * from {table_name}")
3844

3945
def table_exists(self, table_name) -> bool:
4046
return table_name in self.list_tables()
4147

4248
def get_table_schema(self, table_name: str) -> pyarrow.Schema:
4349
if not self.table_exists(table_name):
44-
raise ValueError(f"No table with name {table_name} exists")
50+
raise ValueError(T("coal.errors.data.no_table").format(table_name=table_name))
4551
with dbapi.connect(self._database) as conn:
4652
return conn.adbc_get_table_schema(table_name)
4753

4854
def add_table(self, table_name: str, data=pyarrow.Table, replace: bool = False):
4955
with dbapi.connect(self._database, autocommit=True) as conn:
5056
with conn.cursor() as curs:
51-
rows = curs.adbc_ingest(
52-
table_name, data, "replace" if replace else "create_append"
53-
)
54-
LOGGER.debug(f"Inserted {rows} rows in table {table_name}")
57+
rows = curs.adbc_ingest(table_name, data, "replace" if replace else "create_append")
58+
LOGGER.debug(T("coal.logs.data_transfer.rows_inserted").format(rows=rows, table_name=table_name))
5559

5660
def execute_query(self, sql_query: str) -> pyarrow.Table:
5761
batch_size = 1024
@@ -60,9 +64,7 @@ def execute_query(self, sql_query: str) -> pyarrow.Table:
6064
try:
6165
with dbapi.connect(self._database, autocommit=True) as conn:
6266
with conn.cursor() as curs:
63-
curs.adbc_statement.set_options(
64-
**{"adbc.sqlite.query.batch_rows": str(batch_size)}
65-
)
67+
curs.adbc_statement.set_options(**{"adbc.sqlite.query.batch_rows": str(batch_size)})
6668
curs.execute(sql_query)
6769
return curs.fetch_arrow_table()
6870
except OSError:

0 commit comments

Comments
 (0)