Skip to content

Commit 2ffea74

Browse files
Merge pull request #46 from Cosmo-Tech/JREY/add_load_from_singlestore_command
Add load_from_singlestore command
2 parents a9fb8c7 + 25f5c4a commit 2ffea74

File tree

4 files changed

+136
-0
lines changed

4 files changed

+136
-0
lines changed
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Copyright (C) - 2023 - 2024 - Cosmo Tech
2+
# This document and all information contained herein is the exclusive property -
3+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
4+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
5+
# etc., to any person is prohibited unless it has been previously and
6+
# specifically authorized by written means by Cosmo Tech.
7+
import pathlib
8+
import time
9+
import csv
10+
import singlestoredb as s2
11+
from sqlite3 import Cursor
12+
13+
from cosmotech.coal.cli.utils.click import click
14+
from cosmotech.coal.cli.utils.decorators import web_help
15+
from cosmotech.coal.store.csv import store_csv_file
16+
from cosmotech.coal.store.store import Store
17+
from cosmotech.coal.utils.logger import LOGGER
18+
19+
def get_data(table_name:str, output_directory:str, cursor: Cursor):
    """
    Run a SQL query to fetch all data from a table and write them in a csv file

    The file is written to ``<output_directory>/<table_name>.csv`` with one
    header line followed by one line per fetched row.

    :param table_name: name of the table to read. It is interpolated as-is in
        the SQL query and in the output file name, so it must come from a
        trusted source.
    :param output_directory: directory receiving the csv file
    :param cursor: open database cursor; fetched rows are expected to be
        mappings (column name -> value)
    """
    start_time = time.perf_counter()
    cursor.execute(f"SELECT * FROM {table_name}")
    rows = cursor.fetchall()
    end_time = time.perf_counter()
    LOGGER.info(f"Rows fetched in {table_name} table: {len(rows)} in {round(end_time - start_time, 2)} seconds")

    if rows:
        field_names = list(rows[0].keys())
    else:
        # An empty table has no first row to read the column names from:
        # fall back on the cursor metadata so the csv still gets its header.
        field_names = [column[0] for column in (cursor.description or ())]
    if not field_names:
        # Nothing is known about the table columns, a csv file would be meaningless.
        LOGGER.warning(f"No column found for {table_name} table, no csv file written")
        return

    # Explicit encoding so the file content does not depend on the platform locale.
    with open(f"{output_directory}/{table_name}.csv", "w", newline="", encoding="utf-8") as csv_stock:
        w = csv.DictWriter(csv_stock, field_names)
        w.writeheader()
        w.writerows(rows)
32+
33+
@click.command()
@web_help("csm-data/store/load-from-singlestore")
@click.option("--singlestore-host",
              "single_store_host",
              envvar="SINGLE_STORE_HOST",
              help="SingleStore instance URI",
              type=str,
              show_envvar=True,
              required=True)
@click.option('--singlestore-port',
              "single_store_port",
              help='SingleStore port',
              envvar="SINGLE_STORE_PORT",
              show_envvar=True,
              required=False,
              default=3306)
@click.option('--singlestore-db',
              "single_store_db",
              help='SingleStore database name',
              envvar="SINGLE_STORE_DB",
              show_envvar=True,
              required=True)
@click.option('--singlestore-user',
              "single_store_user",
              help='SingleStore connection user name',
              envvar="SINGLE_STORE_USERNAME",
              show_envvar=True,
              required=True)
@click.option('--singlestore-password',
              "single_store_password",
              help='SingleStore connection password',
              envvar="SINGLE_STORE_PASSWORD",
              show_envvar=True,
              required=True)
@click.option('--singlestore-tables',
              "single_store_tables",
              help='SingleStore table names to fetched (separated by comma)',
              envvar="SINGLE_STORE_TABLES",
              show_envvar=True,
              required=True)
@click.option("--store-folder",
              "store_folder",
              envvar="CSM_PARAMETERS_ABSOLUTE_PATH",
              help="The folder containing the store files",
              metavar="PATH",
              type=str,
              show_envvar=True,
              required=True)
def load_from_singlestore(
    single_store_host,
    single_store_port,
    single_store_db,
    single_store_user,
    single_store_password,
    store_folder,
    single_store_tables:str =""):
    """Load data from SingleStore tables into the store.
    Will download everything from a given SingleStore database following some configuration into the store.
    Make use of the singlestoredb to access to SingleStore
    More information is available on this page:
    [https://docs.singlestore.com/cloud/developer-resources/connect-with-application-development-tools/connect-with-python/connect-using-the-singlestore-python-client/]
    """
    # NOTE: the docstring above is displayed as the command help, keep it user oriented.

    # Intermediate csv files are written in a dedicated sub-folder of the store folder.
    # Build a real Path object: Path methods must not be called unbound on a plain string.
    single_store_working_dir = pathlib.Path(store_folder) / "singlestore"
    single_store_working_dir.mkdir(parents=True, exist_ok=True)

    # "".split(",") gives [""], so blank entries have to be filtered out explicitly
    # for an empty value to mean "no table name given".
    table_names = [name.strip() for name in (single_store_tables or "").split(",") if name.strip()]

    start_full = time.perf_counter()

    conn = s2.connect(host=single_store_host,
                      port=single_store_port,
                      database=single_store_db,
                      user=single_store_user,
                      password=single_store_password,
                      results_type="dicts")
    with conn:
        with conn.cursor() as cur:
            if not table_names:
                # No table name given: fetch every table of the database.
                cur.execute("SHOW TABLES")
                # Rows are dicts (results_type="dicts"); the table name is taken
                # from the first column of each row.
                table_names = [next(iter(row.values())) for row in cur.fetchall()]
            LOGGER.info(f"Tables to fetched: {table_names}")
            for name in table_names:
                get_data(name, str(single_store_working_dir), cur)
    end_full = time.perf_counter()
    LOGGER.info(f"Full dataset fetched and wrote in {round(end_full - start_full, 2)} seconds")

    # Every csv file found in the working folder is loaded in the store,
    # using the file name without its extension as table name.
    for csv_path in single_store_working_dir.glob("*.csv"):
        LOGGER.info(f"Found {csv_path.name}, storing it")
        store_csv_file(csv_path.stem, csv_path, store=Store(False, store_folder))

cosmotech/coal/cli/commands/store/store.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from cosmotech.coal.cli.commands.store.dump_to_s3 import dump_to_s3
1212
from cosmotech.coal.cli.commands.store.list_tables import list_tables
1313
from cosmotech.coal.cli.commands.store.load_csv_folder import load_csv_folder
14+
from cosmotech.coal.cli.commands.store.load_from_singlestore import load_from_singlestore
1415
from cosmotech.coal.cli.commands.store.reset import reset
1516
from cosmotech.coal.cli.utils.click import click
1617
from cosmotech.coal.cli.utils.decorators import web_help
@@ -30,6 +31,7 @@ def store():
3031
store.add_command(reset, "reset")
3132
store.add_command(list_tables, "list-tables")
3233
store.add_command(load_csv_folder, "load-csv-folder")
34+
store.add_command(load_from_singlestore, "load-from-singlestore")
3335
store.add_command(dump_to_postgresql, "dump-to-postgresql")
3436
store.add_command(dump_to_s3, "dump-to-s3")
3537
store.add_command(dump_to_azure, "dump-to-azure")
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
hide:
3+
- toc
4+
description: "Command help: `csm-data store load-from-singlestore`"
5+
---
6+
# load-from-singlestore
7+
8+
!!! info "Help command"
9+
```text
10+
--8<-- "generated/commands_help/csm-data/store/load-from-singlestore.txt"
11+
```

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ cosmotech-api~=3.2
1919
# Commands requirements
2020
boto3~=1.34
2121
requests~=2.32.3
22+
singlestoredb~=1.10.0
2223

2324
# Orchestrator templates requirements
2425
cosmotech-run-orchestrator~=1.6.0

0 commit comments

Comments
 (0)