Skip to content

Commit 2e2e604

Browse files
authored
drop unused code from warehouse (#1425)
1 parent efe1202 commit 2e2e604

File tree

2 files changed: 0 additions and 71 deletions

src/datachain/data_storage/sqlite.py

Lines changed: 0 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@
2929
from tqdm.auto import tqdm
3030

3131
import datachain.sql.sqlite
32-
from datachain import semver
3332
from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
3433
from datachain.data_storage.db_engine import DatabaseEngine
3534
from datachain.data_storage.schema import DefaultSchema
@@ -692,61 +691,6 @@ def get_dataset_sources(
692691
for row in self.db.execute(query, cursor=cur)
693692
]
694693

695-
def merge_dataset_rows(
696-
self,
697-
src: DatasetRecord,
698-
dst: DatasetRecord,
699-
src_version: str,
700-
dst_version: str,
701-
) -> None:
702-
dst_empty = False
703-
704-
if not self.db.has_table(self.dataset_table_name(src, src_version)):
705-
# source table doesn't exist, nothing to do
706-
return
707-
708-
src_dr = self.dataset_rows(src, src_version).table
709-
710-
if not self.db.has_table(self.dataset_table_name(dst, dst_version)):
711-
# destination table doesn't exist, create it
712-
self.create_dataset_rows_table(
713-
self.dataset_table_name(dst, dst_version),
714-
columns=src_dr.columns,
715-
)
716-
dst_empty = True
717-
718-
dst_dr = self.dataset_rows(dst, dst_version).table
719-
merge_fields = [c.name for c in src_dr.columns if c.name != "sys__id"]
720-
select_src = select(*(getattr(src_dr.columns, f) for f in merge_fields))
721-
722-
if dst_empty:
723-
# we don't need union, but just select from source to destination
724-
insert_query = sqlite.insert(dst_dr).from_select(merge_fields, select_src)
725-
else:
726-
dst_version_latest = None
727-
# find the previous version of the destination dataset
728-
dst_previous_versions = [
729-
v.version
730-
for v in dst.versions # type: ignore [union-attr]
731-
if semver.compare(v.version, dst_version) == -1
732-
]
733-
if dst_previous_versions:
734-
dst_version_latest = max(dst_previous_versions)
735-
736-
dst_dr_latest = self.dataset_rows(dst, dst_version_latest).table
737-
738-
select_dst_latest = select(
739-
*(getattr(dst_dr_latest.c, f) for f in merge_fields)
740-
)
741-
union_query = sqlalchemy.union(select_src, select_dst_latest)
742-
insert_query = (
743-
sqlite.insert(dst_dr)
744-
.from_select(merge_fields, union_query)
745-
.prefix_with("OR IGNORE")
746-
)
747-
748-
self.db.execute(insert_query)
749-
750694
def prepare_entries(self, entries: "Iterable[File]") -> Iterable[dict[str, Any]]:
751695
return (e.model_dump() for e in entries)
752696

src/datachain/data_storage/warehouse.py

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -371,21 +371,6 @@ def drop_dataset_rows_table(
371371
table = sa.Table(table_name, self.db.metadata)
372372
self.db.drop_table(table, if_exists=if_exists)
373373

374-
@abstractmethod
375-
def merge_dataset_rows(
376-
self,
377-
src: "DatasetRecord",
378-
dst: "DatasetRecord",
379-
src_version: str,
380-
dst_version: str,
381-
) -> None:
382-
"""
383-
Merges source dataset rows and current latest destination dataset rows
384-
into a new rows table created for new destination dataset version.
385-
Note that table for new destination version must be created upfront.
386-
Merge results should not contain duplicates.
387-
"""
388-
389374
def dataset_rows_select(
390375
self,
391376
query: sa.Select,

0 commit comments

Comments (0)