File tree: 2 files changed, +13 -1 lines changed
src/datachain/data_storage
Original file line number | Diff line number | Diff line change
@@ -390,7 +390,9 @@ def dataset_stats(
390 390           expressions: tuple[_ColumnsClauseArgument[Any], ...] = (
391 391               sa.func.count(table.c.sys__id),
392 392           )
393     -         if "size" in table.columns:
    393 +         if "file__size" in table.columns:
    394 +             expressions = (*expressions, sa.func.sum(table.c.file__size))
    395 +         elif "size" in table.columns:
394 396               expressions = (*expressions, sa.func.sum(table.c.size))
395 397           query = select(*expressions)
396 398           ((nrows, *rest),) = self.db.execute(query)
Original file line number | Diff line number | Diff line change
  4   4   import pandas as pd
  5   5   import pytest
  6   6
      7 + from datachain.dataset import DatasetStats
  7   8   from datachain.lib.dc import DataChain
  8   9   from datachain.lib.file import File
  9  10
@@ -205,3 +206,12 @@ def test_show_no_truncate(capsys, catalog):
205 206       for i in range(3):
206 207           assert client[i] in normalized_output
207 208           assert details[i] in normalized_output
    209 +
    210 +
    211 + def test_from_storage_dataset_stats(tmp_dir, catalog):
    212 +     for i in range(4):
    213 +         (tmp_dir / f"file{i}.txt").write_text(f"file{i}")
    214 +
    215 +     dc = DataChain.from_storage(tmp_dir.as_uri(), catalog=catalog).save("test-data")
    216 +     stats = catalog.dataset_stats(dc.name, dc.version)
    217 +     assert stats == DatasetStats(num_objects=4, size=20)
You can’t perform that action at this time.
0 commit comments