|
10 | 10 | from tqdm.contrib.concurrent import thread_map |
11 | 11 |
|
12 | 12 | from libcommon.constants import ROW_IDX_COLUMN |
13 | | -from libcommon.parquet_utils import ( |
14 | | - PARTIAL_PREFIX, |
15 | | - is_list_pa_type, |
16 | | - parquet_export_is_partial, |
17 | | -) |
| 13 | +from libcommon.parquet_utils import is_list_pa_type |
18 | 14 | from libcommon.statistics_utils import ( |
19 | 15 | STRING_DTYPES, |
20 | 16 | AudioColumn, |
|
37 | 33 | "CREATE OR REPLACE TABLE data AS SELECT {columns} FROM read_parquet({source});" |
38 | 34 | ) |
39 | 35 | CREATE_TABLE_JOIN_WITH_TRANSFORMED_DATA_COMMAND_FROM_LIST_OF_PARQUET_FILES = """ |
40 | | - CREATE OR REPLACE TABLE data AS |
41 | | - SELECT {columns}, transformed_df.* FROM read_parquet({source}) |
| 36 | + CREATE OR REPLACE TABLE data AS |
| 37 | + SELECT {columns}, transformed_df.* FROM read_parquet({source}) |
42 | 38 | POSITIONAL JOIN transformed_df; |
43 | 39 | """ |
44 | 40 | CREATE_SEQUENCE_COMMAND = "CREATE OR REPLACE SEQUENCE serial START 0 MINVALUE 0;" |
@@ -187,25 +183,6 @@ def compute_transformed_data(parquet_paths: list[Path], features: dict[str, Any] |
187 | 183 | return transformed_df |
188 | 184 |
|
189 | 185 |
|
190 | | -def duckdb_index_is_partial(duckdb_index_url: str) -> bool: |
191 | | - """ |
192 | | - Check if the DuckDB index is on the full dataset or if it's partial. |
193 | | - It could be partial for two reasons: |
194 | | -
|
195 | | - 1. if the Parquet export that was used to build it is partial |
196 | | - 2. if it's a partial index of the Parquet export |
197 | | -
|
198 | | - Args: |
199 | | - duckdb_index_url (`str`): The URL of the DuckDB index file. |
200 | | -
|
201 | | - Returns: |
202 | | - `bool`: True if the DuckDB index is partial, |
203 | | - or False if it's an index of the full dataset. |
204 | | - """ |
205 | | - _, duckdb_index_file_name = duckdb_index_url.rsplit("/", 1) |
206 | | - return parquet_export_is_partial(duckdb_index_url) or duckdb_index_file_name.startswith(PARTIAL_PREFIX) |
207 | | - |
208 | | - |
209 | 186 | def create_index( |
210 | 187 | database: str, |
211 | 188 | input_table: str, |
|
0 commit comments