Skip to content

Commit b65179f

Browse files
authored
Merge pull request #535 from duckdb/v1.4-andium
V1.4 andium
2 parents cd56bdc + ccb1936 commit b65179f

18 files changed

+430
-68
lines changed

.github/workflows/LocalTesting.yml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,6 @@ jobs:
5555
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
5656
sudo chmod +x /usr/local/bin/docker-compose
5757
58-
- name: Install required python packages
59-
run: |
60-
python3 -m pip install pyiceberg pytest pyarrow
61-
6258
- name: Install CMake 3.x
6359
run: |
6460
sudo apt-get remove -y cmake cmake-data
@@ -69,6 +65,14 @@ jobs:
6965
fetch-depth: 0
7066
submodules: 'true'
7167

68+
- uses: actions/setup-python@v6
69+
with:
70+
python-version: '3.13'
71+
72+
- name: Install required python packages
73+
run: |
74+
python3 -m pip install -r scripts/requirements.txt
75+
7276
- name: Check installed versions
7377
run: |
7478
ninja --version
@@ -105,4 +109,4 @@ jobs:
105109
env:
106110
ICEBERG_SERVER_AVAILABLE: 1
107111
run: |
108-
python3 -m pytest test/python
112+
python3 -m pytest test/python

.github/workflows/MainDistributionPipeline.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ jobs:
1717
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
1818
with:
1919
extension_name: iceberg
20-
duckdb_version: v1.4.0
21-
ci_tools_version: main
20+
duckdb_version: v1.4.1
21+
ci_tools_version: v1.4.1
2222
exclude_archs: 'windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads'
2323
extra_toolchains: 'python3'
2424

@@ -29,8 +29,8 @@ jobs:
2929
secrets: inherit
3030
with:
3131
extension_name: iceberg
32-
duckdb_version: v1.4.0
33-
ci_tools_version: main
32+
duckdb_version: v1.4.1
33+
ci_tools_version: v1.4.1
3434
exclude_archs: 'windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads'
3535
deploy_latest: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }}
3636
deploy_versioned: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }}

duckdb

Submodule duckdb updated 387 files

extension_config.cmake

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
duckdb_extension_load(avro
33
LOAD_TESTS
44
GIT_URL https://github.com/duckdb/duckdb-avro
5-
GIT_TAG 0c97a61781f63f8c5444cf3e0c6881ecbaa9fe13
5+
GIT_TAG 7b75062f6345d11c5342c09216a75c57342c2e82
66
)
77

88
# Extension from this repo
@@ -17,7 +17,7 @@ duckdb_extension_load(icu)
1717
duckdb_extension_load(ducklake
1818
LOAD_TESTS
1919
GIT_URL https://github.com/duckdb/ducklake
20-
GIT_TAG dbb022506e21c27fc4d4cd3d14995af89955401a
20+
GIT_TAG f134ad86f2f6e7cdf4133086c38ecd9c48f1a772
2121
)
2222

2323

@@ -27,7 +27,7 @@ if (NOT MINGW)
2727
duckdb_extension_load(aws
2828
LOAD_TESTS
2929
GIT_URL https://github.com/duckdb/duckdb-aws
30-
GIT_TAG f855eb3dce37700bfd36fe906a683e4be17dcaf6
30+
GIT_TAG 18803d5e55b9f9f6dda5047d0fdb4f4238b6801d
3131
)
3232
endif ()
3333
endif()
27.8 MB
Binary file not shown.

scripts/requirements.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
pyspark==3.5.0
22
duckdb
3-
pyiceberg
4-
pyarrow
3+
pydantic==2.9.0
4+
pyiceberg==0.9.1
5+
pyarrow
6+
pytest

src/catalog_api.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,10 @@ rest_api_objects::LoadTableResult IRCAPI::CommitNewTable(ClientContext &context,
429429
try {
430430
HTTPHeaders headers(*context.db);
431431
headers.Insert("Content-Type", "application/json");
432+
// if you are creating a table with stage create, you need vended credentials
433+
if (catalog.attach_options.access_mode == IRCAccessDelegationMode::VENDED_CREDENTIALS) {
434+
headers.Insert("X-Iceberg-Access-Delegation", "vended-credentials");
435+
}
432436
auto response =
433437
catalog.auth_handler->Request(RequestType::POST_REQUEST, context, url_builder, headers, create_table_json);
434438
if (response->status != HTTPStatusCode::OK_200) {

src/common/api_utils.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "duckdb/common/exception.hpp"
44
#include "duckdb/common/http_util.hpp"
5+
#include "duckdb/main/extension_helper.hpp"
56
#include "duckdb/common/exception/http_exception.hpp"
67
#include "duckdb/common/string_util.hpp"
78
#include "duckdb/main/client_data.hpp"
@@ -30,6 +31,15 @@ const string &APIUtils::GetCURLCertPath() {
3031
unique_ptr<HTTPResponse> APIUtils::Request(RequestType request_type, ClientContext &context,
3132
const IRCEndpointBuilder &endpoint_builder, HTTPHeaders &headers,
3233
const string &data) {
34+
// load httpfs since iceberg requests do not go through the file system api
35+
if (!context.db.get()) {
36+
throw InvalidConfigurationException("Context does not have database instance when loading Httpfs in Iceberg");
37+
}
38+
ExtensionHelper::AutoLoadExtension(*context.db, "httpfs");
39+
if (!context.db->ExtensionIsLoaded("httpfs")) {
40+
throw MissingExtensionException("The iceberg extension requires the httpfs extension to be loaded!");
41+
}
42+
3343
auto &db = DatabaseInstance::GetDatabase(context);
3444
string request_url = AddHttpHostIfMissing(endpoint_builder.GetURL());
3545

src/iceberg_functions/iceberg_multi_file_list.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ bool IcebergMultiFileList::FileMatchesFilter(const IcebergManifestEntry &file) {
319319
}
320320

321321
auto &filter = *it->second;
322-
if (!IcebergPredicate::MatchBounds(filter, stats, IcebergTransform::Identity())) {
322+
if (!IcebergPredicate::MatchBounds(context, filter, stats, IcebergTransform::Identity())) {
323323
//! If any predicate fails, exclude the file
324324
return false;
325325
}
@@ -521,7 +521,7 @@ bool IcebergMultiFileList::ManifestMatchesFilter(const IcebergManifest &manifest
521521
stats.has_null = field_summary.contains_null;
522522
stats.has_not_null = true; // Not enough information in field_summary to determine if this should be false
523523

524-
if (!IcebergPredicate::MatchBounds(*table_filter, stats, field.transform)) {
524+
if (!IcebergPredicate::MatchBounds(context, *table_filter, stats, field.transform)) {
525525
return false;
526526
}
527527
}

src/iceberg_manifest.cpp

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,21 @@ idx_t WriteToFile(IcebergTableInformation &table_info, const IcebergManifestFile
7878
vector<LogicalType> types;
7979

8080
auto &current_partition_spec = table_info.table_metadata.GetLatestPartitionSpec();
81+
8182
{
83+
child_list_t<Value> status_field;
8284
// status: int - 0
8385
names.push_back("status");
8486
types.push_back(LogicalType::INTEGER);
85-
field_ids.emplace_back("status", Value::INTEGER(STATUS));
87+
status_field.emplace_back("__duckdb_field_id", Value::INTEGER(STATUS));
88+
status_field.emplace_back("__duckdb_nullable", Value::BOOLEAN(false));
8689

8790
auto field_obj = yyjson_mut_arr_add_obj(doc, fields_arr);
8891
yyjson_mut_obj_add_uint(doc, field_obj, "id", STATUS);
8992
yyjson_mut_obj_add_strcpy(doc, field_obj, "name", "status");
9093
yyjson_mut_obj_add_bool(doc, field_obj, "required", true);
9194
yyjson_mut_obj_add_strcpy(doc, field_obj, "type", "int");
95+
field_ids.emplace_back("status", Value::STRUCT(status_field));
9296
}
9397

9498
{
@@ -137,9 +141,12 @@ idx_t WriteToFile(IcebergTableInformation &table_info, const IcebergManifestFile
137141

138142
auto child_fields_arr = yyjson_mut_arr(doc);
139143
{
144+
child_list_t<Value> content_field;
140145
// content: int - 134
141146
children.emplace_back("content", LogicalType::INTEGER);
142-
data_file_field_ids.emplace_back("content", Value::INTEGER(CONTENT));
147+
content_field.emplace_back("__duckdb_field_id", Value::INTEGER(CONTENT));
148+
content_field.emplace_back("__duckdb_nullable", Value::BOOLEAN(false));
149+
data_file_field_ids.emplace_back("content", Value::STRUCT(content_field));
143150

144151
auto field_obj = yyjson_mut_arr_add_obj(doc, child_fields_arr);
145152
yyjson_mut_obj_add_uint(doc, field_obj, "id", CONTENT);
@@ -149,9 +156,12 @@ idx_t WriteToFile(IcebergTableInformation &table_info, const IcebergManifestFile
149156
}
150157

151158
{
159+
child_list_t<Value> file_path;
152160
// file_path: string - 100
153161
children.emplace_back("file_path", LogicalType::VARCHAR);
154-
data_file_field_ids.emplace_back("file_path", Value::INTEGER(FILE_PATH));
162+
file_path.emplace_back("__duckdb_field_id", Value::INTEGER(FILE_PATH));
163+
file_path.emplace_back("__duckdb_nullable", Value::BOOLEAN(false));
164+
data_file_field_ids.emplace_back("file_path", Value::STRUCT(file_path));
155165

156166
auto field_obj = yyjson_mut_arr_add_obj(doc, child_fields_arr);
157167
yyjson_mut_obj_add_uint(doc, field_obj, "id", FILE_PATH);
@@ -161,9 +171,12 @@ idx_t WriteToFile(IcebergTableInformation &table_info, const IcebergManifestFile
161171
}
162172

163173
{
174+
child_list_t<Value> file_format;
164175
// file_format: string - 101
165176
children.emplace_back("file_format", LogicalType::VARCHAR);
166-
data_file_field_ids.emplace_back("file_format", Value::INTEGER(FILE_FORMAT));
177+
file_format.emplace_back("__duckdb_field_id", Value::INTEGER(FILE_FORMAT));
178+
file_format.emplace_back("__duckdb_nullable", Value::BOOLEAN(false));
179+
data_file_field_ids.emplace_back("file_format", Value::STRUCT(file_format));
167180

168181
auto field_obj = yyjson_mut_arr_add_obj(doc, child_fields_arr);
169182
yyjson_mut_obj_add_uint(doc, field_obj, "id", FILE_FORMAT);
@@ -173,9 +186,12 @@ idx_t WriteToFile(IcebergTableInformation &table_info, const IcebergManifestFile
173186
}
174187

175188
{
189+
child_list_t<Value> partition;
176190
// partition: struct(...) - 102
177191
children.emplace_back("partition", PartitionStructType(table_info, manifest_file));
178-
data_file_field_ids.emplace_back("partition", Value::INTEGER(PARTITION));
192+
partition.emplace_back("__duckdb_field_id", Value::INTEGER(PARTITION));
193+
partition.emplace_back("__duckdb_nullable", Value::BOOLEAN(false));
194+
data_file_field_ids.emplace_back("partition", Value::STRUCT(partition));
179195

180196
auto field_obj = yyjson_mut_arr_add_obj(doc, child_fields_arr);
181197
yyjson_mut_obj_add_uint(doc, field_obj, "id", PARTITION);
@@ -190,9 +206,12 @@ idx_t WriteToFile(IcebergTableInformation &table_info, const IcebergManifestFile
190206
}
191207

192208
{
209+
child_list_t<Value> record_count;
193210
// record_count: long - 103
194211
children.emplace_back("record_count", LogicalType::BIGINT);
195-
data_file_field_ids.emplace_back("record_count", Value::INTEGER(RECORD_COUNT));
212+
record_count.emplace_back("__duckdb_field_id", Value::INTEGER(RECORD_COUNT));
213+
record_count.emplace_back("__duckdb_nullable", Value::BOOLEAN(false));
214+
data_file_field_ids.emplace_back("record_count", Value::STRUCT(record_count));
196215

197216
auto field_obj = yyjson_mut_arr_add_obj(doc, child_fields_arr);
198217
yyjson_mut_obj_add_uint(doc, field_obj, "id", RECORD_COUNT);
@@ -202,9 +221,12 @@ idx_t WriteToFile(IcebergTableInformation &table_info, const IcebergManifestFile
202221
}
203222

204223
{
224+
child_list_t<Value> file_size_in_bytes;
205225
// file_size_in_bytes: long - 104
206226
children.emplace_back("file_size_in_bytes", LogicalType::BIGINT);
207-
data_file_field_ids.emplace_back("file_size_in_bytes", Value::INTEGER(FILE_SIZE_IN_BYTES));
227+
file_size_in_bytes.emplace_back("__duckdb_field_id", Value::INTEGER(FILE_SIZE_IN_BYTES));
228+
file_size_in_bytes.emplace_back("__duckdb_nullable", Value::BOOLEAN(false));
229+
data_file_field_ids.emplace_back("file_size_in_bytes", Value::STRUCT(file_size_in_bytes));
208230

209231
auto field_obj = yyjson_mut_arr_add_obj(doc, child_fields_arr);
210232
yyjson_mut_obj_add_uint(doc, field_obj, "id", FILE_SIZE_IN_BYTES);
@@ -226,6 +248,7 @@ idx_t WriteToFile(IcebergTableInformation &table_info, const IcebergManifestFile
226248
names.push_back("data_file");
227249
types.push_back(LogicalType::STRUCT(std::move(children)));
228250
data_file_field_ids.emplace_back("__duckdb_field_id", Value::INTEGER(DATA_FILE));
251+
data_file_field_ids.emplace_back("__duckdb_nullable", Value::BOOLEAN(false));
229252
field_ids.emplace_back("data_file", Value::STRUCT(data_file_field_ids));
230253

231254
auto field_obj = yyjson_mut_arr_add_obj(doc, fields_arr);

0 commit comments

Comments
 (0)