Skip to content

Commit 7f3d8dc

Browse files
committed
desperate step: add debug output to see what happens in CI
1 parent afe6c3f commit 7f3d8dc

File tree

4 files changed

+88
-5
lines changed

4 files changed

+88
-5
lines changed

src/Interpreters/ClusterFunctionReadTask.cpp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include <Interpreters/ActionsDAG.h>
99
#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
1010
#include <Common/logger_useful.h>
11+
#include <iostream>
1112

1213
namespace DB
1314
{
@@ -36,6 +37,13 @@ ClusterFunctionReadTaskResponse::ClusterFunctionReadTaskResponse(ObjectInfoPtr o
3637
const bool send_over_whole_archive = !context->getSettingsRef()[Setting::cluster_function_process_archive_on_multiple_nodes];
3738
path = send_over_whole_archive ? object->getPathOrPathToArchiveIfArchive() : object->getPath();
3839
absolute_path = object->getAbsolutePath();
40+
41+
std::cerr << "[MASTER ClusterFunctionReadTaskResponse] object->getPath()=" << object->getPath()
42+
<< " object->getAbsolutePath()=" << (object->getAbsolutePath().has_value() ? object->getAbsolutePath().value() : "none")
43+
<< " object->getObjectStorage()=" << (object->getObjectStorage().has_value() ? "set" : "not_set")
44+
<< " extracted path=" << path
45+
<< " extracted absolute_path=" << (absolute_path.has_value() ? absolute_path.value() : "none")
46+
<< std::endl;
3947
}
4048

4149
ClusterFunctionReadTaskResponse::ClusterFunctionReadTaskResponse(const std::string & path_)
@@ -52,6 +60,13 @@ ObjectInfoPtr ClusterFunctionReadTaskResponse::getObjectInfo() const
5260
object->data_lake_metadata = data_lake_metadata;
5361
object->file_meta_info = file_meta_info;
5462
object->absolute_path = absolute_path;
63+
64+
std::cerr << "[WORKER ClusterFunctionReadTaskResponse::getObjectInfo] path=" << path
65+
<< " absolute_path=" << (absolute_path.has_value() ? absolute_path.value() : "none")
66+
<< " created object->getPath()=" << object->getPath()
67+
<< " object->getAbsolutePath()=" << (object->getAbsolutePath().has_value() ? object->getAbsolutePath().value() : "none")
68+
<< std::endl;
69+
5570
return object;
5671
}
5772

src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergIterator.cpp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,8 @@
2424
#include <IO/ReadBufferFromString.h>
2525
#include <IO/ReadHelpers.h>
2626
#include <Interpreters/Context.h>
27+
#include <iostream>
28+
#include <Disks/DiskType.h>
2729

2830
#include <IO/CompressedReadBufferWrapper.h>
2931
#include <Interpreters/ExpressionActions.h>
@@ -339,8 +341,21 @@ ObjectInfoPtr IcebergIterator::next(size_t)
339341
auto [storage_to_use, resolved_key] = resolveObjectStorageForPath(
340342
persistent_components.table_location, manifest_file_entry.file_path, object_storage, secondary_storages, local_context);
341343

344+
std::cerr << "[MASTER IcebergIterator::next] table_location=" << persistent_components.table_location
345+
<< " file_path=" << manifest_file_entry.file_path
346+
<< " resolved_key=" << resolved_key
347+
<< " storage_type=" << (storage_to_use ? toString(storage_to_use->getType()) : "null")
348+
<< " storage_desc=" << (storage_to_use ? storage_to_use->getDescription() : "null")
349+
<< " storage_same_as_base=" << (storage_to_use == object_storage ? "true" : "false")
350+
<< std::endl;
351+
342352
IcebergDataObjectInfoPtr object_info = std::make_shared<IcebergDataObjectInfo>(manifest_file_entry, storage_to_use, resolved_key);
343353

354+
std::cerr << "[MASTER IcebergIterator::next] created object_info: getPath()=" << object_info->getPath()
355+
<< " getAbsolutePath()=" << (object_info->getAbsolutePath().has_value() ? object_info->getAbsolutePath().value() : "none")
356+
<< " getObjectStorage()=" << (object_info->getObjectStorage().has_value() ? "set" : "not_set")
357+
<< std::endl;
358+
344359
for (const auto & position_delete : defineDeletesSpan(manifest_file_entry, position_deletes_files, false))
345360
object_info->addPositionDeleteObject(position_delete);
346361

src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include <optional>
2+
#include <iostream>
23
#include <Storages/ObjectStorage/StorageObjectStorageCluster.h>
34

45
#include <Common/Exception.h>
@@ -500,7 +501,14 @@ class TaskDistributor : public TaskIterator
500501
{
501502
auto task = task_distributor.getNextTask(number_of_current_replica);
502503
if (task)
504+
{
505+
std::cerr << "[MASTER StorageObjectStorageCluster] sending task to replica " << number_of_current_replica
506+
<< " task->getPath()=" << task->getPath()
507+
<< " task->getAbsolutePath()=" << (task->getAbsolutePath().has_value() ? task->getAbsolutePath().value() : "none")
508+
<< " task->getObjectStorage()=" << (task->getObjectStorage().has_value() ? "set" : "not_set")
509+
<< std::endl;
503510
return std::make_shared<ClusterFunctionReadTaskResponse>(std::move(task), context);
511+
}
504512
return std::make_shared<ClusterFunctionReadTaskResponse>();
505513
}
506514

src/Storages/ObjectStorage/StorageObjectStorageSource.cpp

Lines changed: 50 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,8 @@
3939
#endif
4040

4141
#include <fmt/ranges.h>
42-
42+
#include <iostream>
43+
#include <Disks/DiskType.h>
4344

4445
namespace fs = std::filesystem;
4546
namespace ProfileEvents
@@ -520,6 +521,13 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
520521
while (not_a_path || (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0));
521522

522523
ObjectStoragePtr storage_to_use = object_info->getObjectStorage().value_or(object_storage);
524+
525+
std::cerr << "[WORKER createReader] object_info->getPath()=" << object_info->getPath()
526+
<< " object_info->relative_path=" << object_info->relative_path
527+
<< " object_info->getAbsolutePath()=" << (object_info->getAbsolutePath().has_value() ? object_info->getAbsolutePath().value() : "none")
528+
<< " storage_to_use_type=" << toString(storage_to_use->getType())
529+
<< " storage_to_use_desc=" << storage_to_use->getDescription()
530+
<< std::endl;
523531

524532
QueryPipelineBuilder builder;
525533
std::shared_ptr<ISource> source;
@@ -1317,23 +1325,60 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator
13171325
if (!raw || raw->isEmpty())
13181326
return nullptr;
13191327

1320-
object_info = raw->getObjectInfo();
1328+
std::cerr << "[WORKER ReadTaskIterator::next] received raw->path=" << raw->path
1329+
<< " raw->absolute_path=" << (raw->absolute_path.has_value() ? raw->absolute_path.value() : "none")
1330+
<< std::endl;
13211331

1332+
object_info = raw->getObjectInfo();
1333+
1334+
std::cerr << "[WORKER ReadTaskIterator::next] after getObjectInfo: object_info->getPath()=" << object_info->getPath()
1335+
<< " object_info->getAbsolutePath()=" << (object_info->getAbsolutePath().has_value() ? object_info->getAbsolutePath().value() : "none")
1336+
<< " object_info->getObjectStorage()=" << (object_info->getObjectStorage().has_value() ? "set" : "not_set")
1337+
<< std::endl;
1338+
1339+
// The 'path' field from master is already the correctly resolved relative path.
1340+
// We should use it directly and NOT overwrite relative_path.
1341+
// Only resolve absolute_path if we need to determine which storage to use (for secondary storages).
1342+
object_info->object_storage_to_use = object_storage;
1343+
13221344
if (raw->absolute_path.has_value())
13231345
{
13241346
auto [storage_to_use, key]
13251347
= resolveObjectStorageForPath("", raw->absolute_path.value(), object_storage, secondary_storages, getContext());
13261348

1327-
if (!key.empty()) /// Otherwise not a valid key/path, maybe it is "retry_after_us". Store as is.
1349+
std::cerr << "[WORKER ReadTaskIterator::next] resolved absolute_path: key=" << key
1350+
<< " storage_type=" << (storage_to_use ? toString(storage_to_use->getType()) : "null")
1351+
<< " storage_desc=" << (storage_to_use ? storage_to_use->getDescription() : "null")
1352+
<< " storage_same_as_base=" << (storage_to_use == object_storage ? "true" : "false")
1353+
<< " original_relative_path=" << object_info->relative_path
1354+
<< std::endl;
1355+
1356+
if (!key.empty() && storage_to_use != object_storage)
13281357
{
1358+
// File is in a different storage (secondary storage), use that storage
1359+
// BUT preserve the original relative_path from master - don't overwrite it!
13291360
object_info->object_storage_to_use = storage_to_use;
1330-
object_info->relative_path = key;
1361+
1362+
std::cerr << "[WORKER ReadTaskIterator::next] SET: object_storage_to_use=secondary_storage"
1363+
<< " relative_path=" << object_info->relative_path << " (preserved from master)"
1364+
<< std::endl;
1365+
}
1366+
else
1367+
{
1368+
std::cerr << "[WORKER ReadTaskIterator::next] using default storage, relative_path=" << object_info->relative_path
1369+
<< " (preserved from master)" << std::endl;
13311370
}
13321371
}
13331372
else
13341373
{
1335-
object_info->object_storage_to_use = object_storage;
1374+
std::cerr << "[WORKER ReadTaskIterator::next] NO absolute_path: using default storage, relative_path=" << object_info->relative_path
1375+
<< " (preserved from master)" << std::endl;
13361376
}
1377+
1378+
std::cerr << "[WORKER ReadTaskIterator::next] FINAL: object_info->getPath()=" << object_info->getPath()
1379+
<< " object_info->relative_path=" << object_info->relative_path
1380+
<< " object_info->getObjectStorage()=" << (object_info->getObjectStorage().has_value() ? "set" : "not_set")
1381+
<< std::endl;
13371382
}
13381383
else
13391384
{

0 commit comments

Comments
 (0)