Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit b55f9df

Browse files
committed
Make FP64 capability check a GpuMgr responsibility
1 parent bfdb831 commit b55f9df

File tree

7 files changed

+25
-19
lines changed

7 files changed

+25
-19
lines changed

omniscidb/CudaMgr/CudaMgr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ class CudaMgr : public GpuMgr {
164164
return getMinNumMPsForAllDevices();
165165
};
166166

167+
// FP64 capability for the CUDA backend: forwards the result of
// isArchPascalOrLater() unchanged.
bool hasFP64Support() const override {
  return isArchPascalOrLater();
}
168+
167169
bool hasSharedMemoryAtomicsSupport() const override {
168170
/*
169171
* From CUDA Toolkit documentation:

omniscidb/DataMgr/GpuMgr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,7 @@ struct GpuMgr {
6262
virtual uint32_t getGridSize() const = 0;
6363
virtual uint32_t getMinEUNumForAllDevices() const = 0;
6464
virtual bool hasSharedMemoryAtomicsSupport() const = 0;
65+
// TODO: hasFP64Support implementations do not account for different device capabilities
66+
virtual bool hasFP64Support() const { return true; };
6567
virtual size_t getMinSharedMemoryPerBlockForAllDevices() const = 0;
6668
};

omniscidb/L0Mgr/L0Mgr.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,14 @@ bool L0Manager::hasSharedMemoryAtomicsSupport() const {
451451
return true;
452452
}
453453

454+
// Reports whether FP64 is supported, based on the module properties of the
// first device of the first driver only. Other devices are not consulted.
//
// Returns true when the queried device reports any FP64 capability flag
// (per the Level Zero spec, fp64flags is 0 when FP64 is not supported).
bool L0Manager::hasFP64Support() const {
  // drivers_[0] is dereferenced below, so verify it exists before indexing;
  // previously only the device list was checked.
  CHECK_GT(drivers_.size(), size_t(0));
  const auto& devices = drivers_[0]->devices();
  CHECK_GT(devices.size(), size_t(0));

  ze_device_module_properties_t module_props{ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES};
  L0_SAFE_CALL(zeDeviceGetModuleProperties(devices[0]->device(), &module_props));

  // fp64flags is a bitmask; make the mask-to-bool conversion explicit.
  return module_props.fp64flags != 0;
}
461+
454462
size_t L0Manager::getMinSharedMemoryPerBlockForAllDevices() const {
455463
auto comp = [](const auto& a, const auto& b) {
456464
return a->maxSharedLocalMemory() < b->maxSharedLocalMemory();

omniscidb/L0Mgr/L0Mgr.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -239,12 +239,13 @@ class L0Manager : public GpuMgr {
239239
size_t getMaxAllocationSize(const int device_num) const;
240240
// Returns a fixed page size of 4096 bytes; device_num is not consulted.
size_t getPageSize(const int device_num) const {
  return 4096u;
}
241241

242-
virtual uint32_t getMaxBlockSize() const override;
243-
virtual int8_t getSubGroupSize() const override;
244-
virtual uint32_t getGridSize() const override;
245-
virtual uint32_t getMinEUNumForAllDevices() const override;
246-
virtual bool hasSharedMemoryAtomicsSupport() const override;
247-
virtual size_t getMinSharedMemoryPerBlockForAllDevices() const override;
242+
uint32_t getMaxBlockSize() const override;
243+
int8_t getSubGroupSize() const override;
244+
uint32_t getGridSize() const override;
245+
uint32_t getMinEUNumForAllDevices() const override;
246+
bool hasSharedMemoryAtomicsSupport() const override;
247+
bool hasFP64Support() const override;
248+
size_t getMinSharedMemoryPerBlockForAllDevices() const override;
248249

249250
const std::vector<std::shared_ptr<L0Driver>>& drivers() const;
250251

omniscidb/L0Mgr/L0MgrNoL0.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ size_t L0Manager::getMinSharedMemoryPerBlockForAllDevices() const {
148148
return 0u;
149149
};
150150

151+
// Placeholder implementation (presumably for builds without Level Zero,
// judging by the file name - confirm). It is not expected to be called.
bool L0Manager::hasFP64Support() const {
  // Fail the check unconditionally; the return statement that follows only
  // gives the function a value on every path.
  CHECK(false);
  return false;
}
155+
151156
const std::vector<std::shared_ptr<L0Driver>>& L0Manager::drivers() const {
152157
return drivers_;
153158
}

omniscidb/QueryEngine/Execute.cpp

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -880,19 +880,9 @@ GpuMgr* Executor::gpuMgr() const {
880880
return gpu_mgr;
881881
}
882882

883-
bool Executor::isArchPascalOrLater(const ExecutorDeviceType dt) const {
884-
if (dt == ExecutorDeviceType::GPU) {
885-
return gpuMgr()->getPlatform() == GpuMgrPlatform::CUDA
886-
? cudaMgr()->isArchPascalOrLater()
887-
: false;
888-
}
889-
return false;
890-
}
891-
892883
bool Executor::deviceSupportsFP64(const ExecutorDeviceType dt) const {
893884
if (dt == ExecutorDeviceType::GPU) {
894-
return gpuMgr()->getPlatform() == GpuMgrPlatform::CUDA ? isArchPascalOrLater(dt)
895-
: true;
885+
return gpuMgr()->hasFP64Support();
896886
}
897887
return true;
898888
}

omniscidb/QueryEngine/Execute.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,6 @@ class Executor : public StringDictionaryProxyProvider {
431431

432432
GpuMgr* gpuMgr() const;
433433

434-
bool isArchPascalOrLater(const ExecutorDeviceType dt) const;
435-
436434
bool deviceSupportsFP64(const ExecutorDeviceType dt) const;
437435

438436
bool needFetchAllFragments(const InputColDescriptor& col_desc,

0 commit comments

Comments
 (0)