-
Notifications
You must be signed in to change notification settings - Fork 1.1k
backport xe3p #4724
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
backport xe3p #4724
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,10 @@ file(GLOB SOURCES | |
| ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp | ||
| ) | ||
|
|
||
| if(DNNL_WITH_XE3P) | ||
| add_definitions_with_host_compiler(-DXE3P=1) | ||
| endif() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It feels like this is not needed. |
||
|
|
||
| set(OBJ_LIB ${LIB_PACKAGE_NAME}_gpu) | ||
| add_library(${OBJ_LIB} OBJECT ${SOURCES}) | ||
| set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,6 +45,9 @@ uint64_t get_future_extensions( | |
| case gpu_arch_t::xe2: | ||
| case gpu_arch_t::xe_hpc: | ||
| case gpu_arch_t::xe3: | ||
| case gpu_arch_t::xe3p_35_10: | ||
| case gpu_arch_t::xe3p_35_11: | ||
| case gpu_arch_t::xe3p_35_unknown: | ||
| extensions |= (uint64_t)device_ext_t::intel_global_float_atomics; | ||
| extensions | ||
| |= (uint64_t)device_ext_t::intel_variable_eu_thread_count; | ||
|
|
@@ -109,7 +112,13 @@ bool device_info_t::mayiuse_sub_group(int size) const { | |
| case gpu_arch_t::xe_lp: | ||
| case gpu_arch_t::xe_hp: | ||
| case gpu_arch_t::xe_hpg: return utils::one_of(size, 8, 16, 32); | ||
| default: return utils::one_of(size, 16, 32); | ||
| case gpu_arch_t::xe_hpc: | ||
| case gpu_arch_t::xe2: | ||
| case gpu_arch_t::xe3: | ||
| case gpu_arch_t::xe3p_35_10: | ||
| case gpu_arch_t::xe3p_35_11: | ||
| case gpu_arch_t::xe3p_35_unknown: return utils::one_of(size, 16, 32); | ||
| default: return utils::one_of(size, 32); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -145,6 +154,9 @@ int device_info_t::max_eus_per_wg(gpu_arch_t gpu_arch) { | |
| switch (gpu_arch) { | ||
| case gpu::intel::compute::gpu_arch_t::xe_hpc: | ||
| case gpu::intel::compute::gpu_arch_t::xe2: | ||
| case gpu_arch_t::xe3p_35_10: | ||
| case gpu_arch_t::xe3p_35_11: | ||
| case gpu_arch_t::xe3p_35_unknown: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: could the same namespace-prefix style be used for the case labels here and in the one below? |
||
| case gpu::intel::compute::gpu_arch_t::xe3: return 8; | ||
| case gpu::intel::compute::gpu_arch_t::xe_lp: | ||
| case gpu::intel::compute::gpu_arch_t::xe_hp: | ||
|
|
@@ -158,6 +170,9 @@ int device_info_t::max_subgroup_size(gpu_arch_t gpu_arch) { | |
| switch (gpu_arch) { | ||
| case gpu::intel::compute::gpu_arch_t::xe_hpc: | ||
| case gpu::intel::compute::gpu_arch_t::xe2: | ||
| case gpu_arch_t::xe3p_35_10: | ||
| case gpu_arch_t::xe3p_35_11: | ||
| case gpu_arch_t::xe3p_35_unknown: | ||
| case gpu::intel::compute::gpu_arch_t::xe3: return 32; | ||
| case gpu::intel::compute::gpu_arch_t::xe_lp: | ||
| case gpu::intel::compute::gpu_arch_t::xe_hp: | ||
|
|
@@ -179,6 +194,9 @@ int device_info_t::min_subgroup_size() const { | |
| case gpu_arch_t::xe_hpg: return 8; | ||
| case gpu_arch_t::xe_hpc: | ||
| case gpu_arch_t::xe2: | ||
| case gpu_arch_t::xe3p_35_10: | ||
| case gpu_arch_t::xe3p_35_11: | ||
| case gpu_arch_t::xe3p_35_unknown: | ||
| case gpu_arch_t::xe3: return 16; | ||
| default: return 0; | ||
| } | ||
|
|
@@ -188,6 +206,9 @@ int device_info_t::max_exec_size(gpu_arch_t gpu_arch) { | |
| switch (gpu_arch) { | ||
| case gpu::intel::compute::gpu_arch_t::xe_hpc: | ||
| case gpu::intel::compute::gpu_arch_t::xe2: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_10: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_11: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_unknown: | ||
| case gpu::intel::compute::gpu_arch_t::xe3: return 128; | ||
| default: return 64; | ||
| } | ||
|
|
@@ -221,6 +242,9 @@ int device_info_t::threads_per_eu(gpu_arch_t gpu_arch, bool large_grf_mode) { | |
| case gpu::intel::compute::gpu_arch_t::xe_hpg: | ||
| case gpu::intel::compute::gpu_arch_t::xe_hpc: | ||
| case gpu::intel::compute::gpu_arch_t::xe2: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_10: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_11: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_unknown: | ||
| case gpu::intel::compute::gpu_arch_t::xe3: | ||
| return large_grf_mode ? 4 : 8; | ||
| case gpu::intel::compute::gpu_arch_t::unknown: return 7; | ||
|
|
@@ -238,6 +262,11 @@ int device_info_t::max_slm_size(gpu_arch_t gpu_arch) { | |
| case gpu::intel::compute::gpu_arch_t::xe_hpg: | ||
| case gpu::intel::compute::gpu_arch_t::xe_hpc: | ||
| case gpu::intel::compute::gpu_arch_t::xe2: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_10: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_11: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_unknown: | ||
| slm_size = 3 * (1 << 17); | ||
| break; | ||
| case gpu::intel::compute::gpu_arch_t::xe3: slm_size = (1 << 17); break; | ||
| case gpu::intel::compute::gpu_arch_t::unknown: assert(!"not expected"); | ||
| } | ||
|
|
@@ -269,6 +298,9 @@ size_t device_info_t::icache_size() const { | |
| case gpu::intel::compute::gpu_arch_t::xe_hpc: return 80 * 1024; | ||
| case gpu::intel::compute::gpu_arch_t::xe2: return 96 * 1024; | ||
| case gpu::intel::compute::gpu_arch_t::xe3: return 96 * 1024; | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_10: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_11: | ||
| case gpu::intel::compute::gpu_arch_t::xe3p_35_unknown: return 80 * 1024; | ||
| case gpu::intel::compute::gpu_arch_t::unknown: assert(!"not expected"); | ||
| } | ||
| return 0; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,7 +41,18 @@ namespace gpu { | |
| namespace intel { | ||
| namespace compute { | ||
|
|
||
| enum class gpu_arch_t { unknown, xe_lp, xe_hp, xe_hpg, xe_hpc, xe2, xe3 }; | ||
| enum class gpu_arch_t { | ||
| unknown, | ||
| xe_lp, | ||
| xe_hp, | ||
| xe_hpg, | ||
| xe_hpc, | ||
| xe2, | ||
| xe3, | ||
| xe3p_35_10, | ||
| xe3p_35_11, | ||
| xe3p_35_unknown, | ||
| }; | ||
|
|
||
| // Memory for storing ngen::Product to avoid directly including nGEN because of | ||
| // header dependencies outside of src/gpu/intel. | ||
|
|
@@ -58,6 +69,9 @@ static inline const char *to_string(gpu_arch_t arch) { | |
| CASE(xe_hpc); | ||
| CASE(xe2); | ||
| CASE(xe3); | ||
| CASE(xe3p_35_10); | ||
| CASE(xe3p_35_11); | ||
| CASE(xe3p_35_unknown); | ||
| return "unknown"; | ||
| #undef CASE | ||
| } | ||
|
|
@@ -71,6 +85,9 @@ static inline gpu_arch_t str2gpu_arch(const char *str) { | |
| CASE(xe_hpc); | ||
| CASE(xe2); | ||
| CASE(xe3); | ||
| CASE(xe3p_35_10); | ||
| CASE(xe3p_35_11); | ||
| CASE(xe3p_35_unknown); | ||
| return gpu_arch_t::unknown; | ||
| #undef CASE | ||
| } | ||
|
|
@@ -253,6 +270,8 @@ struct device_info_t { | |
|
|
||
| bool has_native(data_type_t type) const; | ||
|
|
||
| bool is_efficient_64bit() const { return is_efficient_64bit_; } | ||
|
|
||
| const std::vector<uint8_t> &get_cache_blob() const { | ||
| return serialized_device_info_.get_data(); | ||
| } | ||
|
|
@@ -282,6 +301,7 @@ struct device_info_t { | |
| bool mayiuse_systolic_ = false; | ||
| bool mayiuse_ngen_kernels_ = false; | ||
| bool mayiuse_system_memory_allocators_ = false; | ||
| bool is_efficient_64bit_ = false; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be nice to have a comment explaining what `is_efficient_64bit_` is. |
||
|
|
||
| std::string name_; | ||
| xpu::runtime_version_t runtime_version_; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Follow TODO?