Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit 038e35c

Browse files
committed
Update llpc from commit f1bdd306
Add Navi48 support
1 parent eaf7c1d commit 038e35c

File tree

1,287 files changed

+29999
-89
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,287 files changed

+29999
-89
lines changed

.typos.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,5 @@ dne = "dne"
2929
offen = "offen"
3030
varing = "varing"
3131
Derivate = "Derivate"
32+
TESE = "TESE"
33+
SER = "SER"

cmake/vkgc.cmake

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
##
22
#######################################################################################################################
33
#
4-
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
4+
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
77
# of this software and associated documentation files (the "Software"), to
@@ -38,6 +38,12 @@ if(LLPC_BUILD_GFX11)
3838
endif()
3939
#endif
4040

41+
#if LLPC_BUILD_GFX12
42+
if(LLPC_BUILD_GFX12)
43+
target_compile_definitions(vkgc_headers INTERFACE LLPC_BUILD_GFX12)
44+
endif()
45+
#endif
46+
4147
#if LLPC_RAY_TRACING
4248
if(LLPC_RAY_TRACING)
4349
if(NOT LLPC_IS_STANDALONE)

compilerutils/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
##
22
#######################################################################################################################
33
#
4-
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
4+
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
77
# of this software and associated documentation files (the "Software"), to

compilerutils/plugin/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
##
22
#######################################################################################################################
33
#
4-
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
4+
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
77
# of this software and associated documentation files (the "Software"), to

compilerutils/test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
##
22
#######################################################################################################################
33
#
4-
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
4+
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
77
# of this software and associated documentation files (the "Software"), to

gfxruntime/src/shaders/AdvancedBlend.hlsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
***********************************************************************************************************************
33
*
4-
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
4+
* Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to

include/vkgcDefs.h

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,20 @@ struct CompileTimeConst {
453453
} values; ///< The compile-time values for this slot.
454454
};
455455

456+
#if LLPC_BUILD_GFX12
457+
/// Operation types for temporal hint overrides. Each value is the bit offset of that operation's 4-bit field.
458+
enum TemporalHintOpType {
459+
TemporalHintAtmWrite = 0,
460+
TemporalHintImageRead = 4,
461+
TemporalHintImageWrite = 8,
462+
TemporalHintTessFactorWrite = 12,
463+
TemporalHintTessRead = 16,
464+
TemporalHintTessWrite = 20,
465+
TemporalHintBufferRead = 24,
466+
TemporalHintBufferWrite = 28,
467+
};
468+
#endif
469+
456470
/// Represents info of compile-time constants within a shader of a specified stage.
457471
struct CompileConstInfo {
458472
unsigned numCompileTimeConstants; ///< Number of compile time constants.
@@ -496,7 +510,11 @@ struct PipelineOptions {
496510
bool reverseThreadGroup; ///< If set, enable thread group reversing
497511
bool internalRtShaders; ///< Whether this pipeline has internal raytracing shaders
498512
unsigned forceNonUniformResourceIndexStageMask; ///< Mask of the stage to force using non-uniform resource index.
513+
#if LLPC_BUILD_GFX12
514+
bool expertSchedulingMode;
515+
#else
499516
bool reserved16;
517+
#endif
500518

501519
struct GLState {
502520
bool replaceSetWithResourceType; ///< For OGL only, replace 'set' with resource type during spirv translate
@@ -519,14 +537,27 @@ struct PipelineOptions {
519537
} glState;
520538
const auto &getGlState() const { return glState; }
521539

540+
#if LLPC_BUILD_GFX12
541+
unsigned cacheScopePolicyControl; ///< Control cache scope policy. attributes-through-memory read/write is
542+
/// available.
543+
#else
522544
unsigned reserved20;
545+
#endif
523546
bool enablePrimGeneratedQuery; ///< If set, primitive generated query is enabled
524547
bool disablePerCompFetch; ///< Disable per component fetch in uber fetch shader.
525548
bool reserved21;
526549
bool optimizePointSizeWrite; ///< If set, the write of PointSize in the last vertex processing stage will be
527550
///< eliminated if the write value is 1.0.
528551
CompileConstInfo *compileConstInfo; ///< Compile time constant data.
552+
#if LLPC_BUILD_GFX12
553+
unsigned temporalHintControl; ///< Override value for temporal hint. A load/store occupies 4 bits. The highest bit
554+
/// of 4 bits marks whether to override temporal hint.
555+
/// Arrange from the low bit to high bit in the following order:
556+
/// TemporalHintAtmWrite, TemporalHintImageRead, TemporalHintImageWrite,
557+
/// TemporalHintTessFactorWrite, TemporalHintTessRead, TemporalHintTessWrite
558+
#else
529559
unsigned reserved22;
560+
#endif
530561
bool padBufferSizeToNextDword; ///< Vulkan only, set if the driver rounds the buffer size up the next dword
531562
};
532563

@@ -776,6 +807,28 @@ inline unsigned compact32(ShaderHash hash) {
776807
/// Represent a pipeline option which can be automatic as well as explicitly set.
777808
enum InvariantLoads : unsigned { Auto = 0, EnableOptimization = 1, DisableOptimization = 2, ClearInvariants = 3 };
778809

810+
#if LLPC_BUILD_GFX12
811+
/// Control cache policy: whether to use LLC (last level cache, aka set noAlloc).
812+
struct CachePolicyLlc {
813+
union NoAllocResource {
814+
struct {
815+
unsigned set : 5; ///< Resource set
816+
unsigned binding : 16; ///< Resource binding
817+
unsigned noAlloc : 1; ///< llc_noAlloc policy
818+
unsigned : 10;
819+
};
820+
struct {
821+
unsigned resourceId : 21; ///< Combined resource set and binding (overlays the set and binding bit-fields)
822+
unsigned : 11;
823+
};
824+
unsigned u32All;
825+
};
826+
827+
const unsigned *noAllocs; // Set for each resource.
828+
unsigned resourceCount; // The count of resources
829+
};
830+
#endif
831+
779832
/// Represents per shader stage options.
780833
struct PipelineShaderOptions {
781834
ShaderHash clientHash; ///< Client-supplied unique shader hash. A value of zero indicates that LLPC should
@@ -918,6 +971,10 @@ struct PipelineShaderOptions {
918971
/// Application workaround: forward propagate NoContraction decoration to any related FAdd operation.
919972
bool forwardPropagateNoContract;
920973

974+
#if LLPC_BUILD_GFX12
975+
/// Enable round-robin mode for waves in workgroup.
976+
bool workgroupRoundRobin;
977+
#endif
921978
/// Binding ID offset of default uniform block
922979
unsigned constantBufferBindingOffset;
923980

@@ -931,6 +988,15 @@ struct PipelineShaderOptions {
931988
/// will be assigned values as if they were decorated as DeviceIndex.
932989
bool viewIndexFromDeviceIndex;
933990

991+
#if LLPC_BUILD_GFX12
992+
/// Control LLC cache policy
993+
CachePolicyLlc cachePolicyLlc;
994+
995+
/// Override value for temporal hint. A load/store occupies 4 bits. The highest bit of 4 bits marks whether to
996+
/// override temporal hint.
997+
unsigned temporalHintShaderControl;
998+
#endif
999+
9341000
/// Indicate whether the vertex shader is used by transform pipeline
9351001
bool enableTransformShader;
9361002

@@ -1471,13 +1537,18 @@ struct RayTracingPipelineBuildInfo {
14711537
unsigned pipelineLibStageMask; ///< Pipeline library stage mask
14721538
//@}
14731539

1474-
unsigned payloadSizeMaxInLib; ///< Pipeline library maxPayloadSize
1475-
unsigned attributeSizeMaxInLib; ///< Pipeline library maxAttributeSize
1476-
bool isReplay; ///< Pipeline is created for replaying
1477-
const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be
1478-
/// stored inside the ELF
1479-
size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data
1480-
unsigned cpsFlags; ///< Cps feature flags
1540+
unsigned payloadSizeMaxInLib; ///< Pipeline library maxPayloadSize
1541+
unsigned attributeSizeMaxInLib; ///< Pipeline library maxAttributeSize
1542+
bool isReplay; ///< Pipeline is created for replaying
1543+
const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be
1544+
/// stored inside the ELF
1545+
size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data
1546+
unsigned cpsFlags; ///< Cps feature flags
1547+
#if LLPC_BUILD_GFX12
1548+
bool disableDynamicVgpr; ///< Whether to disable dynamic VGPR mode for continuations. If not set, dVGPR mode is
1549+
/// enabled by default.
1550+
unsigned dynamicVgprBlockSize; ///< The size of the VGPR allocation granule used in dVGPR mode.
1551+
#endif
14811552
GpurtOption *pGpurtOptions; ///< Array of GPURT options
14821553
unsigned gpurtOptionCount; ///< Number of GPURT options
14831554
bool rtIgnoreDeclaredPayloadSize; ///< Ignore the declared payload size in the shader to address issues with Proton.

lgc/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,12 @@ target_sources(LLVMlgc PRIVATE
197197
lowering/LowerRayQueryWrapper.cpp
198198
)
199199

200+
#if LLPC_BUILD_GFX12
201+
if(LLPC_BUILD_GFX12)
202+
target_sources(LLVMlgc PRIVATE lowering/AddBufferOperationMetadata.cpp)
203+
endif()
204+
#endif
205+
200206
# include/lgc/lowering
201207
target_sources(LLVMlgc PRIVATE
202208
include/lgc/lowering/AddLoopMetadata.h
@@ -235,6 +241,12 @@ target_sources(LLVMlgc PRIVATE
235241
include/lgc/lowering/WorkaroundDsSubdwordWrite.h
236242
)
237243

244+
#if LLPC_BUILD_GFX12
245+
if(LLPC_BUILD_GFX12)
246+
target_sources(LLVMlgc PRIVATE include/lgc/lowering/AddBufferOperationMetadata.h)
247+
endif()
248+
#endif
249+
238250
# lgc/state
239251
target_sources(LLVMlgc PRIVATE
240252
state/Compiler.cpp

lgc/builder/BuilderImpl.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,51 @@ Type *BuilderBase::getConditionallyVectorizedTy(Type *elementTy, Type *maybeVecT
7474
// @param vector2 : The float vector 2
7575
// @param instName : Name to give instruction(s)
7676
Value *BuilderImpl::CreateDotProduct(Value *const vector1, Value *const vector2, const Twine &instName) {
77+
#if LLPC_BUILD_GFX12
78+
if (getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 12) {
79+
// Use a chain of v_dot2_f16_f16/v_dot2_bf16_bf16 on gfx12+.
80+
//
81+
// Note: GFX11 has this instruction, but its precision doesn't satisfy Vulkan requirements.
82+
//
83+
// Note: GFX10 chips may have v_dot2_f32_f16, which we could consider generating in cases where bitexact results
84+
// are not required.
85+
//
86+
// Note: v_dot2_f16_f16/v_dot2_bf16_bf16 only respects RTE mode according to HW spec. We must check the
87+
// specified rounding mode before using it. Also, v_dot2_f16_f16/v_dot2_bf16_bf16 is not IEEE compliant
88+
// so we must check NSZ as well.
89+
const auto fp16RoundMode =
90+
getPipelineState()->getShaderModes()->getCommonShaderMode(m_shaderStage.value()).fp16RoundMode;
91+
const auto vectorTy = dyn_cast<FixedVectorType>(vector1->getType());
92+
if (vectorTy && (vectorTy->getScalarSizeInBits() == 16) &&
93+
(fp16RoundMode == FpRoundMode::DontCare || fp16RoundMode == FpRoundMode::Even) &&
94+
getFastMathFlags().noSignedZeros()) {
95+
int compCount = vectorTy->getNumElements();
96+
Value *result = nullptr;
97+
Type *basicType = getHalfTy();
98+
Intrinsic::AMDGCNIntrinsics inst = Intrinsic::amdgcn_fdot2_f16_f16;
99+
if (vectorTy->getScalarType()->isBFloatTy()) {
100+
basicType = getBFloatTy();
101+
inst = Intrinsic::amdgcn_fdot2_bf16_bf16;
102+
}
103+
104+
if (compCount % 2 == 0) {
105+
result = ConstantFP::get(basicType, 0.0);
106+
} else {
107+
// If the component count is odd, prefer feeding the last product (odd one out) as initial value.
108+
Value *lhs = CreateExtractElement(vector1, compCount - 1);
109+
Value *rhs = CreateExtractElement(vector2, compCount - 1);
110+
result = CreateFMul(lhs, rhs);
111+
}
112+
113+
for (int i = 0; i + 1 < compCount; i += 2) {
114+
Value *lhs = CreateShuffleVector(vector1, {i, i + 1});
115+
Value *rhs = CreateShuffleVector(vector2, {i, i + 1});
116+
result = CreateIntrinsic(basicType, inst, {lhs, rhs, result});
117+
}
118+
return result;
119+
}
120+
}
121+
#endif
77122

78123
Value *product = CreateFMul(vector1, vector2);
79124
if (!isa<VectorType>(product->getType()))
@@ -254,6 +299,9 @@ Value *BuilderImpl::CreateIntegerDotProduct(Value *vector1, Value *vector2, Valu
254299
bool BuilderImpl::supportWaveWideBPermute(ShaderStageEnum shaderStage) const {
255300
auto gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion().major;
256301
auto supportBPermute = gfxIp == 8 || gfxIp == 9;
302+
#if LLPC_BUILD_GFX12
303+
supportBPermute = supportBPermute || (gfxIp == 12);
304+
#endif
257305
auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage);
258306
supportBPermute = supportBPermute || waveSize == 32;
259307
return supportBPermute;
@@ -265,6 +313,14 @@ bool BuilderImpl::supportPermLane64Dpp() const {
265313
return getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 11;
266314
}
267315

316+
#if LLPC_BUILD_GFX12
317+
// =====================================================================================================================
318+
// Get whether the context we are building in supports permute lane var operations.
319+
bool BuilderImpl::supportPermLaneVar() const {
320+
return getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 12;
321+
}
322+
#endif
323+
268324
// =====================================================================================================================
269325
// Create an "if..endif" or "if..else..endif" structure. The current basic block becomes the "endif" block, and all
270326
// instructions in that block before the insert point are moved to the "if" block. The insert point is moved to

lgc/builder/DescBuilder.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,16 @@ Value *BuilderImpl::buildBufferCompactDesc(Value *desc, Value *stride) {
422422
sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT;
423423
sqBufRsrcWord3.gfx11.oobSelect = stride ? 3 : 2;
424424
assert(sqBufRsrcWord3.u32All == 0x20014FAC || sqBufRsrcWord3.u32All == 0x30014FAC);
425-
} else {
425+
}
426+
#if LLPC_BUILD_GFX12
427+
else if (gfxIp.major == 12) {
428+
sqBufRsrcWord3.gfx12.format = BUF_FORMAT_32_UINT;
429+
sqBufRsrcWord3.gfx12.compressionEn = 1;
430+
sqBufRsrcWord3.gfx12.oobSelect = stride ? 3 : 2;
431+
assert(sqBufRsrcWord3.u32All == 0x22014FAC || sqBufRsrcWord3.u32All == 0x32014FAC);
432+
}
433+
#endif
434+
else {
426435
llvm_unreachable("Not implemented!");
427436
}
428437
bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord3.u32All), 3);

0 commit comments

Comments
 (0)