@@ -491,26 +491,28 @@ Examples:
491491 i32 %byte_offset,
492492 i32 0)
493493
494- Texture and Typed Buffer Stores
495- -------------------------------
494+ Stores
495+ ------
496496
497- *relevant types: Textures and TypedBuffer *
497+ *relevant types: Textures and Buffers*
498498
499- The `TextureStore `_ and `BufferStore `_ DXIL operations always write all four
500- 32-bit components to a texture or a typed buffer. While both operations include
501- a mask parameter, it is specified that the mask must cover all components when
502- used with these types .
499+ The `TextureStore`_, `BufferStore`_, and `RawBufferStore`_ DXIL operations
500+ write up to four components to a texture or a buffer. Each includes a mask
501+ argument that is used when fewer than 4 components are written, but notably
502+ the mask only takes on the contiguous x, xy, xyz, and xyzw values.
503503
504- The store operations that we define as intrinsics behave similarly, and will
505- only accept writes to the whole of the contained type. This differs from the
506- loads above, but this makes sense to do from a semantics preserving point of
507- view. Thus, texture and buffer stores may only operate on 4-element vectors of
508- types that are 32-bits or fewer, such as ``<4 x i32> ``, ``<4 x float> ``, and
509- ``<4 x half> ``, and 2 element vectors of 64-bit types like ``<2 x double> `` and
510- ``<2 x i64> ``.
504+ We define the LLVM store intrinsics to accept vectors when storing multiple
505+ components rather than using ``undef`` and a mask, but otherwise match the DXIL
506+ ops fairly closely.
511507
512- .. _BufferStore : https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
513508.. _TextureStore : https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#texturestore
509+ .. _BufferStore : https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
510+ .. _RawBufferStore : https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#rawbufferstore
511+
512+ For TypedBuffer, we only need one coordinate, and we must always write a vector
513+ since partial writes aren't possible. Similarly to the load operations
514+ described above, we handle 64-bit types specially and only handle 2-element
515+ vectors rather than 4.
514516
515517Examples:
516518
@@ -548,3 +550,85 @@ Examples:
548550 target("dx.TypedBuffer", half, 1, 0) %buf, i32 %index, <4 x half> %data)
549551 call void @llvm.dx.resource.store.typedbuffer.tdx.Buffer_v2f64_1_0_0t(
550552 target("dx.TypedBuffer", double, 1, 0) %buf, i32 %index, <2 x double> %data)
553+
554+ For RawBuffer, we need two indices and we accept scalars and vectors of 4 or
555+ fewer elements. Note that we do allow vectors of 4 64-bit elements here.
556+
557+ Arguments:
558+
559+ .. list-table:: ``@llvm.dx.resource.store.rawbuffer``
560+    :header-rows: 1
561+
562+ * - Argument
563+ -
564+ - Type
565+ - Description
566+ * - Return value
567+ -
568+ - ``void ``
569+ -
570+ * - ``%buffer ``
571+ - 0
572+ - ``target("dx.RawBuffer", ...)``
573+ - The buffer to store into
574+ * - ``%index ``
575+ - 1
576+ - ``i32 ``
577+ - Index into the buffer
578+ * - ``%offset ``
579+ - 2
580+ - ``i32 ``
581+ - Byte offset into structured buffer elements
582+ * - ``%data ``
583+ - 3
584+ - Scalar or vector
585+ - The data to store
586+
587+ Examples:
588+
589+ .. code-block:: llvm
590+
591+ ; float
592+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f32_1_0_0t.f32(
593+ target("dx.RawBuffer", float, 1, 0, 0) %buffer,
594+ i32 %index, i32 0, float %data)
595+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.f32(
596+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
597+ i32 %index, i32 0, float %data)
598+
599+ ; float4
600+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f32_1_0_0t.v4f32(
601+ target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer,
602+ i32 %index, i32 0, <4 x float> %data)
603+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f32(
604+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
605+ i32 %index, i32 0, <4 x float> %data)
606+
607+ ; struct S0 { float4 f; int4 i; }
608+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
609+ target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer,
610+ i32 %index, i32 0, <4 x float> %data0)
611+ call void @llvm.dx.resource.store.rawbuffer.v4i32(
612+ target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer,
613+ i32 %index, i32 16, <4 x i32> %data1)
614+
615+ ; struct Q { float4 f; half3 h; }
616+ ; struct R { int z; Q x; }
617+ call void @llvm.dx.resource.store.rawbuffer.i32(
618+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
619+ %buffer,
620+ i32 %index, i32 0, i32 %data0)
621+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
622+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
623+ %buffer,
624+ i32 %index, i32 4, <4 x float> %data1)
625+ call void @llvm.dx.resource.store.rawbuffer.v3f16(
626+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
627+ %buffer,
628+ i32 %index, i32 20, <3 x half> %data2)
629+
630+ ; byteaddressbuf.Store<double4>
631+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f64(
632+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
633+ i32 %index, i32 0, <4 x double> %data)
634+
0 commit comments