Skip to content

Commit 3e4be09

Browse files
committed
feat: add 1 through 4 channel support for dxt ext (v0.2.0)
1 parent 38d6c2a commit 3e4be09

File tree

3 files changed

+223
-95
lines changed

3 files changed

+223
-95
lines changed

docs/version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ the API is complete. It just means we won't break what currently exists.
9292
* Mesh Builder v0.1.0 (pl_mesh_ext.h)
9393
* Shader Variant v0.2.0 (pl_shader_variant_ext.h)
9494
* DDS v0.2.0 (pl_dds_ext.h)
95+
* DXT v0.2.0 (pl_dxt_ext.h)
9596

9697
## Unstable Extensions
9798

@@ -102,4 +103,3 @@ the API is complete. It just means we won't break what currently exists.
102103
* Renderer v0.2.1 (pl_renderer_ext.h)
103104
* Dear ImGui v0.1.0 (pl_dear_imgui_ext.h)
104105
* Animation v0.1.0 (pl_animation_ext.h)
105-
* DXT v0.1.0 (pl_dxt_ext.h)

extensions/pl_dxt_ext.c

Lines changed: 206 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
/*
66
Index of this file:
77
// [SECTION] includes
8+
// [SECTION] internal api
89
// [SECTION] public api implementation
910
// [SECTION] extension loading
1011
// [SECTION] unity build
@@ -19,12 +20,37 @@ Index of this file:
1920
#include "pl.h"
2021
#include "pl_dxt_ext.h"
2122

22-
// extensions
23-
#include "pl_graphics_ext.h"
24-
2523
// libraries
2624
#include "stb_dxt.h"
2725

26+
//-----------------------------------------------------------------------------
27+
// [SECTION] internal api
28+
//-----------------------------------------------------------------------------
29+
30+
// Copies one pixel out of a row-major image into auOut.
//   puData    : first byte of the image (or sub-region) being sampled
//   uChannels : bytes per pixel; auOut must have room for at least this many
//   uWidth    : number of pixels per source row
//   uX, uY    : pixel coordinates relative to puData (NOT bounds checked)
//   auOut     : receives uChannels bytes; any bytes past that are left untouched
static inline void
pl__dxt_sample(const uint8_t* puData, uint32_t uChannels, uint32_t uWidth, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    // index math is done in size_t so the byte offset does not wrap at 32 bits
    // for very large images (on targets where size_t is wider than uint32_t)
    const size_t szPixelIndex = (size_t)uY * uWidth + uX;
    const uint8_t* ptSource = puData + szPixelIndex * uChannels;

    for(uint32_t uChannel = 0; uChannel < uChannels; uChannel++)
        auOut[uChannel] = ptSource[uChannel];
}
37+
38+
// Copies one pixel out of a row-major image into auOut, first clamping the
// requested coordinates to the last valid column/row. Despite the "wrap" in
// the name, out-of-range coordinates are clamped to the edge, not wrapped.
//   puData    : first byte of the image
//   uChannels : bytes per pixel; auOut must have room for at least this many
//   uWidth    : number of pixels per source row
//   uHeight   : number of source rows
//   uX, uY    : requested pixel coordinates (may exceed the image bounds)
//   auOut     : receives uChannels bytes; left untouched for an empty image
static inline void
pl__dxt_sample_wrap(const uint8_t* puData, uint32_t uChannels, uint32_t uWidth, uint32_t uHeight, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    // an empty image has no pixel to clamp to; bailing out here also keeps
    // "dimension - 1" below from wrapping around to UINT32_MAX
    if(uWidth == 0 || uHeight == 0)
        return;

    // coordinates are unsigned, so only the upper bound needs enforcing
    if(uX > uWidth - 1)
        uX = uWidth - 1;
    if(uY > uHeight - 1)
        uY = uHeight - 1;

    // index math is done in size_t so the byte offset does not wrap at 32 bits
    // for very large images (on targets where size_t is wider than uint32_t)
    const size_t szPixelIndex = (size_t)uY * uWidth + uX;
    const uint8_t* ptSource = puData + szPixelIndex * uChannels;

    for(uint32_t uChannel = 0; uChannel < uChannels; uChannel++)
        auOut[uChannel] = ptSource[uChannel];
}
47+
48+
// Stores one texel into a 4-texel-wide staging block.
//   puData         : texel bytes to store (at least uDxtBlockWidth bytes)
//   uDxtBlockWidth : bytes occupied by each texel slot in the block
//   uX, uY         : texel column/row inside the block
//   auOut          : block buffer; slot (uX, uY) starts at byte
//                    (uY * 4 + uX) * uDxtBlockWidth
static inline void
pl__dxt_copy(const uint8_t* puData, uint32_t uDxtBlockWidth, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    const uint32_t uTexelSlot = uY * 4 + uX; // rows of the block are 4 texels wide
    uint8_t* puDestination = auOut + uTexelSlot * uDxtBlockWidth;
    memcpy(puDestination, puData, uDxtBlockWidth);
}
53+
2854
//-----------------------------------------------------------------------------
2955
// [SECTION] public api implementation
3056
//-----------------------------------------------------------------------------
@@ -38,16 +64,7 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
3864
const uint32_t uBlockSize = (ptInfo->uChannels == 2 || ptInfo->uChannels == 4 ? 16 : 8);
3965

4066
if(szSizeOut)
41-
{
42-
if(ptInfo->uChannels == 4)
43-
*szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight;
44-
else if(ptInfo->uChannels == 3)
45-
*szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight / 2;
46-
else
47-
{
48-
PL_ASSERT(false && "Only supporting 3 & 4 channels for now");
49-
}
50-
}
67+
*szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight * ptInfo->uChannels * uBlockSize / (16 * ptInfo->uChannels);
5168

5269
if(puDataOut == NULL)
5370
return;
@@ -63,6 +80,10 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
6380
const uint32_t uOverflowV = (4 - (uAdjustedImageHeight - ptInfo->uHeight)) % 4;
6481
const uint32_t uWrapPosH = ptInfo->uWidth - uOverflowH;
6582
const uint32_t uWrapPosV = ptInfo->uHeight - uOverflowV;
83+
const uint32_t uDxtBlockWidth = ptInfo->uChannels > 2 ? 4 : ptInfo->uChannels;
84+
85+
uint32_t uBlocksPerRow = uAdjustedImageWidth / 4;
86+
uint32_t uBlocksPerColumn = uAdjustedImageHeight / 4;
6687

6788
uint8_t auPadded[4] = {0};
6889

@@ -74,130 +95,223 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
7495
if(ptInfo->uChannels == 4)
7596
iIncludeAlpha = 1;
7697

77-
if(uOverflowH == 0 && uOverflowV == 0 && ptInfo->uChannels > 2)
98+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~inner fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
99+
100+
if(ptInfo->uChannels > 2)
78101
{
79102

80103
for (uint32_t uRowStart = 0; uRowStart < uAdjustedImageHeight; uRowStart += 4)
81104
{
105+
const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4);
106+
const uint32_t uBlockRowBytesOffset = (ptInfo->uWidth * uBytesPerPixel * uRowStart);
82107
for (uint32_t uColumnStart = 0; uColumnStart < uAdjustedImageWidth; uColumnStart += 4)
83108
{
109+
110+
uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4;
111+
uOffset = uBlockSize * uBlockIndex;
112+
113+
const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel;
114+
puData = ptInfo->puData + uOffsetBytes;
115+
84116
for (uint32_t uRow = 0; uRow < 4; uRow++)
85117
{
86118
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
87119
{
88-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * ptInfo->uChannels;
89-
for(uint32_t uChannel = 0; uChannel < ptInfo->uChannels; uChannel++)
90-
auPadded[uChannel] = ptSource[uChannel];
91-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * 4, auPadded, 4);
120+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
121+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
92122
}
93123
}
94124

95125
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
96126
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
97-
98-
uOffset += uBlockSize;
99-
puData += uBytesPerPixel * 4;
100127
}
101-
puData += ptInfo->uWidth * uBytesPerPixel * 3; // by 3 since we already moved first row across
102128
}
103129
}
104-
else // slow path
130+
else if(ptInfo->uChannels == 2)
105131
{
106-
107132
for (uint32_t uRowStart = 0; uRowStart < uAdjustedImageHeight; uRowStart += 4)
108133
{
134+
const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4);
135+
const uint32_t uBlockRowBytesOffset = (ptInfo->uWidth * uBytesPerPixel * uRowStart);
109136
for (uint32_t uColumnStart = 0; uColumnStart < uAdjustedImageWidth; uColumnStart += 4)
110137
{
111-
if (uRowStart >= uWrapPosV && uColumnStart >= uWrapPosH) // overflow on bottom right corner
112-
{
113-
memset(auInDataBuf, 255, 64);
114138

115-
for (uint32_t uRow = 0; uRow < uOverflowV; uRow++)
139+
uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4;
140+
uOffset = uBlockSize * uBlockIndex;
141+
142+
const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel;
143+
puData = ptInfo->puData + uOffsetBytes;
144+
145+
for (uint32_t uRow = 0; uRow < 4; uRow++)
146+
{
147+
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
116148
{
117-
for (uint32_t uColumn = 0; uColumn < uOverflowH; uColumn++)
118-
{
119-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * 4;
120-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
121-
}
149+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
150+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
122151
}
123152
}
124-
else if(uColumnStart >= uWrapPosH) // overflow on right
125-
{
126-
memset(auInDataBuf, 0, 64);
127153

128-
for (uint32_t uRow = 0; uRow < 4; uRow++)
154+
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
155+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
156+
}
157+
}
158+
}
159+
else if(ptInfo->uChannels == 1)
160+
{
161+
for (uint32_t uRowStart = 0; uRowStart < uAdjustedImageHeight; uRowStart += 4)
162+
{
163+
const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4);
164+
const uint32_t uBlockRowBytesOffset = (ptInfo->uWidth * uBytesPerPixel * uRowStart);
165+
for (uint32_t uColumnStart = 0; uColumnStart < uAdjustedImageWidth; uColumnStart += 4)
166+
{
167+
uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4;
168+
uOffset = uBlockSize * uBlockIndex;
169+
170+
const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel;
171+
puData = ptInfo->puData + uOffsetBytes;
172+
173+
for (uint32_t uRow = 0; uRow < 4; uRow++)
174+
{
175+
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
129176
{
130-
for (uint32_t uColumn = 0; uColumn < uOverflowH; uColumn++)
131-
{
132-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * 4;
133-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
134-
135-
if(uColumn == uOverflowH - 1)
136-
{
137-
for(uint32_t iOverflow = 0; iOverflow < uOverflowV; iOverflow++)
138-
{
139-
memcpy(auInDataBuf + (uRow * 4 + uColumn + iOverflow + 1) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
140-
}
141-
}
142-
}
177+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
178+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
143179
}
144180
}
145-
else if (uRowStart >= uWrapPosV) // overflow on bottom
181+
182+
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
183+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
184+
}
185+
}
186+
}
187+
188+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~right fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
189+
190+
if(uOverflowH > 0)
191+
{
192+
for (uint32_t uRowStart = 0; uRowStart < uWrapPosV; uRowStart += 4)
193+
{
194+
uint32_t uBlockIndex = uRowStart * uAdjustedImageWidth / 16;
195+
uBlockIndex += uWrapPosH / 4;
196+
uOffset = uBlockSize * uBlockIndex;
197+
const uint32_t uOffsetBytes = (ptInfo->uWidth * uBytesPerPixel * uRowStart) + uWrapPosH * uBytesPerPixel;
198+
puData = ptInfo->puData + uOffsetBytes;
199+
200+
memset(auInDataBuf, 255, 64);
201+
202+
for (uint32_t uRow = 0; uRow < 4; uRow++)
203+
{
204+
for (uint32_t uColumn = 0; uColumn < uOverflowH; uColumn++)
146205
{
147-
memset(auInDataBuf, 255, 64);
206+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
207+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
148208

149-
for (uint32_t uRow = 0; uRow < uOverflowV; uRow++)
209+
if(uColumn == uOverflowH - 1)
150210
{
151-
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
152-
{
153-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * 4;
154-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
155-
if(uRow == uOverflowV - 1)
156-
{
157-
for(uint32_t iOverflow = 0; iOverflow < uOverflowV; iOverflow++)
158-
{
159-
memcpy(auInDataBuf + ((uRow + iOverflow + 1) * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
160-
}
161-
}
162-
}
211+
for(uint32_t iOverflow = 0; iOverflow < 4 - uOverflowH; iOverflow++)
212+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn + iOverflow + 1, uRow, auInDataBuf);
163213
}
164214
}
215+
}
216+
217+
memset(auOutDataBuf, 0, 16);
218+
219+
switch (ptInfo->uChannels)
220+
{
221+
case 3:
222+
case 4:
223+
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
224+
break;
225+
case 1:
226+
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
227+
break;
228+
case 2:
229+
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
230+
break;
231+
}
232+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
233+
}
234+
235+
}
236+
237+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~bottom fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
165238

166-
else
239+
if(uOverflowV > 0)
240+
{
241+
for (uint32_t uColumnStart = 0; uColumnStart < uWrapPosH; uColumnStart += 4)
242+
{
243+
uint32_t uBlockIndex = uColumnStart / 4 + uBlocksPerRow * uWrapPosV / 4;
244+
uOffset = uBlockSize * uBlockIndex;
245+
const uint32_t uOffsetBytes = (ptInfo->uWidth * uBytesPerPixel * uWrapPosV) + uColumnStart * uBytesPerPixel;
246+
puData = ptInfo->puData + uOffsetBytes;
247+
248+
memset(auInDataBuf, 255, 64);
249+
250+
for (uint32_t uRow = 0; uRow < uOverflowV; uRow++)
251+
{
252+
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
167253
{
168-
for (uint32_t uRow = 0; uRow < 4; uRow++)
254+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
255+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
256+
if(uRow == uOverflowV - 1)
169257
{
170-
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
171-
{
172-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * 4;
173-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
174-
}
258+
for(uint32_t iOverflow = 0; iOverflow < 4 - uOverflowV; iOverflow++)
259+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow + iOverflow + 1, auInDataBuf);
175260
}
176261
}
262+
}
177263

178-
switch (ptInfo->uChannels)
179-
{
180-
case 3:
181-
case 4:
182-
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
183-
break;
184-
case 1:
185-
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
186-
break;
187-
case 2:
188-
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
189-
break;
190-
}
191-
memcpy(&puDataOut[uOffset], auOutDataBuf, 16);
264+
memset(auOutDataBuf, 0, 16);
265+
266+
switch (ptInfo->uChannels)
267+
{
268+
case 3:
269+
case 4:
270+
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
271+
break;
272+
case 1:
273+
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
274+
break;
275+
case 2:
276+
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
277+
break;
278+
}
279+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
280+
}
281+
}
282+
283+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~corner fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
284+
285+
if(uOverflowV > 0 && uOverflowH > 0)
286+
{
287+
uint32_t uBlockIndex = uBlocksPerRow * uBlocksPerColumn - 1;
288+
uOffset = uBlockSize * uBlockIndex;
289+
290+
memset(auInDataBuf, 255, 64);
291+
memset(auOutDataBuf, 0, 16);
192292

193-
uOffset += 16;
194-
if(uColumnStart >= uWrapPosH && uOverflowH > 0)
195-
puData += uBytesPerPixel * uOverflowH;
196-
else
197-
puData += uBytesPerPixel * 4;
293+
for (uint32_t uRow = 0; uRow < 4; uRow++)
294+
{
295+
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
296+
{
297+
pl__dxt_sample_wrap(ptInfo->puData, ptInfo->uChannels, ptInfo->uWidth, ptInfo->uHeight, uWrapPosH + uColumn, uWrapPosV + uRow, auPadded);
298+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
198299
}
199-
puData += ptInfo->uWidth * uBytesPerPixel * 3; // by 3 since we already moved first row across
200300
}
301+
switch (ptInfo->uChannels)
302+
{
303+
case 3:
304+
case 4:
305+
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
306+
break;
307+
case 1:
308+
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
309+
break;
310+
case 2:
311+
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
312+
break;
313+
}
314+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
201315
}
202316
}
203317

0 commit comments

Comments
 (0)