Skip to content

Commit 6d09d80

Browse files
committed
feat: add 1 through 4 channel support for dxt ext (v0.2.0)
1 parent 38d6c2a commit 6d09d80

File tree

3 files changed

+226
-94
lines changed

3 files changed

+226
-94
lines changed

docs/version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ the API is complete. It just means we won't break what currently exists.
9292
* Mesh Builder v0.1.0 (pl_mesh_ext.h)
9393
* Shader Variant v0.2.0 (pl_shader_variant_ext.h)
9494
* DDS v0.2.0 (pl_dds_ext.h)
95+
* DXT v0.2.0 (pl_dxt_ext.h)
9596

9697
## Unstable Extensions
9798

@@ -102,4 +103,3 @@ the API is complete. It just means we won't break what currently exists.
102103
* Renderer v0.2.1 (pl_renderer_ext.h)
103104
* Dear ImGui v0.1.0 (pl_dear_imgui_ext.h)
104105
* Animation v0.1.0 (pl_animation_ext.h)
105-
* DXT v0.1.0 (pl_dxt_ext.h)

extensions/pl_dxt_ext.c

Lines changed: 209 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
/*
66
Index of this file:
77
// [SECTION] includes
8+
// [SECTION] internal api
89
// [SECTION] public api implementation
910
// [SECTION] extension loading
1011
// [SECTION] unity build
@@ -19,12 +20,41 @@ Index of this file:
1920
#include "pl.h"
2021
#include "pl_dxt_ext.h"
2122

22-
// extensions
23-
#include "pl_graphics_ext.h"
23+
// libs
24+
#define PL_MATH_INCLUDE_FUNCTIONS
25+
#include "pl_math.h"
2426

2527
// libraries
2628
#include "stb_dxt.h"
2729

30+
//-----------------------------------------------------------------------------
31+
// [SECTION] internal api
32+
//-----------------------------------------------------------------------------
33+
34+
// Copies a single pixel out of a row-major image whose rows are uWidth pixels
// long with no padding between rows (the row stride is uWidth * uChannels bytes).
//   puData    : first byte of the pixel data that uX/uY are relative to
//   uChannels : bytes per pixel; exactly this many bytes are written to auOut
//   uWidth    : row length of the source image in pixels
//   uX, uY    : pixel coordinate to read (not bounds checked here)
//   auOut     : destination, must have room for at least uChannels bytes
static inline void
pl__dxt_sample(const uint8_t* puData, uint32_t uChannels, uint32_t uWidth, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    // do the offset math in size_t: in uint32_t the product silently wraps once
    // the image reaches 4 GiB of pixel data (on targets where size_t is wider)
    const size_t szPixelIndex = (size_t)uY * uWidth + uX;
    const uint8_t* ptSource = puData + szPixelIndex * uChannels;

    for(uint32_t uChannel = 0; uChannel < uChannels; uChannel++)
        auOut[uChannel] = ptSource[uChannel];
}
41+
42+
// Same as pl__dxt_sample, but the coordinate is first bounded to the image.
//
// NOTE: despite the "wrap" in the name, coordinates are NOT wrapped modulo the
// image size. They are passed through pl_clampu with the bounds 0 and
// uWidth - 1 (resp. uHeight - 1), so an out-of-range coordinate presumably
// reads the nearest edge pixel -- confirm pl_clampu's (min, value, max)
// argument order in pl_math.h.
//
//   puData    : first byte of the image (uX/uY are absolute image coordinates)
//   uChannels : bytes per pixel; exactly this many bytes are written to auOut
//   uWidth    : image width in pixels; must be > 0 or "uWidth - 1" wraps around
//   uHeight   : image height in pixels; must be > 0 or "uHeight - 1" wraps around
//   uX, uY    : requested pixel coordinate, may lie outside the image
//   auOut     : destination, must have room for at least uChannels bytes
static inline void
pl__dxt_sample_wrap(const uint8_t* puData, uint32_t uChannels, uint32_t uWidth, uint32_t uHeight, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    uX = pl_clampu(0, uX, uWidth - 1);
    uY = pl_clampu(0, uY, uHeight - 1);

    // do the offset math in size_t: in uint32_t the product silently wraps once
    // the image reaches 4 GiB of pixel data (on targets where size_t is wider)
    const size_t szPixelIndex = (size_t)uY * uWidth + uX;
    const uint8_t* ptSource = puData + szPixelIndex * uChannels;

    for(uint32_t uChannel = 0; uChannel < uChannels; uChannel++)
        auOut[uChannel] = ptSource[uChannel];
}
51+
52+
// Stores one texel into a 4-texel-wide block buffer.
//   puData         : source texel, uDxtBlockWidth bytes are read from it
//   uDxtBlockWidth : size of one texel in the block buffer, in bytes
//   uX, uY         : texel position inside the block (rows are 4 texels wide)
//   auOut          : block buffer that receives the texel
static inline void
pl__dxt_copy(const uint8_t* puData, uint32_t uDxtBlockWidth, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    const uint32_t uTexelIndex = uY * 4 + uX;
    const uint32_t uByteOffset = uTexelIndex * uDxtBlockWidth;
    uint8_t* puDestination = auOut + uByteOffset;

    memcpy(puDestination, puData, uDxtBlockWidth);
}
57+
2858
//-----------------------------------------------------------------------------
2959
// [SECTION] public api implementation
3060
//-----------------------------------------------------------------------------
@@ -38,16 +68,7 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
3868
const uint32_t uBlockSize = (ptInfo->uChannels == 2 || ptInfo->uChannels == 4 ? 16 : 8);
3969

4070
if(szSizeOut)
41-
{
42-
if(ptInfo->uChannels == 4)
43-
*szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight;
44-
else if(ptInfo->uChannels == 3)
45-
*szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight / 2;
46-
else
47-
{
48-
PL_ASSERT(false && "Only supporting 3 & 4 channels for now");
49-
}
50-
}
71+
*szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight * ptInfo->uChannels * uBlockSize / (16 * ptInfo->uChannels);
5172

5273
if(puDataOut == NULL)
5374
return;
@@ -63,6 +84,10 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
6384
const uint32_t uOverflowV = (4 - (uAdjustedImageHeight - ptInfo->uHeight)) % 4;
6485
const uint32_t uWrapPosH = ptInfo->uWidth - uOverflowH;
6586
const uint32_t uWrapPosV = ptInfo->uHeight - uOverflowV;
87+
const uint32_t uDxtBlockWidth = ptInfo->uChannels > 2 ? 4 : ptInfo->uChannels;
88+
89+
uint32_t uBlocksPerRow = uAdjustedImageWidth / 4;
90+
uint32_t uBlocksPerColumn = uAdjustedImageHeight / 4;
6691

6792
uint8_t auPadded[4] = {0};
6893

@@ -74,130 +99,223 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
7499
if(ptInfo->uChannels == 4)
75100
iIncludeAlpha = 1;
76101

77-
if(uOverflowH == 0 && uOverflowV == 0 && ptInfo->uChannels > 2)
102+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~inner fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103+
104+
if(ptInfo->uChannels > 2)
78105
{
79106

80107
for (uint32_t uRowStart = 0; uRowStart < uAdjustedImageHeight; uRowStart += 4)
81108
{
109+
const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4);
110+
const uint32_t uBlockRowBytesOffset = (ptInfo->uWidth * uBytesPerPixel * uRowStart);
82111
for (uint32_t uColumnStart = 0; uColumnStart < uAdjustedImageWidth; uColumnStart += 4)
83112
{
113+
114+
uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4;
115+
uOffset = uBlockSize * uBlockIndex;
116+
117+
const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel;
118+
puData = ptInfo->puData + uOffsetBytes;
119+
84120
for (uint32_t uRow = 0; uRow < 4; uRow++)
85121
{
86122
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
87123
{
88-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * ptInfo->uChannels;
89-
for(uint32_t uChannel = 0; uChannel < ptInfo->uChannels; uChannel++)
90-
auPadded[uChannel] = ptSource[uChannel];
91-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * 4, auPadded, 4);
124+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
125+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
92126
}
93127
}
94128

95129
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
96130
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
97-
98-
uOffset += uBlockSize;
99-
puData += uBytesPerPixel * 4;
100131
}
101-
puData += ptInfo->uWidth * uBytesPerPixel * 3; // by 3 since we already moved first row across
102132
}
103133
}
104-
else // slow path
134+
else if(ptInfo->uChannels == 2)
105135
{
106-
107136
for (uint32_t uRowStart = 0; uRowStart < uAdjustedImageHeight; uRowStart += 4)
108137
{
138+
const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4);
139+
const uint32_t uBlockRowBytesOffset = (ptInfo->uWidth * uBytesPerPixel * uRowStart);
109140
for (uint32_t uColumnStart = 0; uColumnStart < uAdjustedImageWidth; uColumnStart += 4)
110141
{
111-
if (uRowStart >= uWrapPosV && uColumnStart >= uWrapPosH) // overflow on bottom right corner
112-
{
113-
memset(auInDataBuf, 255, 64);
114142

115-
for (uint32_t uRow = 0; uRow < uOverflowV; uRow++)
143+
uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4;
144+
uOffset = uBlockSize * uBlockIndex;
145+
146+
const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel;
147+
puData = ptInfo->puData + uOffsetBytes;
148+
149+
for (uint32_t uRow = 0; uRow < 4; uRow++)
150+
{
151+
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
116152
{
117-
for (uint32_t uColumn = 0; uColumn < uOverflowH; uColumn++)
118-
{
119-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * 4;
120-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
121-
}
153+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
154+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
122155
}
123156
}
124-
else if(uColumnStart >= uWrapPosH) // overflow on right
125-
{
126-
memset(auInDataBuf, 0, 64);
127157

128-
for (uint32_t uRow = 0; uRow < 4; uRow++)
158+
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
159+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
160+
}
161+
}
162+
}
163+
else if(ptInfo->uChannels == 1)
164+
{
165+
for (uint32_t uRowStart = 0; uRowStart < uAdjustedImageHeight; uRowStart += 4)
166+
{
167+
const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4);
168+
const uint32_t uBlockRowBytesOffset = (ptInfo->uWidth * uBytesPerPixel * uRowStart);
169+
for (uint32_t uColumnStart = 0; uColumnStart < uAdjustedImageWidth; uColumnStart += 4)
170+
{
171+
uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4;
172+
uOffset = uBlockSize * uBlockIndex;
173+
174+
const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel;
175+
puData = ptInfo->puData + uOffsetBytes;
176+
177+
for (uint32_t uRow = 0; uRow < 4; uRow++)
178+
{
179+
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
129180
{
130-
for (uint32_t uColumn = 0; uColumn < uOverflowH; uColumn++)
131-
{
132-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * 4;
133-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
134-
135-
if(uColumn == uOverflowH - 1)
136-
{
137-
for(uint32_t iOverflow = 0; iOverflow < uOverflowV; iOverflow++)
138-
{
139-
memcpy(auInDataBuf + (uRow * 4 + uColumn + iOverflow + 1) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
140-
}
141-
}
142-
}
181+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
182+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
143183
}
144184
}
145-
else if (uRowStart >= uWrapPosV) // overflow on bottom
185+
186+
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
187+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
188+
}
189+
}
190+
}
191+
192+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~right fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193+
194+
if(uOverflowH > 0)
195+
{
196+
for (uint32_t uRowStart = 0; uRowStart < uWrapPosV; uRowStart += 4)
197+
{
198+
uint32_t uBlockIndex = uRowStart * uAdjustedImageWidth / 16;
199+
uBlockIndex += uWrapPosH / 4;
200+
uOffset = uBlockSize * uBlockIndex;
201+
const uint32_t uOffsetBytes = (ptInfo->uWidth * uBytesPerPixel * uRowStart) + uWrapPosH * uBytesPerPixel;
202+
puData = ptInfo->puData + uOffsetBytes;
203+
204+
memset(auInDataBuf, 255, 64);
205+
206+
for (uint32_t uRow = 0; uRow < 4; uRow++)
207+
{
208+
for (uint32_t uColumn = 0; uColumn < uOverflowH; uColumn++)
146209
{
147-
memset(auInDataBuf, 255, 64);
210+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
211+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
148212

149-
for (uint32_t uRow = 0; uRow < uOverflowV; uRow++)
213+
if(uColumn == uOverflowH - 1)
150214
{
151-
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
152-
{
153-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * 4;
154-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
155-
if(uRow == uOverflowV - 1)
156-
{
157-
for(uint32_t iOverflow = 0; iOverflow < uOverflowV; iOverflow++)
158-
{
159-
memcpy(auInDataBuf + ((uRow + iOverflow + 1) * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
160-
}
161-
}
162-
}
215+
for(uint32_t iOverflow = 0; iOverflow < 4 - uOverflowH; iOverflow++)
216+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn + iOverflow + 1, uRow, auInDataBuf);
163217
}
164218
}
219+
}
220+
221+
memset(auOutDataBuf, 0, 16);
165222

166-
else
223+
switch (ptInfo->uChannels)
224+
{
225+
case 3:
226+
case 4:
227+
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
228+
break;
229+
case 1:
230+
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
231+
break;
232+
case 2:
233+
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
234+
break;
235+
}
236+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
237+
}
238+
239+
}
240+
241+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~bottom fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
242+
243+
if(uOverflowV > 0)
244+
{
245+
for (uint32_t uColumnStart = 0; uColumnStart < uWrapPosH; uColumnStart += 4)
246+
{
247+
uint32_t uBlockIndex = uColumnStart / 4 + uBlocksPerRow * uWrapPosV / 4;
248+
uOffset = uBlockSize * uBlockIndex;
249+
const uint32_t uOffsetBytes = (ptInfo->uWidth * uBytesPerPixel * uWrapPosV) + uColumnStart * uBytesPerPixel;
250+
puData = ptInfo->puData + uOffsetBytes;
251+
252+
memset(auInDataBuf, 255, 64);
253+
254+
for (uint32_t uRow = 0; uRow < uOverflowV; uRow++)
255+
{
256+
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
167257
{
168-
for (uint32_t uRow = 0; uRow < 4; uRow++)
258+
pl__dxt_sample(puData, ptInfo->uChannels, ptInfo->uWidth, uColumn, uRow, auPadded);
259+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
260+
if(uRow == uOverflowV - 1)
169261
{
170-
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
171-
{
172-
const uint8_t* ptSource = puData + (uRow * ptInfo->uWidth + uColumn) * 4;
173-
memcpy(auInDataBuf + (uRow * 4 + uColumn) * ptInfo->uChannels, ptSource, ptInfo->uChannels);
174-
}
262+
for(uint32_t iOverflow = 0; iOverflow < 4 - uOverflowV; iOverflow++)
263+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow + iOverflow + 1, auInDataBuf);
175264
}
176265
}
266+
}
177267

178-
switch (ptInfo->uChannels)
179-
{
180-
case 3:
181-
case 4:
182-
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
183-
break;
184-
case 1:
185-
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
186-
break;
187-
case 2:
188-
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
189-
break;
190-
}
191-
memcpy(&puDataOut[uOffset], auOutDataBuf, 16);
268+
memset(auOutDataBuf, 0, 16);
192269

193-
uOffset += 16;
194-
if(uColumnStart >= uWrapPosH && uOverflowH > 0)
195-
puData += uBytesPerPixel * uOverflowH;
196-
else
197-
puData += uBytesPerPixel * 4;
270+
switch (ptInfo->uChannels)
271+
{
272+
case 3:
273+
case 4:
274+
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
275+
break;
276+
case 1:
277+
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
278+
break;
279+
case 2:
280+
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
281+
break;
198282
}
199-
puData += ptInfo->uWidth * uBytesPerPixel * 3; // by 3 since we already moved first row across
283+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
284+
}
285+
}
286+
287+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~corner fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
288+
289+
if(uOverflowV > 0 && uOverflowH > 0)
290+
{
291+
uint32_t uBlockIndex = uBlocksPerRow * uBlocksPerColumn - 1;
292+
uOffset = uBlockSize * uBlockIndex;
293+
294+
memset(auInDataBuf, 255, 64);
295+
memset(auOutDataBuf, 0, 16);
296+
297+
for (uint32_t uRow = 0; uRow < 4; uRow++)
298+
{
299+
for (uint32_t uColumn = 0; uColumn < 4; uColumn++)
300+
{
301+
pl__dxt_sample_wrap(ptInfo->puData, ptInfo->uChannels, ptInfo->uWidth, ptInfo->uHeight, uWrapPosH + uColumn, uWrapPosV + uRow, auPadded);
302+
pl__dxt_copy(auPadded, uDxtBlockWidth, uColumn, uRow, auInDataBuf);
303+
}
304+
}
305+
switch (ptInfo->uChannels)
306+
{
307+
case 3:
308+
case 4:
309+
stb_compress_dxt_block(auOutDataBuf, auInDataBuf, iIncludeAlpha, iDxtFlags);
310+
break;
311+
case 1:
312+
stb_compress_bc4_block(auOutDataBuf, auInDataBuf);
313+
break;
314+
case 2:
315+
stb_compress_bc5_block(auOutDataBuf, auInDataBuf);
316+
break;
200317
}
318+
memcpy(&puDataOut[uOffset], auOutDataBuf, uBlockSize);
201319
}
202320
}
203321

0 commit comments

Comments
 (0)