55/*
66Index of this file:
77// [SECTION] includes
8+ // [SECTION] internal api
89// [SECTION] public api implementation
910// [SECTION] extension loading
1011// [SECTION] unity build
@@ -19,12 +20,41 @@ Index of this file:
1920#include "pl.h"
2021#include "pl_dxt_ext.h"
2122
22- // extensions
23- #include "pl_graphics_ext.h"
23+ // libs
24+ #define PL_MATH_INCLUDE_FUNCTIONS
25+ #include "pl_math.h"
2426
2527// libraries
2628#include "stb_dxt.h"
2729
30+ //-----------------------------------------------------------------------------
31+ // [SECTION] internal api
32+ //-----------------------------------------------------------------------------
33+
// Reads one texel from a packed, row-major 8-bit image.
//
//   puData    : first byte of the image (or of the region treated as origin)
//   uChannels : number of bytes stored per texel
//   uWidth    : number of texels per image row; a row is uWidth * uChannels bytes
//   uX, uY    : texel coordinates relative to puData
//   auOut     : receives uChannels bytes; bytes past uChannels are left untouched
//
// No bounds checking is done here: the caller must guarantee that (uX, uY)
// addresses readable memory.
static inline void
pl__dxt_sample(const uint8_t* puData, uint32_t uChannels, uint32_t uWidth, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    // widen before multiplying so the byte offset cannot wrap in 32 bits
    const size_t szTexelOffset = ((size_t)uY * uWidth + uX) * uChannels;
    const uint8_t* ptSource = puData + szTexelOffset;

    for (uint32_t uChannel = 0; uChannel < uChannels; uChannel++)
    {
        auOut[uChannel] = ptSource[uChannel];
    }
}
41+
// Reads one texel from a packed, row-major 8-bit image, forcing the
// coordinates into the image first.
//
// NOTE: despite the "wrap" in the name, out-of-range coordinates are clamped
// to the last column / last row (edge texels are repeated), not wrapped
// around to the opposite side.
//
//   puData    : first byte of the image
//   uChannels : number of bytes stored per texel
//   uWidth    : image width in texels
//   uHeight   : image height in texels
//   uX, uY    : requested texel coordinates; may lie outside the image
//   auOut     : receives uChannels bytes; bytes past uChannels are left untouched
//
// If either dimension is zero there is no texel to read and auOut is left
// unmodified.
static inline void
pl__dxt_sample_wrap(const uint8_t* puData, uint32_t uChannels, uint32_t uWidth, uint32_t uHeight, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    // "uWidth - 1" would underflow for an empty image and disable the clamp
    if (uWidth == 0 || uHeight == 0)
    {
        return;
    }

    // coordinates are unsigned, so only the upper bound can be violated
    if (uX > uWidth - 1)
    {
        uX = uWidth - 1;
    }
    if (uY > uHeight - 1)
    {
        uY = uHeight - 1;
    }

    // widen before multiplying so the byte offset cannot wrap in 32 bits
    const size_t szTexelOffset = ((size_t)uY * uWidth + uX) * uChannels;
    const uint8_t* ptSource = puData + szTexelOffset;

    for (uint32_t uChannel = 0; uChannel < uChannels; uChannel++)
    {
        auOut[uChannel] = ptSource[uChannel];
    }
}
51+
// Stores one texel into a 4x4 block staging buffer.
//
// The buffer holds texels row by row, four texels per row, each texel
// occupying uDxtBlockWidth bytes.
//
//   puData         : source texel; uDxtBlockWidth bytes are read from it
//   uDxtBlockWidth : number of bytes each texel occupies in the staging buffer
//   uX, uY         : texel position inside the block (column, row)
//   auOut          : staging buffer; must hold at least
//                    (uY * 4 + uX + 1) * uDxtBlockWidth bytes
static inline void
pl__dxt_copy(const uint8_t* puData, uint32_t uDxtBlockWidth, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    enum { PL__DXT_TEXELS_PER_BLOCK_ROW = 4 };

    const size_t szTexelIndex = (size_t)uY * PL__DXT_TEXELS_PER_BLOCK_ROW + uX;
    memcpy(auOut + szTexelIndex * uDxtBlockWidth, puData, uDxtBlockWidth);
}
57+
2858//-----------------------------------------------------------------------------
2959// [SECTION] public api implementation
3060//-----------------------------------------------------------------------------
@@ -38,16 +68,7 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
3868 const uint32_t uBlockSize = (ptInfo -> uChannels == 2 || ptInfo -> uChannels == 4 ? 16 : 8 );
3969
4070 if (szSizeOut )
41- {
42- if (ptInfo -> uChannels == 4 )
43- * szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight ;
44- else if (ptInfo -> uChannels == 3 )
45- * szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight / 2 ;
46- else
47- {
48- PL_ASSERT (false && "Only supporting 3 & 4 channels for now" );
49- }
50- }
71+ * szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight * ptInfo -> uChannels * uBlockSize / (16 * ptInfo -> uChannels );
5172
5273 if (puDataOut == NULL )
5374 return ;
@@ -63,6 +84,10 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
6384 const uint32_t uOverflowV = (4 - (uAdjustedImageHeight - ptInfo -> uHeight )) % 4 ;
6485 const uint32_t uWrapPosH = ptInfo -> uWidth - uOverflowH ;
6586 const uint32_t uWrapPosV = ptInfo -> uHeight - uOverflowV ;
87+ const uint32_t uDxtBlockWidth = ptInfo -> uChannels > 2 ? 4 : ptInfo -> uChannels ;
88+
89+ uint32_t uBlocksPerRow = uAdjustedImageWidth / 4 ;
90+ uint32_t uBlocksPerColumn = uAdjustedImageHeight / 4 ;
6691
6792 uint8_t auPadded [4 ] = {0 };
6893
@@ -74,130 +99,223 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
7499 if (ptInfo -> uChannels == 4 )
75100 iIncludeAlpha = 1 ;
76101
77- if (uOverflowH == 0 && uOverflowV == 0 && ptInfo -> uChannels > 2 )
102+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~inner fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103+
104+ if (ptInfo -> uChannels > 2 )
78105 {
79106
80107 for (uint32_t uRowStart = 0 ; uRowStart < uAdjustedImageHeight ; uRowStart += 4 )
81108 {
109+ const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4 );
110+ const uint32_t uBlockRowBytesOffset = (ptInfo -> uWidth * uBytesPerPixel * uRowStart );
82111 for (uint32_t uColumnStart = 0 ; uColumnStart < uAdjustedImageWidth ; uColumnStart += 4 )
83112 {
113+
114+ uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4 ;
115+ uOffset = uBlockSize * uBlockIndex ;
116+
117+ const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel ;
118+ puData = ptInfo -> puData + uOffsetBytes ;
119+
84120 for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
85121 {
86122 for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
87123 {
88- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * ptInfo -> uChannels ;
89- for (uint32_t uChannel = 0 ; uChannel < ptInfo -> uChannels ; uChannel ++ )
90- auPadded [uChannel ] = ptSource [uChannel ];
91- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * 4 , auPadded , 4 );
124+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
125+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
92126 }
93127 }
94128
95129 stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
96130 memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
97-
98- uOffset += uBlockSize ;
99- puData += uBytesPerPixel * 4 ;
100131 }
101- puData += ptInfo -> uWidth * uBytesPerPixel * 3 ; // by 3 since we already moved first row across
102132 }
103133 }
104- else // slow path
134+ else if ( ptInfo -> uChannels == 2 )
105135 {
106-
107136 for (uint32_t uRowStart = 0 ; uRowStart < uAdjustedImageHeight ; uRowStart += 4 )
108137 {
138+ const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4 );
139+ const uint32_t uBlockRowBytesOffset = (ptInfo -> uWidth * uBytesPerPixel * uRowStart );
109140 for (uint32_t uColumnStart = 0 ; uColumnStart < uAdjustedImageWidth ; uColumnStart += 4 )
110141 {
111- if (uRowStart >= uWrapPosV && uColumnStart >= uWrapPosH ) // overflow on bottom right corner
112- {
113- memset (auInDataBuf , 255 , 64 );
114142
115- for (uint32_t uRow = 0 ; uRow < uOverflowV ; uRow ++ )
143+ uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4 ;
144+ uOffset = uBlockSize * uBlockIndex ;
145+
146+ const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel ;
147+ puData = ptInfo -> puData + uOffsetBytes ;
148+
149+ for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
150+ {
151+ for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
116152 {
117- for (uint32_t uColumn = 0 ; uColumn < uOverflowH ; uColumn ++ )
118- {
119- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * 4 ;
120- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
121- }
153+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
154+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
122155 }
123156 }
124- else if (uColumnStart >= uWrapPosH ) // overflow on right
125- {
126- memset (auInDataBuf , 0 , 64 );
127157
128- for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
158+ stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
159+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
160+ }
161+ }
162+ }
163+ else if (ptInfo -> uChannels == 1 )
164+ {
165+ for (uint32_t uRowStart = 0 ; uRowStart < uAdjustedImageHeight ; uRowStart += 4 )
166+ {
167+ const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4 );
168+ const uint32_t uBlockRowBytesOffset = (ptInfo -> uWidth * uBytesPerPixel * uRowStart );
169+ for (uint32_t uColumnStart = 0 ; uColumnStart < uAdjustedImageWidth ; uColumnStart += 4 )
170+ {
171+ uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4 ;
172+ uOffset = uBlockSize * uBlockIndex ;
173+
174+ const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel ;
175+ puData = ptInfo -> puData + uOffsetBytes ;
176+
177+ for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
178+ {
179+ for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
129180 {
130- for (uint32_t uColumn = 0 ; uColumn < uOverflowH ; uColumn ++ )
131- {
132- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * 4 ;
133- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
134-
135- if (uColumn == uOverflowH - 1 )
136- {
137- for (uint32_t iOverflow = 0 ; iOverflow < uOverflowV ; iOverflow ++ )
138- {
139- memcpy (auInDataBuf + (uRow * 4 + uColumn + iOverflow + 1 ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
140- }
141- }
142- }
181+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
182+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
143183 }
144184 }
145- else if (uRowStart >= uWrapPosV ) // overflow on bottom
185+
186+ stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
187+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
188+ }
189+ }
190+ }
191+
192+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~right fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193+
194+ if (uOverflowH > 0 )
195+ {
196+ for (uint32_t uRowStart = 0 ; uRowStart < uWrapPosV ; uRowStart += 4 )
197+ {
198+ uint32_t uBlockIndex = uRowStart * uAdjustedImageWidth / 16 ;
199+ uBlockIndex += uWrapPosH / 4 ;
200+ uOffset = uBlockSize * uBlockIndex ;
201+ const uint32_t uOffsetBytes = (ptInfo -> uWidth * uBytesPerPixel * uRowStart ) + uWrapPosH * uBytesPerPixel ;
202+ puData = ptInfo -> puData + uOffsetBytes ;
203+
204+ memset (auInDataBuf , 255 , 64 );
205+
206+ for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
207+ {
208+ for (uint32_t uColumn = 0 ; uColumn < uOverflowH ; uColumn ++ )
146209 {
147- memset (auInDataBuf , 255 , 64 );
210+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
211+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
148212
149- for ( uint32_t uRow = 0 ; uRow < uOverflowV ; uRow ++ )
213+ if ( uColumn == uOverflowH - 1 )
150214 {
151- for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
152- {
153- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * 4 ;
154- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
155- if (uRow == uOverflowV - 1 )
156- {
157- for (uint32_t iOverflow = 0 ; iOverflow < uOverflowV ; iOverflow ++ )
158- {
159- memcpy (auInDataBuf + ((uRow + iOverflow + 1 ) * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
160- }
161- }
162- }
215+ for (uint32_t iOverflow = 0 ; iOverflow < 4 - uOverflowH ; iOverflow ++ )
216+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn + iOverflow + 1 , uRow , auInDataBuf );
163217 }
164218 }
219+ }
220+
221+ memset (auOutDataBuf , 0 , 16 );
165222
166- else
223+ switch (ptInfo -> uChannels )
224+ {
225+ case 3 :
226+ case 4 :
227+ stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
228+ break ;
229+ case 1 :
230+ stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
231+ break ;
232+ case 2 :
233+ stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
234+ break ;
235+ }
236+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
237+ }
238+
239+ }
240+
241+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~bottom fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
242+
243+ if (uOverflowV > 0 )
244+ {
245+ for (uint32_t uColumnStart = 0 ; uColumnStart < uWrapPosH ; uColumnStart += 4 )
246+ {
247+ uint32_t uBlockIndex = uColumnStart / 4 + uBlocksPerRow * uWrapPosV / 4 ;
248+ uOffset = uBlockSize * uBlockIndex ;
249+ const uint32_t uOffsetBytes = (ptInfo -> uWidth * uBytesPerPixel * uWrapPosV ) + uColumnStart * uBytesPerPixel ;
250+ puData = ptInfo -> puData + uOffsetBytes ;
251+
252+ memset (auInDataBuf , 255 , 64 );
253+
254+ for (uint32_t uRow = 0 ; uRow < uOverflowV ; uRow ++ )
255+ {
256+ for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
167257 {
168- for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
258+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
259+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
260+ if (uRow == uOverflowV - 1 )
169261 {
170- for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
171- {
172- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * 4 ;
173- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
174- }
262+ for (uint32_t iOverflow = 0 ; iOverflow < 4 - uOverflowV ; iOverflow ++ )
263+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow + iOverflow + 1 , auInDataBuf );
175264 }
176265 }
266+ }
177267
178- switch (ptInfo -> uChannels )
179- {
180- case 3 :
181- case 4 :
182- stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
183- break ;
184- case 1 :
185- stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
186- break ;
187- case 2 :
188- stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
189- break ;
190- }
191- memcpy (& puDataOut [uOffset ], auOutDataBuf , 16 );
268+ memset (auOutDataBuf , 0 , 16 );
192269
193- uOffset += 16 ;
194- if (uColumnStart >= uWrapPosH && uOverflowH > 0 )
195- puData += uBytesPerPixel * uOverflowH ;
196- else
197- puData += uBytesPerPixel * 4 ;
270+ switch (ptInfo -> uChannels )
271+ {
272+ case 3 :
273+ case 4 :
274+ stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
275+ break ;
276+ case 1 :
277+ stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
278+ break ;
279+ case 2 :
280+ stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
281+ break ;
198282 }
199- puData += ptInfo -> uWidth * uBytesPerPixel * 3 ; // by 3 since we already moved first row across
283+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
284+ }
285+ }
286+
287+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~corner fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
288+
289+ if (uOverflowV > 0 && uOverflowH > 0 )
290+ {
291+ uint32_t uBlockIndex = uBlocksPerRow * uBlocksPerColumn - 1 ;
292+ uOffset = uBlockSize * uBlockIndex ;
293+
294+ memset (auInDataBuf , 255 , 64 );
295+ memset (auOutDataBuf , 0 , 16 );
296+
297+ for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
298+ {
299+ for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
300+ {
301+ pl__dxt_sample_wrap (ptInfo -> puData , ptInfo -> uChannels , ptInfo -> uWidth , ptInfo -> uHeight , uWrapPosH + uColumn , uWrapPosV + uRow , auPadded );
302+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
303+ }
304+ }
305+ switch (ptInfo -> uChannels )
306+ {
307+ case 3 :
308+ case 4 :
309+ stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
310+ break ;
311+ case 1 :
312+ stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
313+ break ;
314+ case 2 :
315+ stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
316+ break ;
200317 }
318+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
201319 }
202320}
203321
0 commit comments