55/*
66Index of this file:
77// [SECTION] includes
8+ // [SECTION] internal api
89// [SECTION] public api implementation
910// [SECTION] extension loading
1011// [SECTION] unity build
@@ -19,12 +20,37 @@ Index of this file:
1920#include "pl.h"
2021#include "pl_dxt_ext.h"
2122
22- // extensions
23- #include "pl_graphics_ext.h"
24-
2523// libraries
2624#include "stb_dxt.h"
2725
26+ //-----------------------------------------------------------------------------
27+ // [SECTION] internal api
28+ //-----------------------------------------------------------------------------
29+
// Reads one pixel from a row-major image with uChannels interleaved bytes per
// pixel and copies its channel bytes into auOut.
//   puData    : address of the pixel treated as coordinate (0, 0)
//   uChannels : bytes per pixel; also the number of bytes written to auOut
//   uWidth    : number of pixels per source row (used as the row stride)
//   uX, uY    : column / row of the pixel, relative to puData
//   auOut     : destination, must have room for at least uChannels bytes
// No bounds checking is done; the caller must keep (uX, uY) inside the image.
static inline void
pl__dxt_sample(const uint8_t* puData, uint32_t uChannels, uint32_t uWidth, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    // compute the offset in size_t so that large images cannot wrap
    // around in 32-bit arithmetic before being added to the pointer
    const size_t szPixelIndex = (size_t)uY * (size_t)uWidth + (size_t)uX;
    const uint8_t* ptSource = puData + szPixelIndex * (size_t)uChannels;

    for(uint32_t uChannel = 0; uChannel < uChannels; uChannel++)
        auOut[uChannel] = ptSource[uChannel];
}
37+
// Bounds-limited variant of the pixel fetch: reads one pixel from a row-major
// image with uChannels interleaved bytes per pixel and copies its channel
// bytes into auOut.
//   puData    : address of the pixel at coordinate (0, 0)
//   uChannels : bytes per pixel; also the number of bytes written to auOut
//   uWidth    : number of pixels per source row (used as the row stride)
//   uHeight   : number of rows in the source image
//   uX, uY    : requested column / row; may lie outside the image
//   auOut     : destination, must have room for at least uChannels bytes
// NOTE(review): despite the "wrap" in the name, the coordinates are passed
// through pl_clampu with bounds 0 and uWidth - 1 / uHeight - 1, so this
// presumably clamps to the nearest edge pixel rather than wrapping around
// (confirm against pl_clampu's definition). uWidth and uHeight are expected
// to be non-zero, otherwise "- 1" underflows.
static inline void
pl__dxt_sample_wrap(const uint8_t* puData, uint32_t uChannels, uint32_t uWidth, uint32_t uHeight, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    uX = pl_clampu(0, uX, uWidth - 1);
    uY = pl_clampu(0, uY, uHeight - 1);

    // compute the offset in size_t so that large images cannot wrap
    // around in 32-bit arithmetic before being added to the pointer
    const size_t szPixelIndex = (size_t)uY * (size_t)uWidth + (size_t)uX;
    const uint8_t* ptSource = puData + szPixelIndex * (size_t)uChannels;

    for(uint32_t uChannel = 0; uChannel < uChannels; uChannel++)
        auOut[uChannel] = ptSource[uChannel];
}
47+
// Stores one texel into a staging buffer that is laid out as rows of
// 4 texels, each texel occupying uDxtBlockWidth bytes.
//   puData         : source texel bytes (uDxtBlockWidth bytes are read)
//   uDxtBlockWidth : bytes per texel in the staging buffer
//   uX, uY         : column / row of the texel inside the staging buffer
//   auOut          : start of the staging buffer
// Source and destination must not overlap (memcpy is used).
static inline void
pl__dxt_copy(const uint8_t* puData, uint32_t uDxtBlockWidth, uint32_t uX, uint32_t uY, uint8_t* auOut)
{
    const uint32_t uTexelIndex = uY * 4 + uX; // row stride is 4 texels
    uint8_t* puDestination = auOut + uTexelIndex * uDxtBlockWidth;
    memcpy(puDestination, puData, uDxtBlockWidth);
}
53+
2854//-----------------------------------------------------------------------------
2955// [SECTION] public api implementation
3056//-----------------------------------------------------------------------------
@@ -38,16 +64,7 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
3864 const uint32_t uBlockSize = (ptInfo -> uChannels == 2 || ptInfo -> uChannels == 4 ? 16 : 8 );
3965
4066 if (szSizeOut )
41- {
42- if (ptInfo -> uChannels == 4 )
43- * szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight ;
44- else if (ptInfo -> uChannels == 3 )
45- * szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight / 2 ;
46- else
47- {
48- PL_ASSERT (false && "Only supporting 3 & 4 channels for now" );
49- }
50- }
67+ * szSizeOut = uAdjustedImageWidth * uAdjustedImageHeight * ptInfo -> uChannels * uBlockSize / (16 * ptInfo -> uChannels );
5168
5269 if (puDataOut == NULL )
5370 return ;
@@ -63,6 +80,10 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
6380 const uint32_t uOverflowV = (4 - (uAdjustedImageHeight - ptInfo -> uHeight )) % 4 ;
6481 const uint32_t uWrapPosH = ptInfo -> uWidth - uOverflowH ;
6582 const uint32_t uWrapPosV = ptInfo -> uHeight - uOverflowV ;
83+ const uint32_t uDxtBlockWidth = ptInfo -> uChannels > 2 ? 4 : ptInfo -> uChannels ;
84+
85+ uint32_t uBlocksPerRow = uAdjustedImageWidth / 4 ;
86+ uint32_t uBlocksPerColumn = uAdjustedImageHeight / 4 ;
6687
6788 uint8_t auPadded [4 ] = {0 };
6889
@@ -74,130 +95,223 @@ pl_dxt_compress(const plDxtInfo* ptInfo, uint8_t* puDataOut, size_t* szSizeOut)
7495 if (ptInfo -> uChannels == 4 )
7596 iIncludeAlpha = 1 ;
7697
77- if (uOverflowH == 0 && uOverflowV == 0 && ptInfo -> uChannels > 2 )
98+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~inner fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
99+
100+ if (ptInfo -> uChannels > 2 )
78101 {
79102
80103 for (uint32_t uRowStart = 0 ; uRowStart < uAdjustedImageHeight ; uRowStart += 4 )
81104 {
105+ const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4 );
106+ const uint32_t uBlockRowBytesOffset = (ptInfo -> uWidth * uBytesPerPixel * uRowStart );
82107 for (uint32_t uColumnStart = 0 ; uColumnStart < uAdjustedImageWidth ; uColumnStart += 4 )
83108 {
109+
110+ uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4 ;
111+ uOffset = uBlockSize * uBlockIndex ;
112+
113+ const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel ;
114+ puData = ptInfo -> puData + uOffsetBytes ;
115+
84116 for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
85117 {
86118 for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
87119 {
88- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * ptInfo -> uChannels ;
89- for (uint32_t uChannel = 0 ; uChannel < ptInfo -> uChannels ; uChannel ++ )
90- auPadded [uChannel ] = ptSource [uChannel ];
91- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * 4 , auPadded , 4 );
120+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
121+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
92122 }
93123 }
94124
95125 stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
96126 memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
97-
98- uOffset += uBlockSize ;
99- puData += uBytesPerPixel * 4 ;
100127 }
101- puData += ptInfo -> uWidth * uBytesPerPixel * 3 ; // by 3 since we already moved first row across
102128 }
103129 }
104- else // slow path
130+ else if ( ptInfo -> uChannels == 2 )
105131 {
106-
107132 for (uint32_t uRowStart = 0 ; uRowStart < uAdjustedImageHeight ; uRowStart += 4 )
108133 {
134+ const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4 );
135+ const uint32_t uBlockRowBytesOffset = (ptInfo -> uWidth * uBytesPerPixel * uRowStart );
109136 for (uint32_t uColumnStart = 0 ; uColumnStart < uAdjustedImageWidth ; uColumnStart += 4 )
110137 {
111- if (uRowStart >= uWrapPosV && uColumnStart >= uWrapPosH ) // overflow on bottom right corner
112- {
113- memset (auInDataBuf , 255 , 64 );
114138
115- for (uint32_t uRow = 0 ; uRow < uOverflowV ; uRow ++ )
139+ uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4 ;
140+ uOffset = uBlockSize * uBlockIndex ;
141+
142+ const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel ;
143+ puData = ptInfo -> puData + uOffsetBytes ;
144+
145+ for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
146+ {
147+ for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
116148 {
117- for (uint32_t uColumn = 0 ; uColumn < uOverflowH ; uColumn ++ )
118- {
119- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * 4 ;
120- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
121- }
149+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
150+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
122151 }
123152 }
124- else if (uColumnStart >= uWrapPosH ) // overflow on right
125- {
126- memset (auInDataBuf , 0 , 64 );
127153
128- for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
154+ stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
155+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
156+ }
157+ }
158+ }
159+ else if (ptInfo -> uChannels == 1 )
160+ {
161+ for (uint32_t uRowStart = 0 ; uRowStart < uAdjustedImageHeight ; uRowStart += 4 )
162+ {
163+ const uint32_t uBlockRowOffset = (uBlocksPerRow * uRowStart / 4 );
164+ const uint32_t uBlockRowBytesOffset = (ptInfo -> uWidth * uBytesPerPixel * uRowStart );
165+ for (uint32_t uColumnStart = 0 ; uColumnStart < uAdjustedImageWidth ; uColumnStart += 4 )
166+ {
167+ uint32_t uBlockIndex = uBlockRowOffset + uColumnStart / 4 ;
168+ uOffset = uBlockSize * uBlockIndex ;
169+
170+ const uint32_t uOffsetBytes = uBlockRowBytesOffset + uColumnStart * uBytesPerPixel ;
171+ puData = ptInfo -> puData + uOffsetBytes ;
172+
173+ for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
174+ {
175+ for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
129176 {
130- for (uint32_t uColumn = 0 ; uColumn < uOverflowH ; uColumn ++ )
131- {
132- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * 4 ;
133- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
134-
135- if (uColumn == uOverflowH - 1 )
136- {
137- for (uint32_t iOverflow = 0 ; iOverflow < uOverflowV ; iOverflow ++ )
138- {
139- memcpy (auInDataBuf + (uRow * 4 + uColumn + iOverflow + 1 ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
140- }
141- }
142- }
177+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
178+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
143179 }
144180 }
145- else if (uRowStart >= uWrapPosV ) // overflow on bottom
181+
182+ stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
183+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
184+ }
185+ }
186+ }
187+
188+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~right fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
189+
190+ if (uOverflowH > 0 )
191+ {
192+ for (uint32_t uRowStart = 0 ; uRowStart < uWrapPosV ; uRowStart += 4 )
193+ {
194+ uint32_t uBlockIndex = uRowStart * uAdjustedImageWidth / 16 ;
195+ uBlockIndex += uWrapPosH / 4 ;
196+ uOffset = uBlockSize * uBlockIndex ;
197+ const uint32_t uOffsetBytes = (ptInfo -> uWidth * uBytesPerPixel * uRowStart ) + uWrapPosH * uBytesPerPixel ;
198+ puData = ptInfo -> puData + uOffsetBytes ;
199+
200+ memset (auInDataBuf , 255 , 64 );
201+
202+ for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
203+ {
204+ for (uint32_t uColumn = 0 ; uColumn < uOverflowH ; uColumn ++ )
146205 {
147- memset (auInDataBuf , 255 , 64 );
206+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
207+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
148208
149- for ( uint32_t uRow = 0 ; uRow < uOverflowV ; uRow ++ )
209+ if ( uColumn == uOverflowH - 1 )
150210 {
151- for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
152- {
153- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * 4 ;
154- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
155- if (uRow == uOverflowV - 1 )
156- {
157- for (uint32_t iOverflow = 0 ; iOverflow < uOverflowV ; iOverflow ++ )
158- {
159- memcpy (auInDataBuf + ((uRow + iOverflow + 1 ) * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
160- }
161- }
162- }
211+ for (uint32_t iOverflow = 0 ; iOverflow < 4 - uOverflowH ; iOverflow ++ )
212+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn + iOverflow + 1 , uRow , auInDataBuf );
163213 }
164214 }
215+ }
216+
217+ memset (auOutDataBuf , 0 , 16 );
218+
219+ switch (ptInfo -> uChannels )
220+ {
221+ case 3 :
222+ case 4 :
223+ stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
224+ break ;
225+ case 1 :
226+ stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
227+ break ;
228+ case 2 :
229+ stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
230+ break ;
231+ }
232+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
233+ }
234+
235+ }
236+
237+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~bottom fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
165238
166- else
239+ if (uOverflowV > 0 )
240+ {
241+ for (uint32_t uColumnStart = 0 ; uColumnStart < uWrapPosH ; uColumnStart += 4 )
242+ {
243+ uint32_t uBlockIndex = uColumnStart / 4 + uBlocksPerRow * uWrapPosV / 4 ;
244+ uOffset = uBlockSize * uBlockIndex ;
245+ const uint32_t uOffsetBytes = (ptInfo -> uWidth * uBytesPerPixel * uWrapPosV ) + uColumnStart * uBytesPerPixel ;
246+ puData = ptInfo -> puData + uOffsetBytes ;
247+
248+ memset (auInDataBuf , 255 , 64 );
249+
250+ for (uint32_t uRow = 0 ; uRow < uOverflowV ; uRow ++ )
251+ {
252+ for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
167253 {
168- for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
254+ pl__dxt_sample (puData , ptInfo -> uChannels , ptInfo -> uWidth , uColumn , uRow , auPadded );
255+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
256+ if (uRow == uOverflowV - 1 )
169257 {
170- for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
171- {
172- const uint8_t * ptSource = puData + (uRow * ptInfo -> uWidth + uColumn ) * 4 ;
173- memcpy (auInDataBuf + (uRow * 4 + uColumn ) * ptInfo -> uChannels , ptSource , ptInfo -> uChannels );
174- }
258+ for (uint32_t iOverflow = 0 ; iOverflow < 4 - uOverflowV ; iOverflow ++ )
259+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow + iOverflow + 1 , auInDataBuf );
175260 }
176261 }
262+ }
177263
178- switch (ptInfo -> uChannels )
179- {
180- case 3 :
181- case 4 :
182- stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
183- break ;
184- case 1 :
185- stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
186- break ;
187- case 2 :
188- stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
189- break ;
190- }
191- memcpy (& puDataOut [uOffset ], auOutDataBuf , 16 );
264+ memset (auOutDataBuf , 0 , 16 );
265+
266+ switch (ptInfo -> uChannels )
267+ {
268+ case 3 :
269+ case 4 :
270+ stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
271+ break ;
272+ case 1 :
273+ stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
274+ break ;
275+ case 2 :
276+ stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
277+ break ;
278+ }
279+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
280+ }
281+ }
282+
283+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~corner fill~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
284+
285+ if (uOverflowV > 0 && uOverflowH > 0 )
286+ {
287+ uint32_t uBlockIndex = uBlocksPerRow * uBlocksPerColumn - 1 ;
288+ uOffset = uBlockSize * uBlockIndex ;
289+
290+ memset (auInDataBuf , 255 , 64 );
291+ memset (auOutDataBuf , 0 , 16 );
192292
193- uOffset += 16 ;
194- if (uColumnStart >= uWrapPosH && uOverflowH > 0 )
195- puData += uBytesPerPixel * uOverflowH ;
196- else
197- puData += uBytesPerPixel * 4 ;
293+ for (uint32_t uRow = 0 ; uRow < 4 ; uRow ++ )
294+ {
295+ for (uint32_t uColumn = 0 ; uColumn < 4 ; uColumn ++ )
296+ {
297+ pl__dxt_sample_wrap (ptInfo -> puData , ptInfo -> uChannels , ptInfo -> uWidth , ptInfo -> uHeight , uWrapPosH + uColumn , uWrapPosV + uRow , auPadded );
298+ pl__dxt_copy (auPadded , uDxtBlockWidth , uColumn , uRow , auInDataBuf );
198299 }
199- puData += ptInfo -> uWidth * uBytesPerPixel * 3 ; // by 3 since we already moved first row across
200300 }
301+ switch (ptInfo -> uChannels )
302+ {
303+ case 3 :
304+ case 4 :
305+ stb_compress_dxt_block (auOutDataBuf , auInDataBuf , iIncludeAlpha , iDxtFlags );
306+ break ;
307+ case 1 :
308+ stb_compress_bc4_block (auOutDataBuf , auInDataBuf );
309+ break ;
310+ case 2 :
311+ stb_compress_bc5_block (auOutDataBuf , auInDataBuf );
312+ break ;
313+ }
314+ memcpy (& puDataOut [uOffset ], auOutDataBuf , uBlockSize );
201315 }
202316}
203317
0 commit comments