@@ -117,21 +117,6 @@ static double graymodel_interpolate(struct graymodel *gm, double x, double y)
117117 return gm -> C [0 ]* x + gm -> C [1 ]* y + gm -> C [2 ];
118118}
119119
120- struct quick_decode_result
121- {
122- uint64_t rcode ; // the queried code
123- uint16_t id ; // the tag ID (a small integer)
124- uint8_t hamming ; // how many errors corrected?
125- uint8_t rotation ; // number of rotations [0, 3]
126- };
127-
128- struct quick_decode
129- {
130- int nentries ;
131- uint16_t * entries ;
132- int maxhamming ;
133- };
134-
135120static inline int popcount64 (uint64_t x )
136121{
137122 x -= (x >> 1 ) & 0x5555555555555555ULL ;
@@ -140,8 +125,6 @@ static inline int popcount64(uint64_t x)
140125 return (x * 0x0101010101010101ULL ) >> 56 ;
141126}
142127
143- #define QD_ENTRY_EMPTY 65535
144-
145128/**
146129 * Assuming we are drawing the image one quadrant at a time, what would the rotated image look like?
147130 * Special care is taken to handle the case where there is a middle pixel of the image.
@@ -180,24 +163,37 @@ static struct quad *quad_copy(struct quad *quad)
180163 return q ;
181164}
182165
183- static void quick_decode_add ( struct quick_decode * qd , uint64_t code , int id )
166+ struct quick_decode_result // outcome of one codeword lookup; fields are written by quick_decode_codeword() on a match
184167{
185- uint32_t bucket = code % qd -> nentries ;
186-
187- while ( qd -> entries [ bucket ] != QD_ENTRY_EMPTY ) {
188- bucket = ( bucket + 1 ) % qd -> nentries ;
189- }
168+ uint64_t rcode ; // the queried code, stored as it was when the match was found
169+ uint16_t id ; // the tag ID (a small integer); used as the index into the family's codes[] array
170+ uint8_t hamming ; // number of bits that differ between rcode and the matched code (popcount of their XOR)
171+ uint8_t rotation ; // rotation index at which the match was found, in [0, 3]
172+ };
190173
191- qd -> entries [bucket ] = id ;
192- }
174+ struct quick_decode // per-family lookup tables: each code is split into 4 chunks and tag ids are grouped by chunk value
175+ {
176+ int nbits ; // copied from family->nbits
177+ int chunk_size ; // bits per chunk: (nbits + 3) / 4, i.e. nbits / 4 rounded up
178+ int capacity ; // number of distinct chunk values: 1 << chunk_size
179+ int chunk_mask ; // capacity - 1; ANDed with a shifted code to isolate one chunk
180+ int shifts [4 ]; // right-shift that brings chunk i to the low bits: i * chunk_size
181+ uint16_t * chunk_offsets [4 ]; // per chunk: capacity + 1 prefix sums; entries [v] and [v + 1] bound the run in chunk_ids[i] for chunk value v
182+ uint16_t * chunk_ids [4 ]; // per chunk: ncodes tag ids, ordered so that ids sharing a chunk value are contiguous
183+ int maxhamming ; // largest Hamming distance accepted as a match; quick_decode_init() rejects values above 3
184+ int ncodes ; // copied from family->ncodes (asserted < 65536 in quick_decode_init(), so ids and offsets fit in uint16_t)
185+ };
193186
194187static void quick_decode_uninit (apriltag_family_t * fam ) // frees the quick_decode tables stored in fam->impl and clears the pointer
195188{
196189 if (!fam -> impl ) // nothing was initialised for this family, so there is nothing to free
197190 return ;
198191
199192 struct quick_decode * qd = (struct quick_decode * ) fam -> impl ;
200- free (qd -> entries );
193+ for (int i = 0 ; i < 4 ; i ++ ) { // one offsets/ids pair per chunk index
194+ free (qd -> chunk_offsets [i ]);
195+ free (qd -> chunk_ids [i ]);
196+ }
201197 free (qd );
202198 fam -> impl = NULL ; // reset so the assert (family->impl == NULL) in quick_decode_init() holds if the family is initialised again
203199}
@@ -207,73 +203,68 @@ static void quick_decode_init(apriltag_family_t *family, int maxhamming)
207203 assert (family -> impl == NULL );
208204 assert (family -> ncodes < 65536 );
209205
206+ if (maxhamming > 3 ) {
207+ debug_print ("\"maxhamming\" beyond 3 not supported\n" );
208+ errno = EINVAL ;
209+ return ;
210+ }
211+
210212 struct quick_decode * qd = calloc (1 , sizeof (struct quick_decode ));
211213 qd -> maxhamming = maxhamming ;
212- int capacity = family -> ncodes ;
213-
214- int nbits = family -> nbits ;
215-
216- if (maxhamming >= 1 )
217- capacity += family -> ncodes * nbits ;
218-
219- if (maxhamming >= 2 )
220- capacity += family -> ncodes * (nbits * (nbits - 1 )) / 2 ;
214+ qd -> ncodes = family -> ncodes ;
215+ qd -> nbits = family -> nbits ;
221216
222- if (maxhamming >= 3 )
223- capacity += family -> ncodes * nbits * ((nbits - 1 ) * (nbits - 2 )) / 6 ;
217+ qd -> chunk_size = (qd -> nbits + 3 ) / 4 ;
218+ qd -> capacity = 1 << qd -> chunk_size ;
219+ qd -> chunk_mask = qd -> capacity - 1 ;
224220
225- qd -> nentries = capacity * 3 ;
221+ qd -> shifts [0 ] = 0 ;
222+ qd -> shifts [1 ] = qd -> chunk_size ;
223+ qd -> shifts [2 ] = qd -> chunk_size * 2 ;
224+ qd -> shifts [3 ] = qd -> chunk_size * 3 ;
226225
227- // debug_print("capacity %d, size: %.0f kB\n",
228- // capacity, qd->nentries * sizeof(uint16_t) / 1024.0);
229-
230- qd -> entries = malloc (qd -> nentries * sizeof (uint16_t ));
231- if (qd -> entries == NULL ) {
232- debug_print ("Failed to allocate hamming decode table\n" );
233- // errno already set to ENOMEM (Error No MEMory) by calloc() failure
234- return ;
226+ for (int i = 0 ; i < 4 ; i ++ ) {
227+ qd -> chunk_offsets [i ] = calloc (qd -> capacity + 1 , sizeof (uint16_t ));
228+ qd -> chunk_ids [i ] = calloc (qd -> ncodes , sizeof (uint16_t ));
235229 }
236230
237- for (int i = 0 ; i < qd -> nentries ; i ++ )
238- qd -> entries [i ] = QD_ENTRY_EMPTY ;
239-
240- errno = 0 ;
241-
242- for (uint32_t i = 0 ; i < family -> ncodes ; i ++ ) {
231+ // Count frequencies
232+ for (int i = 0 ; i < qd -> ncodes ; i ++ ) {
243233 uint64_t code = family -> codes [i ];
244-
245- // add exact code (hamming = 0)
246- quick_decode_add (qd , code , i );
247-
248- if (maxhamming >= 1 ) {
249- // add hamming 1
250- for (int j = 0 ; j < nbits ; j ++ )
251- quick_decode_add (qd , code ^ (APRILTAG_U64_ONE << j ), i );
234+ for (int j = 0 ; j < 4 ; j ++ ) {
235+ int val = (code >> qd -> shifts [j ]) & qd -> chunk_mask ;
236+ qd -> chunk_offsets [j ][val + 1 ]++ ;
252237 }
238+ }
253239
254- if (maxhamming >= 2 ) {
255- // add hamming 2
256- for (int j = 0 ; j < nbits ; j ++ )
257- for (int k = 0 ; k < j ; k ++ )
258- quick_decode_add (qd , code ^ (APRILTAG_U64_ONE << j ) ^ (APRILTAG_U64_ONE << k ), i );
240+ // Prefix sum
241+ for (int i = 0 ; i < 4 ; i ++ ) {
242+ for (int j = 0 ; j < qd -> capacity ; j ++ ) {
243+ qd -> chunk_offsets [i ][j + 1 ] += qd -> chunk_offsets [i ][j ];
259244 }
245+ }
260246
261- if (maxhamming >= 3 ) {
262- // add hamming 3
263- for (int j = 0 ; j < nbits ; j ++ )
264- for (int k = 0 ; k < j ; k ++ )
265- for (int m = 0 ; m < k ; m ++ )
266- quick_decode_add (qd , code ^ (APRILTAG_U64_ONE << j ) ^ (APRILTAG_U64_ONE << k ) ^ (APRILTAG_U64_ONE << m ), i );
267- }
247+ // Populate ids
248+ uint16_t * cursors [4 ];
249+ for (int i = 0 ; i < 4 ; i ++ ) {
250+ cursors [i ] = malloc ((qd -> capacity + 1 ) * sizeof (uint16_t ));
251+ memcpy (cursors [i ], qd -> chunk_offsets [i ], (qd -> capacity + 1 ) * sizeof (uint16_t ));
252+ }
268253
269- if (maxhamming > 3 ) {
270- debug_print ("\"maxhamming\" beyond 3 not supported\n" );
271- // set errno to Error INvalid VALue
272- errno = EINVAL ;
273- return ;
254+ for (int i = 0 ; i < qd -> ncodes ; i ++ ) {
255+ uint64_t code = family -> codes [i ];
256+ for (int j = 0 ; j < 4 ; j ++ ) {
257+ int val = (code >> qd -> shifts [j ]) & qd -> chunk_mask ;
258+ int write_pos = cursors [j ][val ];
259+ qd -> chunk_ids [j ][write_pos ] = i ;
260+ cursors [j ][val ]++ ;
274261 }
275262 }
276263
264+ for (int i = 0 ; i < 4 ; i ++ ) {
265+ free (cursors [i ]);
266+ }
267+
277268 family -> impl = qd ;
278269}
279270
@@ -286,20 +277,23 @@ static void quick_decode_codeword(apriltag_family_t *tf, uint64_t rcode,
286277 // qd might be null if detector_add_family_bits() failed
287278 for (int ridx = 0 ; qd != NULL && ridx < 4 ; ridx ++ ) {
288279
289- for (int bucket = rcode % qd -> nentries ;
290- qd -> entries [bucket ] != QD_ENTRY_EMPTY ;
291- bucket = (bucket + 1 ) % qd -> nentries ) {
292-
293- uint16_t id = qd -> entries [bucket ];
294- uint64_t correct_code = tf -> codes [id ];
295- int hamming = popcount64 (correct_code ^ rcode );
296-
297- if (hamming <= qd -> maxhamming ) {
298- res -> rcode = rcode ;
299- res -> id = id ;
300- res -> hamming = hamming ;
301- res -> rotation = ridx ;
302- return ;
280+ for (int i = 0 ; i < 4 ; i ++ ) {
281+ int val = (rcode >> qd -> shifts [i ]) & qd -> chunk_mask ;
282+ int start = qd -> chunk_offsets [i ][val ];
283+ int end = qd -> chunk_offsets [i ][val + 1 ];
284+
285+ for (int j = start ; j < end ; j ++ ) {
286+ uint16_t id = qd -> chunk_ids [i ][j ];
287+ uint64_t correct_code = tf -> codes [id ];
288+ int hamming = popcount64 (correct_code ^ rcode );
289+
290+ if (hamming <= qd -> maxhamming ) {
291+ res -> rcode = rcode ;
292+ res -> id = id ;
293+ res -> hamming = hamming ;
294+ res -> rotation = ridx ;
295+ return ;
296+ }
303297 }
304298 }
305299
0 commit comments