@@ -190,6 +190,39 @@ return: ; preds = %entry, %if.end
190
190
ret i32 %retval.0
191
191
}
192
192
193
+ define i32 @ctz3_with_i8gep (i32 %x ) { ; table-lookup cttz of %x where the i32 load is addressed through an i8 (byte-offset) gep; %x == 0 yields 32
194
+ ; CHECK-LABEL: @ctz3_with_i8gep(
195
+ ; CHECK-NEXT: entry:
196
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
197
+ ; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]]
198
+ ; CHECK: if.end:
199
+ ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
200
+ ; CHECK-NEXT: br label [[RETURN]]
201
+ ; CHECK: return:
202
+ ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[TMP2]], [[IF_END]] ], [ 32, [[ENTRY:%.*]] ]
203
+ ; CHECK-NEXT: ret i32 [[RETVAL_0]]
204
+ ;
205
+ entry:
206
+ %cmp = icmp eq i32 %x , 0 ; explicit zero test, kept as-is in the expected output above
207
+ br i1 %cmp , label %return , label %if.end ; zero input skips the lookup; the phi in %return supplies 32
208
+
209
+ if.end: ; preds = %entry
210
+ %sub = sub i32 0 , %x ; -x
211
+ %and = and i32 %x , %sub ; x & -x keeps only the lowest set bit
212
+ %mul = mul i32 %and , 81224991 ; multiplier is 0x04D7651F
213
+ %0 = lshr i32 %mul , 25 ; keep bits 25..31 of the product
214
+ %1 = and i32 %0 , 124 ; clear the low two bits: result is (mul >> 27) * 4, a byte offset in 0..124
215
+ %arrayidx.idx = zext nneg i32 %1 to i64
216
+ %arrayidx = getelementptr inbounds nuw i8 , ptr @ctz3.table , i64 %arrayidx.idx ; byte-granular gep; @ctz3.table is defined outside this excerpt
217
+ %2 = load i32 , ptr %arrayidx , align 4 ; 4-byte load at the pre-scaled offset
218
+ br label %return
219
+
220
+ return: ; preds = %if.end, %entry
221
+ %retval.0 = phi i32 [ %2 , %if.end ], [ 32 , %entry ] ; looked-up value, or 32 when %x was zero
222
+ ret i32 %retval.0
223
+ }
224
+
225
+
193
226
@table = internal unnamed_addr constant [64 x i32 ] [i32 0 , i32 1 , i32 12 , i32 2 , i32 13 , i32 22 , i32 17 , i32 3 , i32 14 , i32 33 , i32 23 , i32 36 , i32 18 , i32 58 , i32 28 , i32 4 , i32 62 , i32 15 , i32 34 , i32 26 , i32 24 , i32 48 , i32 50 , i32 37 , i32 19 , i32 55 , i32 59 , i32 52 , i32 29 , i32 44 , i32 39 , i32 5 , i32 63 , i32 11 , i32 21 , i32 16 , i32 32 , i32 35 , i32 57 , i32 27 , i32 61 , i32 25 , i32 47 , i32 49 , i32 54 , i32 51 , i32 43 , i32 38 , i32 10 , i32 20 , i32 31 , i32 56 , i32 60 , i32 46 , i32 53 , i32 42 , i32 9 , i32 30 , i32 45 , i32 41 , i32 8 , i32 40 , i32 7 , i32 6 ], align 4 ; 64 entries forming a permutation of 0..63; NOTE(review): presumably the lookup table for @ctz4 below, whose body is outside this excerpt - confirm
194
227
195
228
define i32 @ctz4 (i64 %b ) {
@@ -276,3 +309,192 @@ entry:
276
309
%0 = load i32 , ptr %arrayidx , align 4
277
310
ret i32 %0
278
311
}
312
+
313
+ ;; This has a wrong table size but is otherwise fine.
314
+ @ctz9.table = internal unnamed_addr constant [128 x i8 ] c "\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09 " , align 1 ; declared as 128 bytes: the same 32-byte sequence written four times; @ctz9 only ever indexes 0..31
315
+ define i32 @ctz9 (i32 %x ) { ; table-lookup cttz of %x against an oversized table; the expected output above still folds it to llvm.cttz.i32
316
+ ; CHECK-LABEL: @ctz9(
317
+ ; CHECK-NEXT: entry:
318
+ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
319
+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
320
+ ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
321
+ ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
322
+ ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP3]] to i32
323
+ ; CHECK-NEXT: ret i32 [[CONV]]
324
+ ;
325
+ entry:
326
+ %sub = sub i32 0 , %x ; -x
327
+ %and = and i32 %sub , %x ; x & -x keeps only the lowest set bit (0 when %x is 0)
328
+ %mul = mul i32 %and , 125613361 ; multiplier is 0x077CB531
329
+ %shr = lshr i32 %mul , 27 ; top 5 bits of the product: index in 0..31
330
+ %idxprom = zext i32 %shr to i64
331
+ %arrayidx = getelementptr inbounds [128 x i8 ], ptr @ctz9.table , i64 0 , i64 %idxprom ; array-typed gep, one byte per element
332
+ %0 = load i8 , ptr %arrayidx , align 1 ; for %x == 0 the index is 0 and the table's first byte is \00, matching the select-to-0 in the expected output
333
+ %conv = zext i8 %0 to i32
334
+ ret i32 %conv
335
+ }
336
+
337
+ define i32 @ctz1_with_i8_gep (i32 %x ) { ; table-lookup cttz of %x where the i8 table is addressed with a plain i8 gep instead of an array-typed gep
338
+ ; CHECK-LABEL: @ctz1_with_i8_gep(
339
+ ; CHECK-NEXT: entry:
340
+ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
341
+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
342
+ ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
343
+ ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
344
+ ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP3]] to i32
345
+ ; CHECK-NEXT: ret i32 [[CONV]]
346
+ ;
347
+ entry:
348
+ %sub = sub i32 0 , %x ; -x
349
+ %and = and i32 %sub , %x ; x & -x keeps only the lowest set bit
350
+ %mul = mul i32 %and , 125613361 ; multiplier is 0x077CB531
351
+ %shr = lshr i32 %mul , 27 ; top 5 bits of the product: index in 0..31
352
+ %idxprom = zext i32 %shr to i64
353
+ %arrayidx = getelementptr inbounds i8 , ptr @ctz7.table , i64 %idxprom ; byte offset equals the index because the load below is one byte wide; @ctz7.table is defined outside this excerpt
354
+ %0 = load i8 , ptr %arrayidx , align 1
355
+ %conv = zext i8 %0 to i32
356
+ ret i32 %conv
357
+ }
358
+
359
+ ; This is the same as ctz2 (i16 table), but with an i8 gep, which makes the indices invalid.
360
+ define i32 @ctz2_with_i8_gep (i32 %x ) { ; negative test: 2-byte loads at unscaled 1-byte offsets; the expected output above is the input IR unchanged
361
+ ; CHECK-LABEL: @ctz2_with_i8_gep(
362
+ ; CHECK-NEXT: entry:
363
+ ; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
364
+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[X]]
365
+ ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AND]], 72416175
366
+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 26
367
+ ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[SHR]] to i64
368
+ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i8], ptr @ctz2.table, i64 0, i64 [[IDXPROM]]
369
+ ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 1
370
+ ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
371
+ ; CHECK-NEXT: ret i32 [[CONV]]
372
+ ;
373
+ entry:
374
+ %sub = sub i32 0 , %x ; -x
375
+ %and = and i32 %sub , %x ; x & -x keeps only the lowest set bit
376
+ %mul = mul i32 %and , 72416175 ; multiplier is 0x0450FBAF
377
+ %shr = lshr i32 %mul , 26 ; top 6 bits of the product: value in 0..63
378
+ %idxprom = zext i32 %shr to i64
379
+ %arrayidx = getelementptr inbounds [64 x i8 ], ptr @ctz2.table , i64 0 , i64 %idxprom ; element type is i8, so the index is used as a raw byte offset (not multiplied by 2)
380
+ %0 = load i16 , ptr %arrayidx , align 1 ; 2-byte load from that byte offset; @ctz2.table is defined outside this excerpt
381
+ %conv = sext i16 %0 to i32
382
+ ret i32 %conv
383
+ }
384
+
385
+ ; This is the same as ctz2_with_i8_gep, but with the gep index multiplied by 2.
386
+ define i32 @ctz2_with_i8_gep_fixed (i32 %x ) { ; i16 table read through an i8 gep whose byte offset is already index * 2; expected to fold to llvm.cttz.i32
387
+ ; CHECK-LABEL: @ctz2_with_i8_gep_fixed(
388
+ ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false)
389
+ ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
390
+ ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
391
+ ; CHECK-NEXT: ret i32 [[CONV]]
392
+ ;
393
+ %sub = sub i32 0 , %x ; -x (the entry block has no label here, so unnamed values start at %1)
394
+ %and = and i32 %x , %sub ; x & -x keeps only the lowest set bit
395
+ %mul = mul i32 %and , 72416175 ; multiplier is 0x0450FBAF
396
+ %shr = lshr i32 %mul , 25 ; keep bits 25..31 of the product
397
+ %shr2 = and i32 %shr , 126 ; clear bit 0: result is (mul >> 26) * 2, an even byte offset in 0..126
398
+ %1 = zext nneg i32 %shr2 to i64
399
+ %arrayidx = getelementptr inbounds nuw i8 , ptr @ctz2.table , i64 %1 ; byte-granular gep; @ctz2.table is defined outside this excerpt
400
+ %2 = load i16 , ptr %arrayidx , align 2 ; 2-byte load at the pre-scaled offset
401
+ %conv = sext i16 %2 to i32
402
+ ret i32 %conv
403
+ }
404
+
405
+ ; This is an i16 input with the debruijn table stored in a single i128.
406
+ @tablei128 = internal unnamed_addr constant i128 16018378897745984667142067713738932480 , align 16 ; a single 16-byte integer, read one byte at a time by the function below
407
+ define i32 @cttz_i16_via_i128 (i16 noundef %x ) { ; table-lookup cttz of an i16 where the table is the bytes of @tablei128; expected to fold to llvm.cttz.i16
408
+ ; CHECK-LABEL: @cttz_i16_via_i128(
409
+ ; CHECK-NEXT: entry:
410
+ ; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.cttz.i16(i16 [[X:%.*]], i1 true)
411
+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[X]], 0
412
+ ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP0]]
413
+ ; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[TMP2]] to i8
414
+ ; CHECK-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32
415
+ ; CHECK-NEXT: ret i32 [[CONV6]]
416
+ ;
417
+ entry:
418
+ %sub = sub i16 0 , %x ; -x
419
+ %and = and i16 %x , %sub ; x & -x keeps only the lowest set bit
420
+ %mul = mul i16 %and , 2479 ; multiplier is 0x09AF
421
+ %0 = lshr i16 %mul , 12 ; top 4 bits of the 16-bit product: index in 0..15
422
+ %idxprom = zext nneg i16 %0 to i64
423
+ %arrayidx = getelementptr inbounds nuw i8 , ptr @tablei128 , i64 %idxprom ; byte offset into the 16-byte constant
424
+ %1 = load i8 , ptr %arrayidx , align 1 ; one byte of the i128 acts as the table entry
425
+ %conv6 = zext i8 %1 to i32
426
+ ret i32 %conv6
427
+ }
428
+
429
+ ; Same as above but the table is a little off
430
+ @tablei128b = internal unnamed_addr constant i128 16018378897745984667142068813250560256 , align 16 ; larger than the @tablei128 constant by exactly 2^40
431
+ define i32 @cttz_i16_via_i128_incorrecttable (i16 noundef %x ) { ; negative test: same lookup as @cttz_i16_via_i128 against the altered constant; the expected output above is the input IR unchanged
432
+ ; CHECK-LABEL: @cttz_i16_via_i128_incorrecttable(
433
+ ; CHECK-NEXT: entry:
434
+ ; CHECK-NEXT: [[SUB:%.*]] = sub i16 0, [[X:%.*]]
435
+ ; CHECK-NEXT: [[AND:%.*]] = and i16 [[X]], [[SUB]]
436
+ ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[AND]], 2479
437
+ ; CHECK-NEXT: [[TMP0:%.*]] = lshr i16 [[MUL]], 12
438
+ ; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i16 [[TMP0]] to i64
439
+ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr @tablei128b, i64 [[IDXPROM]]
440
+ ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
441
+ ; CHECK-NEXT: [[CONV6:%.*]] = zext i8 [[TMP3]] to i32
442
+ ; CHECK-NEXT: ret i32 [[CONV6]]
443
+ ;
444
+ entry:
445
+ %sub = sub i16 0 , %x ; -x
446
+ %and = and i16 %x , %sub ; x & -x keeps only the lowest set bit
447
+ %mul = mul i16 %and , 2479 ; multiplier is 0x09AF
448
+ %0 = lshr i16 %mul , 12 ; top 4 bits of the 16-bit product: index in 0..15
449
+ %idxprom = zext nneg i16 %0 to i64
450
+ %arrayidx = getelementptr inbounds nuw i8 , ptr @tablei128b , i64 %idxprom ; byte offset into the 16-byte constant
451
+ %1 = load i8 , ptr %arrayidx , align 1
452
+ %conv6 = zext i8 %1 to i32
453
+ ret i32 %conv6
454
+ }
455
+
456
+ ; Same as ctz1 but the table and load are very large
457
+ @ctz7i128.table = internal unnamed_addr constant [32 x i128 ] [i128 0 , i128 1 , i128 28 , i128 2 , i128 29 , i128 14 , i128 24 , i128 3 , i128 30 , i128 22 , i128 20 , i128 15 , i128 25 , i128 17 , i128 4 , i128 8 , i128 31 , i128 27 , i128 13 , i128 23 , i128 21 , i128 19 , i128 16 , i128 7 , i128 26 , i128 12 , i128 18 , i128 6 , i128 11 , i128 5 , i128 10 , i128 9 ], align 16 ; 32 entries, each stored as a full i128
458
+ define i128 @ctz1_i128 (i32 %x ) { ; table-lookup cttz of an i32 whose table entries and result are i128; expected to fold to llvm.cttz.i32 plus a zext
459
+ ; CHECK-LABEL: @ctz1_i128(
460
+ ; CHECK-NEXT: entry:
461
+ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
462
+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
463
+ ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
464
+ ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i128
465
+ ; CHECK-NEXT: ret i128 [[TMP3]]
466
+ ;
467
+ entry:
468
+ %sub = sub i32 0 , %x ; -x
469
+ %and = and i32 %sub , %x ; x & -x keeps only the lowest set bit (0 when %x is 0)
470
+ %mul = mul i32 %and , 125613361 ; multiplier is 0x077CB531
471
+ %shr = lshr i32 %mul , 27 ; top 5 bits of the product: index in 0..31
472
+ %idxprom = zext i32 %shr to i64
473
+ %arrayidx = getelementptr inbounds [32 x i128 ], ptr @ctz7i128.table , i64 0 , i64 %idxprom ; array-typed gep, 16 bytes per element
474
+ %l = load i128 , ptr %arrayidx , align 1 ; whole 16-byte entry is loaded; index 0 holds i128 0, matching the select-to-0 in the expected output
475
+ ret i128 %l
476
+ }
477
+
478
+ ; This is roughly the same as ctz1 but using i128.
479
+ @table.i128 = internal unnamed_addr constant [128 x i8 ] c "\00\01 e\02 tf<\03 |ug^R=!\04 }yvWoh_5ZSE>0\22\14\05 ~rzPwmX.pkiI`K6\1A b[TBMF?'81*#\1C\15\0E\06\7F ds;{]Q xVn4YD/\13 qOl-jHJ\19 aAL&7)\1B\0D c:\\\ 1FU3C\12 N,G\18 @%(\0C 9\1E 2\11 +\17 $\0B\1D\10\16\0A\0F\09\08\07 " , align 1 ; declared as 128 one-byte entries, indexed by the 7-bit value computed in @src
480
+ define i32 @src (i128 noundef %x ) { ; table-lookup cttz of an i128 through a 128-entry i8 table; expected to fold to llvm.cttz.i128
481
+ ; CHECK-LABEL: @src(
482
+ ; CHECK-NEXT: entry:
483
+ ; CHECK-NEXT: [[TMP3:%.*]] = call i128 @llvm.cttz.i128(i128 [[X:%.*]], i1 true)
484
+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i128 [[X]], 0
485
+ ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i128 0, i128 [[TMP3]]
486
+ ; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[TMP2]] to i8
487
+ ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32
488
+ ; CHECK-NEXT: ret i32 [[CONV]]
489
+ ;
490
+ entry:
491
+ %sub = sub i128 0 , %x ; -x
492
+ %and = and i128 %x , %sub ; x & -x keeps only the lowest set bit (0 when %x is 0)
493
+ %mul = mul i128 %and , 2647824804797170443043024478319300753 ; 128-bit multiplier; NOTE(review): presumably a de Bruijn-style constant matching the table - not verified here
494
+ %shr = lshr i128 %mul , 121 ; top 7 bits of the 128-bit product: index in 0..127
495
+ %idxprom = trunc i128 %shr to i64 ; index is narrowed with trunc here rather than widened with zext
496
+ %arrayidx = getelementptr inbounds nuw i8 , ptr @table.i128 , i64 %idxprom ; byte-granular gep into the i8 table
497
+ %0 = load i8 , ptr %arrayidx , align 1 ; index 0 holds \00, matching the select-to-0 in the expected output
498
+ %conv = zext i8 %0 to i32
499
+ ret i32 %conv
500
+ }
0 commit comments