@@ -322,5 +322,132 @@ define void @g(i32 %a) nounwind {
322322 ret void
323323}
324324
; (zext (shl (zext (and x, 64)), 9)) should lower to a single and+shl in the
; 32-bit register: the mask leaves only bit 6 of %x, so the shifted value
; (at most 0x8000) needs no intermediate i16 handling.
define i32 @shift_zext_shl(i8 zeroext %x) {
; X86-LABEL: shift_zext_shl:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $64, %eax
; X86-NEXT:    shll $9, %eax
; X86-NEXT:    retl
;
; X64-LABEL: shift_zext_shl:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $64, %eax
; X64-NEXT:    shll $9, %eax
; X64-NEXT:    retq
  %a = and i8 %x, 64
  %b = zext i8 %a to i16
  %c = shl i16 %b, 9
  %d = zext i16 %c to i32
  ret i32 %d
}
345+
; Same pattern as @shift_zext_shl but with the zext going straight to i32
; before the shift; expected codegen is identical (andl + shll, no extra
; extension work).
define i32 @shift_zext_shl2(i8 zeroext %x) {
; X86-LABEL: shift_zext_shl2:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $64, %eax
; X86-NEXT:    shll $9, %eax
; X86-NEXT:    retl
;
; X64-LABEL: shift_zext_shl2:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $64, %eax
; X64-NEXT:    shll $9, %eax
; X64-NEXT:    retq
  %a = and i8 %x, 64
  %b = zext i8 %a to i32
  %c = shl i32 %b, 9
  ret i32 %c
}
365+
; Vector form of the zext+shl fold: per-lane masks <64,63,31,23> and shift
; amounts <9,8,7,6>. X86 (no SSE assumed here) scalarizes and returns the
; <4 x i32> via an sret pointer in %eax (hence retl $4); X64 keeps it in
; xmm0, using pmullw on the widened i16 lanes as the variable shift.
define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) nounwind {
; X86-LABEL: shift_zext_shl_vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $64, %ecx
; X86-NEXT:    shll $9, %ecx
; X86-NEXT:    andl $63, %edx
; X86-NEXT:    shll $8, %edx
; X86-NEXT:    andl $31, %esi
; X86-NEXT:    shll $7, %esi
; X86-NEXT:    andl $23, %edi
; X86-NEXT:    shll $6, %edi
; X86-NEXT:    movl %edi, 12(%eax)
; X86-NEXT:    movl %esi, 8(%eax)
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: shift_zext_shl_vec:
; X64:       # %bb.0:
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    pxor %xmm1, %xmm1
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    retq
  %a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
  %b = zext <4 x i8> %a to <4 x i16>
  %c = shl <4 x i16> %b, <i16 9, i16 8, i16 7, i16 6>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}
406+
; Same lanes/shifts as @shift_zext_shl_vec but zext goes straight to
; <4 x i32> before the shift. X86 scalarizes as before (note the and/shll
; ordering differs from the i16 variant); X64 must do the variable i32
; shift as two pmuludq multiplies on the even/odd lane pairs.
define <4 x i32> @shift_zext_shl2_vec(<4 x i8> %x) nounwind {
; X86-LABEL: shift_zext_shl2_vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    andl $23, %edi
; X86-NEXT:    andl $31, %esi
; X86-NEXT:    andl $63, %edx
; X86-NEXT:    andl $64, %ecx
; X86-NEXT:    shll $9, %ecx
; X86-NEXT:    shll $8, %edx
; X86-NEXT:    shll $7, %esi
; X86-NEXT:    shll $6, %edi
; X86-NEXT:    movl %edi, 12(%eax)
; X86-NEXT:    movl %esi, 8(%eax)
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: shift_zext_shl2_vec:
; X64:       # %bb.0:
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    pxor %xmm1, %xmm1
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
  %b = zext <4 x i8> %a to <4 x i32>
  %c = shl <4 x i32> %b, <i32 9, i32 8, i32 7, i32 6>
  ret <4 x i32> %c
}
451+
325452declare dso_local void @f (i64 )
326453
0 commit comments