1111; This is explained (with the motivation for such an optimization) in
1212; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
1313
14- define i8 @add8 (ptr %p ) {
14+ define i8 @add8 (ptr %p ) # 0 {
1515; X64-LABEL: add8:
1616; X64: # %bb.0:
1717; X64-NEXT: #MEMBARRIER
@@ -39,7 +39,7 @@ define i8 @add8(ptr %p) {
3939 ret i8 %1
4040}
4141
42- define i16 @or16 (ptr %p ) {
42+ define i16 @or16 (ptr %p ) # 0 {
4343; X64-LABEL: or16:
4444; X64: # %bb.0:
4545; X64-NEXT: #MEMBARRIER
@@ -67,7 +67,7 @@ define i16 @or16(ptr %p) {
6767 ret i16 %1
6868}
6969
70- define i32 @xor32 (ptr %p ) {
70+ define i32 @xor32 (ptr %p ) # 0 {
7171; X64-LABEL: xor32:
7272; X64: # %bb.0:
7373; X64-NEXT: #MEMBARRIER
@@ -95,7 +95,7 @@ define i32 @xor32(ptr %p) {
9595 ret i32 %1
9696}
9797
98- define i64 @sub64 (ptr %p ) {
98+ define i64 @sub64 (ptr %p ) # 0 {
9999; X64-LABEL: sub64:
100100; X64: # %bb.0:
101101; X64-NEXT: #MEMBARRIER
@@ -105,11 +105,7 @@ define i64 @sub64(ptr %p) {
105105; X86-LABEL: sub64:
106106; X86: # %bb.0:
107107; X86-NEXT: pushl %ebx
108- ; X86-NEXT: .cfi_def_cfa_offset 8
109108; X86-NEXT: pushl %esi
110- ; X86-NEXT: .cfi_def_cfa_offset 12
111- ; X86-NEXT: .cfi_offset %esi, -12
112- ; X86-NEXT: .cfi_offset %ebx, -8
113109; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
114110; X86-NEXT: movl (%esi), %eax
115111; X86-NEXT: movl 4(%esi), %edx
@@ -122,42 +118,32 @@ define i64 @sub64(ptr %p) {
122118; X86-NEXT: jne .LBB3_1
123119; X86-NEXT: # %bb.2: # %atomicrmw.end
124120; X86-NEXT: popl %esi
125- ; X86-NEXT: .cfi_def_cfa_offset 8
126121; X86-NEXT: popl %ebx
127- ; X86-NEXT: .cfi_def_cfa_offset 4
128122; X86-NEXT: retl
129123 %1 = atomicrmw sub ptr %p , i64 0 syncscope("singlethread" ) seq_cst
130124 ret i64 %1
131125}
132126
133- define i128 @or128 (ptr %p ) {
127+ define i128 @or128 (ptr %p ) # 0 {
134128; X64-LABEL: or128:
135129; X64: # %bb.0:
136130; X64-NEXT: pushq %rax
137- ; X64-NEXT: .cfi_def_cfa_offset 16
138131; X64-NEXT: xorl %esi, %esi
139132; X64-NEXT: xorl %edx, %edx
140133; X64-NEXT: xorl %ecx, %ecx
141134; X64-NEXT: callq __atomic_fetch_or_16@PLT
142135; X64-NEXT: popq %rcx
143- ; X64-NEXT: .cfi_def_cfa_offset 8
144136; X64-NEXT: retq
145137;
146138; X86-GENERIC-LABEL: or128:
147139; X86-GENERIC: # %bb.0:
148140; X86-GENERIC-NEXT: pushl %ebp
149- ; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
150- ; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
151141; X86-GENERIC-NEXT: movl %esp, %ebp
152- ; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
153142; X86-GENERIC-NEXT: pushl %ebx
154143; X86-GENERIC-NEXT: pushl %edi
155144; X86-GENERIC-NEXT: pushl %esi
156145; X86-GENERIC-NEXT: andl $-16, %esp
157146; X86-GENERIC-NEXT: subl $48, %esp
158- ; X86-GENERIC-NEXT: .cfi_offset %esi, -20
159- ; X86-GENERIC-NEXT: .cfi_offset %edi, -16
160- ; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
161147; X86-GENERIC-NEXT: movl 12(%ebp), %edi
162148; X86-GENERIC-NEXT: movl 12(%edi), %ecx
163149; X86-GENERIC-NEXT: movl 8(%edi), %edx
@@ -201,24 +187,17 @@ define i128 @or128(ptr %p) {
201187; X86-GENERIC-NEXT: popl %edi
202188; X86-GENERIC-NEXT: popl %ebx
203189; X86-GENERIC-NEXT: popl %ebp
204- ; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
205190; X86-GENERIC-NEXT: retl $4
206191;
207192; X86-ATOM-LABEL: or128:
208193; X86-ATOM: # %bb.0:
209194; X86-ATOM-NEXT: pushl %ebp
210- ; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
211- ; X86-ATOM-NEXT: .cfi_offset %ebp, -8
212195; X86-ATOM-NEXT: movl %esp, %ebp
213- ; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
214196; X86-ATOM-NEXT: pushl %ebx
215197; X86-ATOM-NEXT: pushl %edi
216198; X86-ATOM-NEXT: pushl %esi
217199; X86-ATOM-NEXT: andl $-16, %esp
218200; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
219- ; X86-ATOM-NEXT: .cfi_offset %esi, -20
220- ; X86-ATOM-NEXT: .cfi_offset %edi, -16
221- ; X86-ATOM-NEXT: .cfi_offset %ebx, -12
222201; X86-ATOM-NEXT: movl 12(%ebp), %edi
223202; X86-ATOM-NEXT: movl 12(%edi), %ecx
224203; X86-ATOM-NEXT: movl 8(%edi), %edx
@@ -262,14 +241,13 @@ define i128 @or128(ptr %p) {
262241; X86-ATOM-NEXT: popl %edi
263242; X86-ATOM-NEXT: popl %ebx
264243; X86-ATOM-NEXT: popl %ebp
265- ; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
266244; X86-ATOM-NEXT: retl $4
267245 %1 = atomicrmw or ptr %p , i128 0 syncscope("singlethread" ) monotonic
268246 ret i128 %1
269247}
270248
271249; For 'and', the idempotent value is (-1)
272- define i32 @and32 (ptr %p ) {
250+ define i32 @and32 (ptr %p ) # 0 {
273251; X64-LABEL: and32:
274252; X64: # %bb.0:
275253; X64-NEXT: #MEMBARRIER
@@ -297,7 +275,7 @@ define i32 @and32 (ptr %p) {
297275 ret i32 %1
298276}
299277
300- define void @or32_nouse_monotonic (ptr %p ) {
278+ define void @or32_nouse_monotonic (ptr %p ) # 0 {
301279; X64-LABEL: or32_nouse_monotonic:
302280; X64: # %bb.0:
303281; X64-NEXT: #MEMBARRIER
@@ -325,7 +303,7 @@ define void @or32_nouse_monotonic(ptr %p) {
325303}
326304
327305
328- define void @or32_nouse_acquire (ptr %p ) {
306+ define void @or32_nouse_acquire (ptr %p ) # 0 {
329307; X64-LABEL: or32_nouse_acquire:
330308; X64: # %bb.0:
331309; X64-NEXT: #MEMBARRIER
@@ -352,7 +330,7 @@ define void @or32_nouse_acquire(ptr %p) {
352330 ret void
353331}
354332
355- define void @or32_nouse_release (ptr %p ) {
333+ define void @or32_nouse_release (ptr %p ) # 0 {
356334; X64-LABEL: or32_nouse_release:
357335; X64: # %bb.0:
358336; X64-NEXT: #MEMBARRIER
@@ -379,7 +357,7 @@ define void @or32_nouse_release(ptr %p) {
379357 ret void
380358}
381359
382- define void @or32_nouse_acq_rel (ptr %p ) {
360+ define void @or32_nouse_acq_rel (ptr %p ) # 0 {
383361; X64-LABEL: or32_nouse_acq_rel:
384362; X64: # %bb.0:
385363; X64-NEXT: #MEMBARRIER
@@ -406,7 +384,7 @@ define void @or32_nouse_acq_rel(ptr %p) {
406384 ret void
407385}
408386
409- define void @or32_nouse_seq_cst (ptr %p ) {
387+ define void @or32_nouse_seq_cst (ptr %p ) # 0 {
410388; X64-LABEL: or32_nouse_seq_cst:
411389; X64: # %bb.0:
412390; X64-NEXT: #MEMBARRIER
@@ -434,7 +412,7 @@ define void @or32_nouse_seq_cst(ptr %p) {
434412}
435413
436414; TODO: The result is unused, so the cmpxchg8b on 32-bit targets is unneeded
437- define void @or64_nouse_seq_cst (ptr %p ) {
415+ define void @or64_nouse_seq_cst (ptr %p ) # 0 {
438416; X64-LABEL: or64_nouse_seq_cst:
439417; X64: # %bb.0:
440418; X64-NEXT: #MEMBARRIER
@@ -443,11 +421,7 @@ define void @or64_nouse_seq_cst(ptr %p) {
443421; X86-LABEL: or64_nouse_seq_cst:
444422; X86: # %bb.0:
445423; X86-NEXT: pushl %ebx
446- ; X86-NEXT: .cfi_def_cfa_offset 8
447424; X86-NEXT: pushl %esi
448- ; X86-NEXT: .cfi_def_cfa_offset 12
449- ; X86-NEXT: .cfi_offset %esi, -12
450- ; X86-NEXT: .cfi_offset %ebx, -8
451425; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
452426; X86-NEXT: movl (%esi), %eax
453427; X86-NEXT: movl 4(%esi), %edx
@@ -460,43 +434,33 @@ define void @or64_nouse_seq_cst(ptr %p) {
460434; X86-NEXT: jne .LBB11_1
461435; X86-NEXT: # %bb.2: # %atomicrmw.end
462436; X86-NEXT: popl %esi
463- ; X86-NEXT: .cfi_def_cfa_offset 8
464437; X86-NEXT: popl %ebx
465- ; X86-NEXT: .cfi_def_cfa_offset 4
466438; X86-NEXT: retl
467439 atomicrmw or ptr %p , i64 0 syncscope("singlethread" ) seq_cst
468440 ret void
469441}
470442
471443; TODO: This doesn't need to be lowered as a sync_and_fetch call
472- define void @or128_nouse_seq_cst (ptr %p ) {
444+ define void @or128_nouse_seq_cst (ptr %p ) # 0 {
473445; X64-LABEL: or128_nouse_seq_cst:
474446; X64: # %bb.0:
475447; X64-NEXT: pushq %rax
476- ; X64-NEXT: .cfi_def_cfa_offset 16
477448; X64-NEXT: xorl %esi, %esi
478449; X64-NEXT: xorl %edx, %edx
479450; X64-NEXT: movl $5, %ecx
480451; X64-NEXT: callq __atomic_fetch_or_16@PLT
481452; X64-NEXT: popq %rax
482- ; X64-NEXT: .cfi_def_cfa_offset 8
483453; X64-NEXT: retq
484454;
485455; X86-GENERIC-LABEL: or128_nouse_seq_cst:
486456; X86-GENERIC: # %bb.0:
487457; X86-GENERIC-NEXT: pushl %ebp
488- ; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
489- ; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
490458; X86-GENERIC-NEXT: movl %esp, %ebp
491- ; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
492459; X86-GENERIC-NEXT: pushl %ebx
493460; X86-GENERIC-NEXT: pushl %edi
494461; X86-GENERIC-NEXT: pushl %esi
495462; X86-GENERIC-NEXT: andl $-16, %esp
496463; X86-GENERIC-NEXT: subl $48, %esp
497- ; X86-GENERIC-NEXT: .cfi_offset %esi, -20
498- ; X86-GENERIC-NEXT: .cfi_offset %edi, -16
499- ; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
500464; X86-GENERIC-NEXT: movl 8(%ebp), %esi
501465; X86-GENERIC-NEXT: movl 12(%esi), %ecx
502466; X86-GENERIC-NEXT: movl 8(%esi), %edi
@@ -535,24 +499,17 @@ define void @or128_nouse_seq_cst(ptr %p) {
535499; X86-GENERIC-NEXT: popl %edi
536500; X86-GENERIC-NEXT: popl %ebx
537501; X86-GENERIC-NEXT: popl %ebp
538- ; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
539502; X86-GENERIC-NEXT: retl
540503;
541504; X86-ATOM-LABEL: or128_nouse_seq_cst:
542505; X86-ATOM: # %bb.0:
543506; X86-ATOM-NEXT: pushl %ebp
544- ; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
545- ; X86-ATOM-NEXT: .cfi_offset %ebp, -8
546507; X86-ATOM-NEXT: movl %esp, %ebp
547- ; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
548508; X86-ATOM-NEXT: pushl %ebx
549509; X86-ATOM-NEXT: pushl %edi
550510; X86-ATOM-NEXT: pushl %esi
551511; X86-ATOM-NEXT: andl $-16, %esp
552512; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
553- ; X86-ATOM-NEXT: .cfi_offset %esi, -20
554- ; X86-ATOM-NEXT: .cfi_offset %edi, -16
555- ; X86-ATOM-NEXT: .cfi_offset %ebx, -12
556513; X86-ATOM-NEXT: movl 8(%ebp), %esi
557514; X86-ATOM-NEXT: movl %esp, %ebx
558515; X86-ATOM-NEXT: movl 12(%esi), %ecx
@@ -591,14 +548,13 @@ define void @or128_nouse_seq_cst(ptr %p) {
591548; X86-ATOM-NEXT: popl %edi
592549; X86-ATOM-NEXT: popl %ebx
593550; X86-ATOM-NEXT: popl %ebp
594- ; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
595551; X86-ATOM-NEXT: retl
596552 atomicrmw or ptr %p , i128 0 syncscope("singlethread" ) seq_cst
597553 ret void
598554}
599555
600556
601- define void @or16_nouse_seq_cst (ptr %p ) {
557+ define void @or16_nouse_seq_cst (ptr %p ) # 0 {
602558; X64-LABEL: or16_nouse_seq_cst:
603559; X64: # %bb.0:
604560; X64-NEXT: #MEMBARRIER
@@ -625,7 +581,7 @@ define void @or16_nouse_seq_cst(ptr %p) {
625581 ret void
626582}
627583
628- define void @or8_nouse_seq_cst (ptr %p ) {
584+ define void @or8_nouse_seq_cst (ptr %p ) # 0 {
629585; X64-LABEL: or8_nouse_seq_cst:
630586; X64: # %bb.0:
631587; X64-NEXT: #MEMBARRIER
@@ -651,6 +607,9 @@ define void @or8_nouse_seq_cst(ptr %p) {
651607 atomicrmw or ptr %p , i8 0 syncscope("singlethread" ) seq_cst
652608 ret void
653609}
610+
611+ attributes #0 = { nounwind }
612+
654613;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
655614; X86-SLM: {{.*}}
656615; X86-SSE2: {{.*}}
0 commit comments