From 0d6b846c0d7344a9a15e83121068e78523fd4328 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 17 Jul 2025 05:32:13 +0000 Subject: [PATCH 1/5] Treat ';' and '\n' as assembly instruction separators in collectAsmInstrs This also fixes the incorrect treatment of '\n\t' as a separator for asm instructions. --- llvm/include/llvm/IR/InlineAsm.h | 7 +++- llvm/lib/Analysis/InlineCost.cpp | 21 ++++------- .../SelectionDAG/SelectionDAGBuilder.cpp | 5 ++- llvm/lib/IR/InlineAsm.cpp | 35 +++++++++++++------ .../X86/inline-asm-function-call-pic.ll | 2 +- .../CodeGen/X86/inline-asm-p-constraint.ll | 5 ++- .../Inline/inline-call-with-asm-call.ll | 2 +- 7 files changed, 43 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 96887d129a69f..6144d2f07335b 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -87,7 +87,12 @@ class InlineAsm final : public Value { StringRef getAsmString() const { return AsmString; } StringRef getConstraintString() const { return Constraints; } - LLVM_ABI void collectAsmStrs(SmallVectorImpl &AsmStrs) const; + + /// collectAsmInstrs - Parses the assembly instruction and collects individual + /// instructions in a vector. Handles both '\n' and ';' as instruction + /// separators. Trims comments (marked by '#' and "//") and whitespaces from + /// instructions. + LLVM_ABI SmallVector collectAsmInstrs() const; /// This static method can be used by the parser to check to see if the /// specified constraint string is legal for the type. diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 22f4d08448a22..35e3a8f731db3 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -793,33 +793,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { void onInlineAsm(const InlineAsm &Arg) override { if (!InlineAsmInstrCost) return; - SmallVector AsmStrs; - Arg.collectAsmStrs(AsmStrs); int SectionLevel = 0; int InlineAsmInstrCount = 0; - for (StringRef AsmStr : AsmStrs) { - // Trim whitespaces and comments. - StringRef Trimmed = AsmStr.trim(); - size_t hashPos = Trimmed.find('#'); - if (hashPos != StringRef::npos) - Trimmed = Trimmed.substr(0, hashPos); - // Ignore comments. - if (Trimmed.empty()) - continue; + for (StringRef AsmInstr : Arg.collectAsmInstrs()) { // Filter out the outlined assembly instructions from the cost by keeping // track of the section level and only accounting for instrutions at // section level of zero. Note there will be duplication in outlined // sections too, but is not accounted in the inlining cost model. - if (Trimmed.starts_with(".pushsection")) { + if (AsmInstr.starts_with(".pushsection")) { ++SectionLevel; continue; } - if (Trimmed.starts_with(".popsection")) { + if (AsmInstr.starts_with(".popsection")) { --SectionLevel; continue; } - // Ignore directives and labels. - if (Trimmed.starts_with(".") || Trimmed.contains(":")) + // Labels are free. Note we only exclude labels that are not followed by + // any other instruction. + if (AsmInstr.ends_with(":")) continue; if (SectionLevel == 0) ++InlineAsmInstrCount; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 74c14ede24755..24dfd3a757cdd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -10006,8 +10006,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, } int OpNo = -1; - SmallVector AsmStrs; - IA->collectAsmStrs(AsmStrs); + SmallVector AsmInstrs = IA->collectAsmInstrs(); // Second pass over the constraints: compute which constraint option to use. for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { @@ -10051,7 +10050,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // label, so here we don't handle jmp function label now, but we need to // enhance it (especilly in PIC model) if we meet meaningful requirements. if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) && - TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) && + TLI.isInlineAsmTargetBranch(AsmInstrs, OpNo) && TM.getCodeModel() != CodeModel::Large) { OpInfo.isIndirect = false; OpInfo.ConstraintType = TargetLowering::C_Address; diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp index 922081468a775..a6f83dcdf3462 100644 --- a/llvm/lib/IR/InlineAsm.cpp +++ b/llvm/lib/IR/InlineAsm.cpp @@ -60,17 +60,32 @@ FunctionType *InlineAsm::getFunctionType() const { return FTy; } -void InlineAsm::collectAsmStrs(SmallVectorImpl &AsmStrs) const { +SmallVector InlineAsm::collectAsmInstrs() const { + if (AsmString.empty()) + return {}; StringRef AsmStr(AsmString); - AsmStrs.clear(); - - // TODO: 1) Unify delimiter for inline asm, we also meet other delimiters - // for example "\0A", ";". - // 2) Enhance StringRef. Some of the special delimiter ("\0") can't be - // split in StringRef. Also empty StringRef can not call split (will stuck). - if (AsmStr.empty()) - return; - AsmStr.split(AsmStrs, "\n\t", -1, false); + // First break the assembly string into lines. + SmallVector AsmLines; + AsmStr.split(AsmLines, '\n'); + + SmallVector AsmInstrs; + AsmInstrs.reserve(AsmLines.size()); + for (StringRef &AsmLine : AsmLines) { + // First remove the comments. Note it's important to do this before breaking + // by ';' since the comment portion may include that character too. + AsmLine = AsmLine.split('#').first.split("//").first; + if (AsmLine.empty()) + continue; + // Break by ';' to collect separate instructions in a single line. + SmallVector CurrentLineAsmInstrs; + AsmLine.split(CurrentLineAsmInstrs, ';'); + for (StringRef S : CurrentLineAsmInstrs) { + StringRef Trimmed = S.trim(); + if (!Trimmed.empty()) + AsmInstrs.push_back(Trimmed); + } + } + return AsmInstrs; } /// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll index d3ca872509ad5..cabba1ede609a 100644 --- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll +++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll @@ -59,7 +59,7 @@ define void @func() local_unnamed_addr #0 { entry: %call = tail call i32 @static_func() ;; We test call, CALL, and jmp. - tail call void asm sideeffect inteldialect "call ${0:P}\0A\09CALL ${1:P}\0A\09jmp ${1:P}\0A\09shr eax, $$0\0A\09shr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0\0A\09shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", "*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #0 + tail call void asm sideeffect inteldialect "call ${0:P}\0A\09CALL ${1:P}; jmp ${1:P}\0A\09shr eax, $$0\0Ashr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0; shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", "*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #0 ret void } diff --git a/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll index 50185343662b7..0837514a021fd 100644 --- a/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll +++ b/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll @@ -7,10 +7,9 @@ define ptr @foo(ptr %Ptr) { %Ptr.addr = alloca ptr, align 8 store ptr %Ptr, ptr %Ptr.addr, align 8 ; CHECK: movq %rdi, -8(%rsp) - %1 = tail call ptr asm "mov $1, $0\0A\09lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr) + %1 = tail call ptr asm "mov $1, $0; lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr) ; CHECK-NEXT: #APP -; CHECK-NEXT: mov (%rdi), %rax -; CHECK-NEXT: lea -8(%rsp), %rax +; CHECK-NEXT: mov (%rdi), %rax; lea -8(%rsp), %rax ; CHECK-NEXT: #NO_APP ret ptr %1 ; CHECK-NEXT: retq diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll index 7d8121d04996e..0018afdf4968a 100644 --- a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll +++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll @@ -27,7 +27,7 @@ define void @caller(i32 %a, i1 %b) #0 { ;; destination section and two assembly instructions in the outlined "other" ;; section. define void @callee(i32 %a, i1 %b) { - call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""() + call void asm sideeffect "s_nop 1 # some comment ; still comment \09.pushsection other; s_nop 2 \0A s_nop 3 \0A.popsection\0A s_nop 4; label:\0A", ""() ret void } ; CHECK: define void @callee From d716bcf485044bb110df01f0f9f83976732df5c8 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Fri, 18 Jul 2025 17:29:52 +0000 Subject: [PATCH 2/5] Elaborate comment and remove empty AsmString handling. --- llvm/include/llvm/IR/InlineAsm.h | 4 ++-- llvm/lib/IR/InlineAsm.cpp | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 6144d2f07335b..eb13a70a5fad6 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -88,8 +88,8 @@ class InlineAsm final : public Value { StringRef getAsmString() const { return AsmString; } StringRef getConstraintString() const { return Constraints; } - /// collectAsmInstrs - Parses the assembly instruction and collects individual - /// instructions in a vector. Handles both '\n' and ';' as instruction + /// collectAsmInstrs - Parses the assembly instruction and returns individual + /// non-empty instructions in a vector. Handles both '\n' and ';' as instruction /// separators. Trims comments (marked by '#' and "//") and whitespaces from /// instructions. LLVM_ABI SmallVector collectAsmInstrs() const; diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp index a6f83dcdf3462..12f671e824cdf 100644 --- a/llvm/lib/IR/InlineAsm.cpp +++ b/llvm/lib/IR/InlineAsm.cpp @@ -61,8 +61,6 @@ FunctionType *InlineAsm::getFunctionType() const { } SmallVector InlineAsm::collectAsmInstrs() const { - if (AsmString.empty()) - return {}; StringRef AsmStr(AsmString); // First break the assembly string into lines. SmallVector AsmLines; From 1053d21fb87a910ff4cafa24e8ffaa6fa2ccf3b0 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Fri, 18 Jul 2025 17:30:05 +0000 Subject: [PATCH 3/5] clang-format. --- llvm/include/llvm/IR/InlineAsm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index eb13a70a5fad6..3ba0211d7bd99 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -89,9 +89,9 @@ class InlineAsm final : public Value { StringRef getConstraintString() const { return Constraints; } /// collectAsmInstrs - Parses the assembly instruction and returns individual - /// non-empty instructions in a vector. Handles both '\n' and ';' as instruction - /// separators. Trims comments (marked by '#' and "//") and whitespaces from - /// instructions. + /// non-empty instructions in a vector. Handles both '\n' and ';' as + /// instruction separators. Trims comments (marked by '#' and "//") and + /// whitespaces from instructions. LLVM_ABI SmallVector collectAsmInstrs() const; /// This static method can be used by the parser to check to see if the From fdd37da4c2ce3210eec80f1eaa7161c145b39b6a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Fri, 18 Jul 2025 17:34:50 +0000 Subject: [PATCH 4/5] Fix function comment. --- llvm/include/llvm/IR/InlineAsm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 3ba0211d7bd99..8c51851066315 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -88,10 +88,10 @@ class InlineAsm final : public Value { StringRef getAsmString() const { return AsmString; } StringRef getConstraintString() const { return Constraints; } - /// collectAsmInstrs - Parses the assembly instruction and returns individual - /// non-empty instructions in a vector. Handles both '\n' and ';' as - /// instruction separators. Trims comments (marked by '#' and "//") and - /// whitespaces from instructions. + /// Parses the assembly instruction and returns individual non-empty + /// instructions in a vector. Handles both '\n' and ';' as instruction + /// separators. Trims comments (marked by '#' and "//") and whitespaces from + /// instructions. LLVM_ABI SmallVector collectAsmInstrs() const; /// This static method can be used by the parser to check to see if the From 425fbc19a78c70e4520e8338be66700e79009daa Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 21 Jul 2025 17:01:18 +0000 Subject: [PATCH 5/5] Stop handling ';' as instruction separator. --- llvm/include/llvm/IR/InlineAsm.h | 6 +++--- llvm/lib/IR/InlineAsm.cpp | 21 +++++++------------ .../X86/inline-asm-function-call-pic.ll | 2 +- .../CodeGen/X86/inline-asm-p-constraint.ll | 5 +++-- .../Inline/inline-call-with-asm-call.ll | 2 +- 5 files changed, 15 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 8c51851066315..ae1fa42842e29 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -89,9 +89,9 @@ class InlineAsm final : public Value { StringRef getConstraintString() const { return Constraints; } /// Parses the assembly instruction and returns individual non-empty - /// instructions in a vector. Handles both '\n' and ';' as instruction - /// separators. Trims comments (marked by '#' and "//") and whitespaces from - /// instructions. + /// instructions in a vector. Treats '\n' as instruction separator, but not + /// ';' (due to conflict with MachO comment). + /// Trims comments (marked by '#' and "//") and whitespaces from instructions. LLVM_ABI SmallVector collectAsmInstrs() const; /// This static method can be used by the parser to check to see if the diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp index 12f671e824cdf..7b68efe26f928 100644 --- a/llvm/lib/IR/InlineAsm.cpp +++ b/llvm/lib/IR/InlineAsm.cpp @@ -62,26 +62,19 @@ FunctionType *InlineAsm::getFunctionType() const { SmallVector InlineAsm::collectAsmInstrs() const { StringRef AsmStr(AsmString); - // First break the assembly string into lines. SmallVector AsmLines; AsmStr.split(AsmLines, '\n'); SmallVector AsmInstrs; AsmInstrs.reserve(AsmLines.size()); - for (StringRef &AsmLine : AsmLines) { - // First remove the comments. Note it's important to do this before breaking - // by ';' since the comment portion may include that character too. - AsmLine = AsmLine.split('#').first.split("//").first; - if (AsmLine.empty()) + for (StringRef AsmLine : AsmLines) { + // Trim most general comments. We don't handle comment blocks (/* ... */). + // We also don't handle '@' (ARM) and ';' (MachO) since they have different + // interpretations in different targets and we don't have target info in IR. + auto Trimmed = AsmLine.split('#').first.split("//").first.trim(); + if (Trimmed.empty()) continue; - // Break by ';' to collect separate instructions in a single line. - SmallVector CurrentLineAsmInstrs; - AsmLine.split(CurrentLineAsmInstrs, ';'); - for (StringRef S : CurrentLineAsmInstrs) { - StringRef Trimmed = S.trim(); - if (!Trimmed.empty()) - AsmInstrs.push_back(Trimmed); - } + AsmInstrs.push_back(Trimmed); } return AsmInstrs; } diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll index cabba1ede609a..d3ca872509ad5 100644 --- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll +++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll @@ -59,7 +59,7 @@ define void @func() local_unnamed_addr #0 { entry: %call = tail call i32 @static_func() ;; We test call, CALL, and jmp. - tail call void asm sideeffect inteldialect "call ${0:P}\0A\09CALL ${1:P}; jmp ${1:P}\0A\09shr eax, $$0\0Ashr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0; shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", "*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #0 + tail call void asm sideeffect inteldialect "call ${0:P}\0A\09CALL ${1:P}\0A\09jmp ${1:P}\0A\09shr eax, $$0\0A\09shr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0\0A\09shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", "*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #0 ret void } diff --git a/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll index 0837514a021fd..50185343662b7 100644 --- a/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll +++ b/llvm/test/CodeGen/X86/inline-asm-p-constraint.ll @@ -7,9 +7,10 @@ define ptr @foo(ptr %Ptr) { %Ptr.addr = alloca ptr, align 8 store ptr %Ptr, ptr %Ptr.addr, align 8 ; CHECK: movq %rdi, -8(%rsp) - %1 = tail call ptr asm "mov $1, $0; lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr) + %1 = tail call ptr asm "mov $1, $0\0A\09lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr) ; CHECK-NEXT: #APP -; CHECK-NEXT: mov (%rdi), %rax; lea -8(%rsp), %rax +; CHECK-NEXT: mov (%rdi), %rax +; CHECK-NEXT: lea -8(%rsp), %rax ; CHECK-NEXT: #NO_APP ret ptr %1 ; CHECK-NEXT: retq diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll index 0018afdf4968a..2f22b3b4edd35 100644 --- a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll +++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll @@ -27,7 +27,7 @@ define void @caller(i32 %a, i1 %b) #0 { ;; destination section and two assembly instructions in the outlined "other" ;; section. define void @callee(i32 %a, i1 %b) { - call void asm sideeffect "s_nop 1 # some comment ; still comment \09.pushsection other; s_nop 2 \0A s_nop 3 \0A.popsection\0A s_nop 4; label:\0A", ""() + call void asm sideeffect "s_nop 1 # some comment \0A.pushsection other\0A s_nop 2 \0A s_nop 3 \0A.popsection\0A s_nop 4\0A label:\0A", ""() ret void } ; CHECK: define void @callee