Skip to content

Conversation

@bwendling
Copy link
Collaborator

The KCFI typeid may be negative, but will result in this type of bitcode:

module asm ".weak __kcfi_typeid_func"
module asm ".set __kcfi_typeid_func, 2874394057"

// ...

define dso_local i32 @call_unsigned(ptr noundef %f) #0 !kcfi_type !6 {
  // ...
  %call = call i32 %0(i32 noundef 37) [ "kcfi"(i32 -1420573239) ]
  ret i32 %call
}

declare !kcfi_type !7 noundef i32 @foo(i32 noundef)

// ...

!7 = !{i32 -1420573239}

The __kcfi_typeid_func value doesn't equal the metadata value. Therefore, we sign extend the typeid value rather than zero extend.

This also reorganizes the testcase to remove the "-DAG" checks, which are a bit confusing at first.

The KCFI typeid may be negative, but will result in this type of
bitcode:

    module asm ".weak __kcfi_typeid_func"
    module asm ".set __kcfi_typeid_func, 2874394057"

    // ...

    define dso_local i32 @call_unsigned(ptr noundef %f) #0 !kcfi_type !6 {
      // ...
      %call = call i32 %0(i32 noundef 37) [ "kcfi"(i32 -1420573239) ]
      ret i32 %call
    }

    declare !kcfi_type !7 noundef i32 @foo(i32 noundef)

    // ...

    !7 = !{i32 -1420573239}

The __kcfi_typeid_func value doesn't equal the metadata value.
Therefore, we sign extend the typeid value rather than zero extend.

This also reorganizes the testcase to remove the "-DAG" checks, which
are a bit confusing at first.

Signed-off-by: Bill Wendling <[email protected]>
@bwendling bwendling requested a review from samitolvanen April 22, 2025 17:50
@llvmbot llvmbot added clang Clang issues not falling into any other category clang:codegen IR generation bugs: mangling, exceptions, etc. labels Apr 22, 2025
@llvmbot
Copy link
Member

llvmbot commented Apr 22, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Author: Bill Wendling (bwendling)

Changes

The KCFI typeid may be negative, but will result in this type of bitcode:

module asm ".weak __kcfi_typeid_func"
module asm ".set __kcfi_typeid_func, 2874394057"

// ...

define dso_local i32 @<!-- -->call_unsigned(ptr noundef %f) #<!-- -->0 !kcfi_type !6 {
  // ...
  %call = call i32 %0(i32 noundef 37) [ "kcfi"(i32 -1420573239) ]
  ret i32 %call
}

declare !kcfi_type !7 noundef i32 @<!-- -->foo(i32 noundef)

// ...

!7 = !{i32 -1420573239}

The __kcfi_typeid_func value doesn't equal the metadata value. Therefore, we sign extend the typeid value rather than zero extend.

This also reorganizes the testcase to remove the "-DAG" checks, which are a bit confusing at first.


Full diff: https://github.com/llvm/llvm-project/pull/136734.diff

2 Files Affected:

  • (modified) clang/lib/CodeGen/CodeGenModule.cpp (+1-1)
  • (modified) clang/test/CodeGen/kcfi.c (+56-26)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 83d8d4f758195..0422f4ab1cebb 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2931,7 +2931,7 @@ void CodeGenModule::finalizeKCFITypes() {
       continue;
 
     std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" +
-                       Name + ", " + Twine(Type->getZExtValue()) + "\n")
+                       Name + ", " + Twine(Type->getSExtValue()) + "\n")
                           .str();
     M.appendModuleInlineAsm(Asm);
   }
diff --git a/clang/test/CodeGen/kcfi.c b/clang/test/CodeGen/kcfi.c
index 622843cedba50..058f3277a8119 100644
--- a/clang/test/CodeGen/kcfi.c
+++ b/clang/test/CodeGen/kcfi.c
@@ -8,18 +8,25 @@
 /// Must emit __kcfi_typeid symbols for address-taken function declarations
 // CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]"
 // CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,HASH:]]"
+// CHECK: module asm ".weak __kcfi_typeid_[[F4_ARG:[a-zA-Z0-9_]+]]"
+// CHECK: module asm ".set __kcfi_typeid_[[F4_ARG]], [[#%d,ARG_HASH:]]"
+
 /// Must not __kcfi_typeid symbols for non-address-taken declarations
 // CHECK-NOT: module asm ".weak __kcfi_typeid_{{f6|_Z2f6v}}"
 
 // C: @ifunc1 = ifunc i32 (i32), ptr @resolver1
 // C: @ifunc2 = ifunc i64 (i64), ptr @resolver2
 typedef int (*fn_t)(void);
+typedef int (*fn_u_t)(unsigned int);
 
-// CHECK: define dso_local{{.*}} i32 @{{f1|_Z2f1v}}(){{.*}} !kcfi_type ![[#TYPE:]]
-int f1(void) { return 0; }
+int f1(void);
 
-// CHECK: define dso_local{{.*}} i32 @{{f2|_Z2f2v}}(){{.*}} !kcfi_type ![[#TYPE2:]]
-unsigned int f2(void) { return 2; }
+unsigned int f2(void);
+
+static int f3(void);
+
+extern int f4(void);
+extern int f4_arg(unsigned int);
 
 // CHECK-LABEL: define dso_local{{.*}} i32 @{{__call|_Z6__callPFivE}}(ptr{{.*}} %f)
 int __call(fn_t f) __attribute__((__no_sanitize__("kcfi"))) {
@@ -27,17 +34,45 @@ int __call(fn_t f) __attribute__((__no_sanitize__("kcfi"))) {
   return f();
 }
 
-// CHECK: define dso_local{{.*}} i32 @{{call|_Z4callPFivE}}(ptr{{.*}} %f){{.*}}
+// CHECK-LABEL: define dso_local{{.*}} i32 @{{call|_Z4callPFivE}}(ptr{{.*}} %f)
 int call(fn_t f) {
   // CHECK: call{{.*}} i32 %{{.}}(){{.*}} [ "kcfi"(i32 [[#HASH]]) ]
   return f();
 }
 
-// CHECK-DAG: define internal{{.*}} i32 @{{f3|_ZL2f3v}}(){{.*}} !kcfi_type ![[#TYPE]]
+// CHECK-LABEL: define dso_local{{.*}} i32 @{{call_with_arg|_Z13call_with_argPFijE}}(ptr{{.*}} %f)
+int call_with_arg(fn_u_t f) {
+  // CHECK: call{{.*}} i32 %0(i32 {{.*}}) [ "kcfi"(i32 [[#ARG_HASH]]) ]
+  return f(42);
+}
+
+static int f5(void);
+
+extern int f6(void);
+
+int test(void) {
+  return call(f1) +
+         __call((fn_t)f2) +
+         call(f3) +
+         call(f4) +
+         call_with_arg(f4_arg) +
+         f5() +
+         f6();
+}
+
+// CHECK-LABEL: define dso_local{{.*}} i32 @{{f1|_Z2f1v}}(){{.*}} !kcfi_type ![[#TYPE:]]
+int f1(void) { return 0; }
+
+// CHECK-LABEL: define dso_local{{.*}} i32 @{{f2|_Z2f2v}}(){{.*}} !kcfi_type ![[#TYPE2:]]
+unsigned int f2(void) { return 2; }
+
+// CHECK: define internal{{.*}} i32 @{{f3|_ZL2f3v}}(){{.*}} !kcfi_type ![[#TYPE]]
 static int f3(void) { return 1; }
 
-// CHECK-DAG: declare !kcfi_type ![[#TYPE]]{{.*}} i32 @[[F4]]()
-extern int f4(void);
+// CHECK-LABEL: declare !kcfi_type
+// CHECK-SAME:    ![[#TYPE]]{{.*}} i32 @[[F4]]
+// CHECK-LABEL: declare !kcfi_type
+// CHECK-SAME:    ![[#ARG_TYPE:]]{{.*}} i32 @[[F4_ARG]]
 
 /// Must not emit !kcfi_type for non-address-taken local functions
 // CHECK: define internal{{.*}} i32 @{{f5|_ZL2f5v}}()
@@ -45,8 +80,7 @@ extern int f4(void);
 // CHECK-SAME: {
 static int f5(void) { return 2; }
 
-// CHECK-DAG: declare !kcfi_type ![[#TYPE]]{{.*}} i32 @{{f6|_Z2f6v}}()
-extern int f6(void);
+// CHECK: declare !kcfi_type ![[#TYPE]]{{.*}} i32 @{{f6|_Z2f6v}}()
 
 #ifndef __cplusplus
 // C: define internal ptr @resolver1() #[[#]] !kcfi_type ![[#]] {
@@ -58,30 +92,26 @@ static void *resolver2(void) { return 0; }
 long ifunc2(long) __attribute__((ifunc("resolver2")));
 #endif
 
-int test(void) {
-  return call(f1) +
-         __call((fn_t)f2) +
-         call(f3) +
-         call(f4) +
-         f5() +
-         f6();
-}
-
 #ifdef __cplusplus
 struct A {
-  // MEMBER-DAG: define{{.*}} void @_ZN1A1fEv(ptr{{.*}} %this){{.*}} !kcfi_type ![[#TYPE3:]]
   void f() {}
 };
 
 void test_member_call(void) {
   void (A::* p)() = &A::f;
-  // MEMBER-DAG: call void %[[#]](ptr{{.*}} [ "kcfi"(i32 [[#%d,HASH3:]]) ]
+  // MEMBER: call void %[[#]](ptr{{.*}} [ "kcfi"(i32 [[#%d,HASH3:]]) ]
   (A().*p)();
 }
+
+// MEMBER: define{{.*}} void @_ZN1A1fEv(ptr{{.*}} %this){{.*}} !kcfi_type ![[#TYPE3:]]
 #endif
 
-// CHECK-DAG: ![[#]] = !{i32 4, !"kcfi", i32 1}
-// OFFSET-DAG: ![[#]] = !{i32 4, !"kcfi-offset", i32 3}
-// CHECK-DAG: ![[#TYPE]] = !{i32 [[#HASH]]}
-// CHECK-DAG: ![[#TYPE2]] = !{i32 [[#%d,HASH2:]]}
-// MEMBER-DAG: ![[#TYPE3]] = !{i32 [[#HASH3]]}
+// CHECK: ![[#]] = !{i32 4, !"kcfi", i32 1}
+//
+// OFFSET: ![[#]] = !{i32 4, !"kcfi-offset", i32 3}
+//
+// CHECK: ![[#TYPE]] = !{i32 [[#HASH]]}
+// CHECK: ![[#TYPE2]] = !{i32 [[#%d,HASH2:]]}
+// CHECK: ![[#ARG_TYPE]] = !{i32 [[#ARG_HASH]]}
+//
+// MEMBER: ![[#TYPE3]] = !{i32 [[#HASH3]]}

@samitolvanen
Copy link
Member

Unfortunately, the __kcfi_typeid_ values are intentionally zero-extended, so they can be used in relocations with movl. With your patch applied:

...
  LD      .tmp_vmlinux1
ld.lld: error: vmlinux.o:(function __cfi___memcpy: .noinstr.text+0x4ac1): relocation R_X86_64_32 out of range: 18446744071952041092 is not in [0, 4294967295]; references '__kcfi_typeid___memcpy'
>>> referenced by usercopy_64.c
>>> defined in vmlinux.o
...
$ llvm-objdump -d -r arch/x86/lib/memcpy_64.o
...
0000000000000010 <__cfi___memcpy>:
      10: b8 00 00 00 00                movl    $0x0, %eax
                0000000000000011:  R_X86_64_32  __kcfi_typeid___memcpy
...
$ llvm-readelf --symbols vmlinux.o | grep __kcfi_typeid___memcpy
214399: ffffffff973e8484     0 NOTYPE  WEAK   DEFAULT   ABS __kcfi_typeid___memcpy

See the __CFI_TYPE definition here:
https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/linkage.h#n100

In hindsight, adding a test to LLVM about this real-world __kcfi_typeid_ use case would probably be a good idea.

@bwendling
Copy link
Collaborator Author

Unfortunately, the __kcfi_typeid_ values are intentionally zero-extended, so they can be used in relocations with movl. With your patch applied:

Okay, so what should we do about the testcase? I'm hitting it with the 'kcfi-salt' testcases, which are loosely based on kcfi.c.

@samitolvanen
Copy link
Member

Okay, so what should we do about the testcase? I'm hitting it with the 'kcfi-salt' testcases, which are loosely based on kcfi.c.

You could hardcode the hash values in the test instead of just checking that they match? clang/test/CodeGen/kcfi-normalize.c already does this. If the hash function ever changes, this would require updating the tests too, but since that would also break cross-language CFI compatibility with rustc, I suspect that won't be a problem.

@bwendling
Copy link
Collaborator Author

Okay, I'll leave the current kcfi.c alone and do that for the new testcase. Thanks. :-)

@bwendling bwendling closed this Apr 23, 2025
@bwendling bwendling deleted the kcfi-bug branch April 30, 2025 23:39
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

clang:codegen IR generation bugs: mangling, exceptions, etc. clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants