Skip to content

Commit 1dca40c

Browse files
Merge branch 'main' into uint_to_fp
2 parents 74f5327 + 32d761b commit 1dca40c

File tree

9 files changed

+90
-51
lines changed

9 files changed

+90
-51
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@ using namespace llvm::opt;
1717

1818
SYCLInstallationDetector::SYCLInstallationDetector(
1919
const Driver &D, const llvm::Triple &HostTriple,
20-
const llvm::opt::ArgList &Args)
21-
: D(D) {}
20+
const llvm::opt::ArgList &Args) {}
2221

2322
void SYCLInstallationDetector::addSYCLIncludeArgs(
2423
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
@@ -31,8 +30,8 @@ void SYCLInstallationDetector::addSYCLIncludeArgs(
3130
}
3231

3332
// Unsupported options for SYCL device compilation.
34-
static ArrayRef<OptSpecifier> getUnsupportedOpts() {
35-
return {
33+
static ArrayRef<options::ID> getUnsupportedOpts() {
34+
static constexpr options::ID UnsupportedOpts[] = {
3635
options::OPT_fsanitize_EQ, // -fsanitize
3736
options::OPT_fcf_protection_EQ, // -fcf-protection
3837
options::OPT_fprofile_generate,
@@ -53,7 +52,9 @@ static ArrayRef<OptSpecifier> getUnsupportedOpts() {
5352
options::OPT_fprofile_instr_use_EQ, // -fprofile-instr-use
5453
options::OPT_forder_file_instrumentation, // -forder-file-instrumentation
5554
options::OPT_fcs_profile_generate, // -fcs-profile-generate
56-
options::OPT_fcs_profile_generate_EQ};
55+
options::OPT_fcs_profile_generate_EQ,
56+
};
57+
return UnsupportedOpts;
5758
}
5859

5960
SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple,

clang/lib/Driver/ToolChains/SYCL.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ class SYCLInstallationDetector {
2222

2323
void addSYCLIncludeArgs(const llvm::opt::ArgList &DriverArgs,
2424
llvm::opt::ArgStringList &CC1Args) const;
25-
26-
private:
27-
const Driver &D;
2825
};
2926

3027
namespace toolchains {

clang/test/Driver/sycl-offload-jit.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
/// Check the phases graph with -fsycl. Use of -fsycl enables offload
44
// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fsycl %s 2>&1 \
55
// RUN: | FileCheck -check-prefixes=CHK-PHASES %s
6-
// RUN: %clang_cl -ccc-print-phases --target=x86_64-pc-windows-msvc -fsycl %s 2>&1 \
6+
// RUN: %clang_cl -ccc-print-phases --target=x86_64-pc-windows-msvc -fsycl -- %s 2>&1 \
77
// RUN: | FileCheck -check-prefixes=CHK-PHASES %s
88
// CHK-PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
99
// CHK-PHASES-NEXT: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
@@ -35,7 +35,7 @@
3535
// RUN: | FileCheck -check-prefixes=CHK-FSYCL-IS-DEVICE,CHK-FSYCL-IS-HOST %s
3636
// RUN: %clang -### -fsycl -fsycl-device-only %s 2>&1 \
3737
// RUN: | FileCheck -check-prefix=CHK-FSYCL-IS-DEVICE %s
38-
// RUN: %clang_cl -### -fsycl -c %s 2>&1 \
38+
// RUN: %clang_cl -### -fsycl -c -- %s 2>&1 \
3939
// RUN: | FileCheck -check-prefixes=CHK-FSYCL-IS-DEVICE,CHK-FSYCL-IS-HOST %s
4040
// RUN: %clang -### -fsycl -fsycl-host-only %s 2>&1 \
4141
// RUN: | FileCheck -check-prefix=CHK-FSYCL-IS-HOST %s

llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,12 @@ def N2Write_8c_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
512512
let NumMicroOps = 7;
513513
}
514514

515+
def N2Write_7c_7V0 : SchedWriteRes<[N2UnitV0]> {
516+
let Latency = 7;
517+
let NumMicroOps = 7;
518+
let ReleaseAtCycles = [7];
519+
}
520+
515521
//===----------------------------------------------------------------------===//
516522
// Define generic 8 micro-op types
517523

@@ -547,6 +553,15 @@ def N2Write_9c_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
547553
let NumMicroOps = 8;
548554
}
549555

556+
//===----------------------------------------------------------------------===//
557+
// Define generic 9 micro-op types
558+
559+
def N2Write_9c_9V0 : SchedWriteRes<[N2UnitV0]> {
560+
let Latency = 9;
561+
let NumMicroOps = 9;
562+
let ReleaseAtCycles = [9];
563+
}
564+
550565
//===----------------------------------------------------------------------===//
551566
// Define generic 10 micro-op types
552567

@@ -557,6 +572,12 @@ def N2Write_7c_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
557572
let NumMicroOps = 10;
558573
}
559574

575+
def N2Write_10c_10V0 : SchedWriteRes<[N2UnitV0]> {
576+
let Latency = 10;
577+
let NumMicroOps = 10;
578+
let ReleaseAtCycles = [10];
579+
}
580+
560581
//===----------------------------------------------------------------------===//
561582
// Define generic 12 micro-op types
562583

@@ -580,6 +601,21 @@ def N2Write_7c_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
580601
let NumMicroOps = 15;
581602
}
582603

604+
def N2Write_15c_15V0 : SchedWriteRes<[N2UnitV0]> {
605+
let Latency = 15;
606+
let NumMicroOps = 15;
607+
let ReleaseAtCycles = [15];
608+
}
609+
610+
//===----------------------------------------------------------------------===//
611+
// Define generic 16 micro-op types
612+
613+
def N2Write_16c_16V0 : SchedWriteRes<[N2UnitV0]> {
614+
let Latency = 16;
615+
let NumMicroOps = 16;
616+
let ReleaseAtCycles = [16];
617+
}
618+
583619
//===----------------------------------------------------------------------===//
584620
// Define generic 18 micro-op types
585621

@@ -795,22 +831,26 @@ def : SchedAlias<WriteF, N2Write_2c_1V>;
795831
// FP compare
796832
def : SchedAlias<WriteFCmp, N2Write_2c_1V0>;
797833

834+
// FP divide and square root operations are performed using an iterative
835+
// algorithm and block subsequent similar operations to the same pipeline
836+
// until complete (Arm Neoverse N2 Software Optimization Guide, 3.14).
837+
798838
// FP divide, square root
799-
def : SchedAlias<WriteFDiv, N2Write_7c_1V0>;
839+
def : SchedAlias<WriteFDiv, N2Write_7c_7V0>;
800840

801841
// FP divide, H-form
802-
def : InstRW<[N2Write_7c_1V0], (instrs FDIVHrr)>;
842+
def : InstRW<[N2Write_7c_7V0], (instrs FDIVHrr)>;
803843
// FP divide, S-form
804-
def : InstRW<[N2Write_10c_1V0], (instrs FDIVSrr)>;
844+
def : InstRW<[N2Write_10c_10V0], (instrs FDIVSrr)>;
805845
// FP divide, D-form
806-
def : InstRW<[N2Write_15c_1V0], (instrs FDIVDrr)>;
846+
def : InstRW<[N2Write_15c_15V0], (instrs FDIVDrr)>;
807847

808848
// FP square root, H-form
809-
def : InstRW<[N2Write_7c_1V0], (instrs FSQRTHr)>;
849+
def : InstRW<[N2Write_7c_7V0], (instrs FSQRTHr)>;
810850
// FP square root, S-form
811-
def : InstRW<[N2Write_9c_1V0], (instrs FSQRTSr)>;
851+
def : InstRW<[N2Write_9c_9V0], (instrs FSQRTSr)>;
812852
// FP square root, D-form
813-
def : InstRW<[N2Write_16c_1V0], (instrs FSQRTDr)>;
853+
def : InstRW<[N2Write_16c_16V0], (instrs FSQRTDr)>;
814854

815855
// FP multiply
816856
def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }

llvm/test/CodeGen/AArch64/machine-combiner.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
262262
; CHECK-UNSAFE-LABEL: reassociate_adds_half:
263263
; CHECK-UNSAFE: // %bb.0:
264264
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
265-
; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
266-
; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
265+
; CHECK-UNSAFE-NEXT: fadd h2, h3, h2
266+
; CHECK-UNSAFE-NEXT: fadd h0, h2, h0
267267
; CHECK-UNSAFE-NEXT: ret
268268
%t0 = fdiv half %x0, %x1
269269
%t1 = fadd half %x2, %t0
@@ -284,8 +284,8 @@ define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
284284
; CHECK-UNSAFE-LABEL: reassociate_muls_half:
285285
; CHECK-UNSAFE: // %bb.0:
286286
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
287-
; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
288-
; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
287+
; CHECK-UNSAFE-NEXT: fmul h2, h3, h2
288+
; CHECK-UNSAFE-NEXT: fmul h0, h2, h0
289289
; CHECK-UNSAFE-NEXT: ret
290290
%t0 = fdiv half %x0, %x1
291291
%t1 = fmul half %x2, %t0

llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,15 @@
6767
## Test the lookup functionality for merged functions:
6868
# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s
6969
# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s
70-
70+
71+
#### TODO: Fix the non-determinism that is currently worked around with `{{[1-3]}}` below.
72+
7173
# CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248:
72-
# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_02 @ /tmp/test_gsym_yaml/out/file_02.cpp:5
73-
# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
74-
# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_03 @ /tmp/test_gsym_yaml/out/file_03.cpp:5
74+
# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
75+
# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
76+
# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
7577

76-
# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
78+
# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
7779

7880

7981
--- !mach-o

llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,7 +1891,7 @@ drps
18911891
# CHECK-NEXT: 1 2 0.50 fmov s0, s1
18921892
# CHECK-NEXT: 1 2 0.50 fabs s2, s3
18931893
# CHECK-NEXT: 1 2 0.50 fneg s4, s5
1894-
# CHECK-NEXT: 1 9 1.00 fsqrt s6, s7
1894+
# CHECK-NEXT: 9 9 9.00 fsqrt s6, s7
18951895
# CHECK-NEXT: 1 3 1.00 fcvt d8, s9
18961896
# CHECK-NEXT: 1 3 1.00 fcvt h10, s11
18971897
# CHECK-NEXT: 1 3 1.00 frintn s12, s13
@@ -1904,7 +1904,7 @@ drps
19041904
# CHECK-NEXT: 1 2 0.50 fmov d0, d1
19051905
# CHECK-NEXT: 1 2 0.50 fabs d2, d3
19061906
# CHECK-NEXT: 1 2 0.50 fneg d4, d5
1907-
# CHECK-NEXT: 1 16 1.00 fsqrt d6, d7
1907+
# CHECK-NEXT: 16 16 16.00 fsqrt d6, d7
19081908
# CHECK-NEXT: 1 3 1.00 fcvt s8, d9
19091909
# CHECK-NEXT: 1 3 1.00 fcvt h10, d11
19101910
# CHECK-NEXT: 1 3 1.00 frintn d12, d13
@@ -1917,7 +1917,7 @@ drps
19171917
# CHECK-NEXT: 1 3 1.00 fcvt s26, h27
19181918
# CHECK-NEXT: 1 3 1.00 fcvt d28, h29
19191919
# CHECK-NEXT: 1 3 0.50 fmul s20, s19, s17
1920-
# CHECK-NEXT: 1 10 1.00 fdiv s1, s2, s3
1920+
# CHECK-NEXT: 10 10 10.00 fdiv s1, s2, s3
19211921
# CHECK-NEXT: 1 2 0.50 fadd s4, s5, s6
19221922
# CHECK-NEXT: 1 2 0.50 fsub s7, s8, s9
19231923
# CHECK-NEXT: 1 2 0.50 fmax s10, s11, s12
@@ -1926,7 +1926,7 @@ drps
19261926
# CHECK-NEXT: 1 2 0.50 fminnm s19, s20, s21
19271927
# CHECK-NEXT: 1 3 0.50 fnmul s22, s23, s2
19281928
# CHECK-NEXT: 1 3 0.50 fmul d20, d19, d17
1929-
# CHECK-NEXT: 1 15 1.00 fdiv d1, d2, d3
1929+
# CHECK-NEXT: 15 15 15.00 fdiv d1, d2, d3
19301930
# CHECK-NEXT: 1 2 0.50 fadd d4, d5, d6
19311931
# CHECK-NEXT: 1 2 0.50 fsub d7, d8, d9
19321932
# CHECK-NEXT: 1 2 0.50 fmax d10, d11, d12
@@ -2557,7 +2557,7 @@ drps
25572557

25582558
# CHECK: Resource pressure per iteration:
25592559
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
2560-
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 169.50 85.50
2560+
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 215.50 85.50
25612561

25622562
# CHECK: Resource pressure by instruction:
25632563
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
@@ -3075,7 +3075,7 @@ drps
30753075
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov s0, s1
30763076
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fabs s2, s3
30773077
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fneg s4, s5
3078-
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fsqrt s6, s7
3078+
# CHECK-NEXT: - - - - - - - - - - - 9.00 - fsqrt s6, s7
30793079
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt d8, s9
30803080
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt h10, s11
30813081
# CHECK-NEXT: - - - - - - - - - - - 1.00 - frintn s12, s13
@@ -3088,7 +3088,7 @@ drps
30883088
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov d0, d1
30893089
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fabs d2, d3
30903090
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fneg d4, d5
3091-
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fsqrt d6, d7
3091+
# CHECK-NEXT: - - - - - - - - - - - 16.00 - fsqrt d6, d7
30923092
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt s8, d9
30933093
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt h10, d11
30943094
# CHECK-NEXT: - - - - - - - - - - - 1.00 - frintn d12, d13
@@ -3101,7 +3101,7 @@ drps
31013101
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt s26, h27
31023102
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt d28, h29
31033103
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmul s20, s19, s17
3104-
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fdiv s1, s2, s3
3104+
# CHECK-NEXT: - - - - - - - - - - - 10.00 - fdiv s1, s2, s3
31053105
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fadd s4, s5, s6
31063106
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fsub s7, s8, s9
31073107
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmax s10, s11, s12
@@ -3110,7 +3110,7 @@ drps
31103110
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fminnm s19, s20, s21
31113111
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fnmul s22, s23, s2
31123112
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmul d20, d19, d17
3113-
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fdiv d1, d2, d3
3113+
# CHECK-NEXT: - - - - - - - - - - - 15.00 - fdiv d1, d2, d3
31143114
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fadd d4, d5, d6
31153115
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fsub d7, d8, d9
31163116
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmax d10, d11, d12

llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -508,11 +508,6 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
508508
}
509509

510510
static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
511-
auto logError = [Addr, &OS](Error E) {
512-
OS << HEX64(Addr) << ": ";
513-
logAllUnhandledErrors(std::move(E), OS, "error: ");
514-
};
515-
516511
if (UseMergedFunctions) {
517512
if (auto Results = Gsym.lookupAll(Addr)) {
518513
OS << "Found " << Results->size() << " functions at address "
@@ -526,20 +521,23 @@ static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
526521
}
527522
} else { /* UseMergedFunctions == false */
528523
if (auto Result = Gsym.lookup(Addr)) {
524+
// If verbose is enabled dump the full function info for the address.
525+
if (Verbose) {
526+
if (auto FI = Gsym.getFunctionInfo(Addr)) {
527+
OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
528+
Gsym.dump(OS, *FI);
529+
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
530+
}
531+
}
529532
OS << Result.get();
530533
} else {
531-
logError(Result.takeError());
532-
return;
533-
}
534-
}
535-
536-
if (Verbose) {
537-
if (auto FI = Gsym.getFunctionInfo(Addr)) {
538-
OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
539-
Gsym.dump(OS, *FI);
540-
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
534+
if (Verbose)
535+
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
536+
OS << HEX64(Addr) << ": ";
537+
logAllUnhandledErrors(Result.takeError(), OS, "error: ");
541538
}
542-
OS << "\n";
539+
if (Verbose)
540+
OS << "\n";
543541
}
544542
}
545543

llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ static_library("Driver") {
9595
"ToolChains/RISCVToolchain.cpp",
9696
"ToolChains/SPIRV.cpp",
9797
"ToolChains/SPIRVOpenMP.cpp",
98+
"ToolChains/SYCL.cpp",
9899
"ToolChains/Solaris.cpp",
99100
"ToolChains/TCE.cpp",
100101
"ToolChains/UEFI.cpp",

0 commit comments

Comments
 (0)