Skip to content

Commit 545dd7c

Browse files
committed
Ubuntu-ready
1 parent c556662 commit 545dd7c

24 files changed

+1882
-403
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ DerivedData/
1212
.clj-kondo/
1313
.build/
1414
.cache/
15+
.claude
1516
docs/

.swift-version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
main-snapshot-2025-10-02
1+
main-snapshot-2025-11-03

Dockerfile

Lines changed: 107 additions & 203 deletions
Large diffs are not rendered by default.

Examples/ANKI/main.swift

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,9 @@ func collate(_ group: [EncodedPair], pad: Int, maxSrc: Int, maxTgt: Int) -> Batc
125125
func noamScale(step: Int, dModel: Int, warmup: Int = 4000) -> Float {
126126
// d_model^{-0.5} * min(step^{-0.5}, step * warmup^{-1.5})
127127
let s = Float(step + 1)
128-
let dm = powf(Float(dModel), -0.5)
129-
let a = powf(s, -0.5)
130-
let b = s * powf(Float(warmup), -1.5)
128+
let dm = 1.0 / Float(dModel).squareRoot()
129+
let a = 1.0 / s.squareRoot()
130+
let b = s / (Float(warmup).squareRoot() * Float(warmup))
131131
return dm * min(a, b)
132132
}
133133

@@ -221,8 +221,9 @@ do {
221221
)
222222
print("Model initialized (parameters: \(dModel) dims, \(heads) heads).")
223223

224-
//var opt = SGD(for: model, learningRate: cfg.learningRate)
225-
let opt = Adam(for: model, learningRate: cfg.learningRate)
224+
// Note: Using SGD instead of Adam due to keypath issues with complex models on Linux
225+
// See KNOWN_ISSUES.md for details
226+
var opt = SGD(for: model, learningRate: cfg.learningRate, momentum: 0.9)
226227
print("Optimizer ready; starting training…")
227228

228229
// Training loop (MNIST-style scaffold)

Examples/KARATE/main.swift

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -219,14 +219,10 @@ struct KarateExample {
219219
let (g, y, trainIdx, testIdx) = buildKarateGraphs()
220220

221221
// Model & optimizer
222+
// Note: Using SGD instead of Adam due to keypath issues with complex models on Linux
223+
// See KNOWN_ISSUES.md for details
222224
var model = KarateGNN()
223-
let opt = Adam(
224-
for: model,
225-
learningRate: 0.01,
226-
beta1: 0.9,
227-
beta2: 0.999,
228-
epsilon: 1e-8,
229-
weightDecay: 0.0)
225+
var opt = SGD(for: model, learningRate: 0.001, momentum: 0.9)
230226

231227
print("Karate Club • nodes: \(g.nNode[0]), edges: \(g.nEdge[0])")
232228
print("Train: \(trainIdx.count) nodes • Test: \(testIdx.count) nodes")

KNOWN_ISSUES.md

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
# TaylorTorch Known Issues
2+
3+
This document describes known issues encountered when building TaylorTorch on Linux with Swift's automatic differentiation, and the workarounds implemented.
4+
5+
> **Note**: These issues are **specific to Linux (Ubuntu 24.04)**. macOS builds do not experience these problems and can use standard C library math functions without issues.
6+
7+
## Swift SIL Linker Assertion Failures with C Library Math Functions
8+
9+
### Problem
10+
11+
When using C library math functions (`exp`, `log`, `sqrt`, `pow`, `powf`) in code that undergoes Swift automatic differentiation, the Swift compiler crashes with a SIL (Swift Intermediate Language) linker assertion:
12+
13+
```
14+
Assertion failed: googGV->isDeclaration() && "global variable already has initializer"
15+
```
16+
17+
or
18+
19+
```
20+
LLVM ERROR: Global is external, but doesn't have external or weak linkage
21+
```
22+
23+
These errors occur because the Swift autodiff system generates derivative code that references C library function symbols in ways that conflict with Swift's SIL linker expectations on Linux.
24+
25+
### Affected Functions
26+
27+
- `exp()`, `expf()` - exponential
28+
- `log()`, `logf()`, `log1p()` - logarithm
29+
- `sqrt()`, `sqrtf()` - square root
30+
- `pow()`, `powf()` - power
31+
32+
### Workarounds
33+
34+
#### 1. Replace `sqrt` with `.squareRoot()`
35+
36+
Swift's native `FloatingPoint.squareRoot()` method works correctly with autodiff:
37+
38+
```swift
39+
// Before (causes SIL crash)
40+
let a = sqrt(x)
41+
42+
// After (works)
43+
let a = x.squareRoot()
44+
```
45+
46+
#### 2. Replace `pow(x, -0.5)` with `1.0 / x.squareRoot()`
47+
48+
```swift
49+
// Before (causes SIL crash)
50+
let a = powf(x, -0.5)
51+
let b = powf(x, -1.5)
52+
53+
// After (works)
54+
let a = 1.0 / x.squareRoot()
55+
let b = 1.0 / (x.squareRoot() * x)
56+
```
57+
58+
#### 3. Replace `exp` with hardcoded constants or Taylor series
59+
60+
For simple cases like `exp(1.0)`:
61+
```swift
62+
// Before
63+
let e = exp(1.0)
64+
65+
// After
66+
let e = 2.718281828459045 // Euler's number
67+
```
68+
69+
For test code that needs `exp` computations, use a pure Swift Taylor series:
70+
```swift
71+
func swiftExp(_ x: Double) -> Double {
72+
var result = 1.0
73+
var term = 1.0
74+
for i in 1...30 {
75+
term *= x / Double(i)
76+
result += term
77+
}
78+
return result
79+
}
80+
```
81+
82+
#### 4. Replace `log1p` with Mercator series
83+
84+
```swift
85+
func swiftLog1p(_ x: Double) -> Double {
86+
let y = 1.0 + x
87+
if y <= 0 { return -.infinity }
88+
var result = 0.0
89+
var term = (y - 1) / (y + 1)
90+
let term2 = term * term
91+
for i in stride(from: 1, through: 31, by: 2) {
92+
result += term / Double(i)
93+
term *= term2
94+
}
95+
return 2.0 * result
96+
}
97+
```
98+
99+
**Note**: Taylor/Mercator series approximations lose precision for larger values. Tests using these should use looser tolerances (e.g., `1e-4` instead of `1e-6`).
100+
101+
### Files Modified
102+
103+
- `Examples/ANKI/main.swift` - Replaced `powf` with `.squareRoot()`
104+
- `Sources/Torch/Modules/Initializers.swift` - Replaced `sqrt` with `.squareRoot()`
105+
- `Tests/TensorTests/TensorMathTests.swift` - Replaced `Foundation.exp(1.0)` with constant
106+
- `Tests/TorchTests/LossTests.swift` - Added pure Swift `swiftExp` and `swiftLog1p`
107+
- `Tests/TorchTests/ActivationModulesTests.swift` - Replaced `Foundation.sqrt` with `.squareRoot()`
108+
109+
---
110+
111+
## Swift Autodiff Crash with For-In Loops
112+
113+
### Problem
114+
115+
Swift's automatic differentiation crashes when a `for-in` loop is used inside a `valueWithPullback` closure on Linux:
116+
117+
```
118+
LLVM ERROR: Global is external, but doesn't have external or weak linkage
119+
```
120+
121+
### Example
122+
123+
```swift
124+
// This crashes the compiler
125+
let (value, pullback) = valueWithPullback(at: input) { tensor in
126+
var current = tensor
127+
for dim in dims { // <-- for-in loop causes crash
128+
current = current.sum(dim: dim)
129+
}
130+
return current
131+
}
132+
```
133+
134+
### Workaround
135+
136+
Comment out or disable tests that use for-in loops inside differentiated closures. This is a Swift compiler bug that needs to be fixed upstream.
137+
138+
### Files Modified
139+
140+
- `Tests/TensorTests/TensorAxisSugarDifferentiationTests.swift` - Commented out `axisReductionsGradientMatchIntegerVariants` test
141+
142+
---
143+
144+
## Adam Optimizer KeyPath Crashes with Complex Models
145+
146+
### Problem
147+
148+
The Adam optimizer crashes at runtime when used with complex nested models (like Transformers) on Linux. The crash occurs in `recursivelyAllWritableKeyPaths` when iterating over the TangentVector structure.
149+
150+
```
151+
Swift/KeyPath.swift:1051: Fatal error: Could not extract a String from KeyPath Swift.KeyPath<...>
152+
```
153+
154+
This appears to be related to how Swift handles KeyPath operations on complex nested generic types on Linux.
155+
156+
### Workaround
157+
158+
Use SGD with momentum instead of Adam for complex models:
159+
160+
```swift
161+
// Instead of:
162+
let opt = Adam(for: model, learningRate: 0.01)
163+
164+
// Use:
165+
var opt = SGD(for: model, learningRate: 0.01, momentum: 0.9)
166+
```
167+
168+
### Files Modified
169+
170+
- `Examples/ANKI/main.swift` - Switched from Adam to SGD optimizer
171+
- `Examples/KARATE/main.swift` - Switched from Adam to SGD with LR 0.001 (higher rates cause NaN)
172+
173+
---
174+
175+
## Environment Variables Required for Building
176+
177+
### Problem
178+
179+
Building TaylorTorch fails with `'swift/bridging' file not found` if environment variables are not set.
180+
181+
### Solution
182+
183+
Set these environment variables before building:
184+
185+
```bash
186+
export SWIFT_TOOLCHAIN_DIR="/path/to/swiftly/toolchains/main-snapshot-2025-11-03/usr"
187+
export PYTORCH_INSTALL_DIR="/opt/pytorch"
188+
export PATH="/path/to/swiftly/bin:$PATH"
189+
```
190+
191+
Or source the environment files created by the install script:
192+
193+
```bash
194+
source /etc/profile.d/swift.sh
195+
source /etc/profile.d/pytorch.sh
196+
```
197+
198+
---
199+
200+
## Platform
201+
202+
These issues are specific to:
203+
- **OS**: Linux (Ubuntu 24.04)
204+
- **Swift**: Development snapshots (main-snapshot-2025-11-03)
205+
- **C++ Standard Library**: libstdc++ (GCC 13)
206+
207+
macOS builds are not affected by most of these issues.

Package.swift

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,19 @@ if let cStandardLibraryModuleMap {
8787
.unsafeFlags(["-Xcc", "-fmodule-map-file=\(cStandardLibraryModuleMap)"]))
8888
}
8989

90+
// On Linux, configure Swift to use libstdc++ properly
91+
#if os(Linux)
92+
commonSwiftSettings += [
93+
// Add libstdc++ include paths before Swift's clang includes
94+
.unsafeFlags(["-Xcc", "-isystem/usr/include/c++/13"]),
95+
.unsafeFlags(["-Xcc", "-isystem/usr/include/x86_64-linux-gnu/c++/13"]),
96+
.unsafeFlags(["-Xcc", "-isystem/usr/include/c++/13/backward"]),
97+
.unsafeFlags(["-Xcc", "-isystem/usr/lib/gcc/x86_64-linux-gnu/13/include"]),
98+
.unsafeFlags(["-Xcc", "-isystem/usr/include"]),
99+
.unsafeFlags(["-Xcc", "-isystem/usr/include/x86_64-linux-gnu"]),
100+
]
101+
#endif
102+
90103
// On Linux, use --whole-archive to force inclusion of all PyTorch operator symbols
91104
// These symbols are in static registration sections that get optimized out without this flag
92105
#if os(Linux)
@@ -95,7 +108,7 @@ if let cStandardLibraryModuleMap {
95108
.unsafeFlags([
96109
"-L", pytorchLibDir,
97110
"-Xlinker", "-rpath", "-Xlinker", pytorchLibDir,
98-
// C++ libraries - using libstdc++ (what PyTorch actually uses in Docker)
111+
// C++ libraries - using libstdc++ (what PyTorch is built with)
99112
"-Xlinker", "-lstdc++",
100113
"-Xlinker", "-lm",
101114
// PyTorch libraries in --whole-archive block
@@ -180,17 +193,22 @@ if let cStandardLibraryModuleMap {
180193
// Platform-specific CXX settings for Linux
181194
#if os(Linux)
182195
let platformCxxSettings: [CXXSetting] = [
183-
// Use libstdc++ (what PyTorch actually uses in Docker)
184-
.unsafeFlags(["-stdlib=libstdc++"]),
185-
// Use old ABI (ABI=0) to match Docker PyTorch build
186-
.define("_GLIBCXX_USE_CXX11_ABI", to: "0")
196+
// libstdc++ headers
197+
.unsafeFlags(["-isystem", "/usr/include/c++/13"]),
198+
.unsafeFlags(["-isystem", "/usr/include/x86_64-linux-gnu/c++/13"]),
199+
.unsafeFlags(["-isystem", "/usr/include/c++/13/backward"]),
200+
// GCC internal includes
201+
.unsafeFlags(["-isystem", "/usr/lib/gcc/x86_64-linux-gnu/13/include"]),
202+
// System C includes
203+
.unsafeFlags(["-isystem", "/usr/include"]),
204+
.unsafeFlags(["-isystem", "/usr/include/x86_64-linux-gnu"]),
187205
]
188206
#else
189207
let platformCxxSettings: [CXXSetting] = []
190208
#endif
191209

192-
// Combined CXX settings
193-
let allAtenCxxSettings = atenCxxSettings + platformCxxSettings
210+
// Combined CXX settings - platform settings first for correct include order
211+
let allAtenCxxSettings = platformCxxSettings + atenCxxSettings
194212

195213
var atenCxxDoctestSettings: [CXXSetting] = [
196214
.define("DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES"),
@@ -213,8 +231,8 @@ if let cStandardLibraryModuleMap {
213231
atenCxxDoctestSettings.append(.unsafeFlags(["-fmodule-map-file=\(cStandardLibraryModuleMap)"]))
214232
}
215233

216-
// Combined CXX doctest settings
217-
let allAtenCxxDoctestSettings = atenCxxDoctestSettings + platformCxxSettings
234+
// Combined CXX doctest settings - platform settings first for correct include order
235+
let allAtenCxxDoctestSettings = platformCxxSettings + atenCxxDoctestSettings
218236

219237
let package = Package(
220238
name: "TaylorTorch",
@@ -230,23 +248,32 @@ let package = Package(
230248
dependencies: [
231249
.package(url: "https://github.com/apple/swift-docc-plugin", from: "1.0.0")
232250
],
233-
targets: [
234-
// ----------------- C++ Targets -----------------
235-
.target(
236-
name: "ATenCXX",
237-
path: "Sources/ATenCXX",
238-
publicHeadersPath: "include",
239-
cxxSettings: allAtenCxxSettings
240-
),
241-
.executableTarget(
242-
name: "ATenCXXDoctests",
243-
dependencies: ["ATenCXX"],
244-
path: "Sources/ATenCXXDoctests",
245-
cxxSettings: allAtenCxxDoctestSettings,
246-
linkerSettings: atenDoctestsLinkerSettings
247-
),
251+
targets: {
252+
var targets: [Target] = [
253+
// ----------------- C++ Targets -----------------
254+
.target(
255+
name: "ATenCXX",
256+
path: "Sources/ATenCXX",
257+
publicHeadersPath: "include",
258+
cxxSettings: allAtenCxxSettings
259+
),
260+
]
261+
262+
// ATenCXXDoctests
263+
264+
targets.append(
265+
.executableTarget(
266+
name: "ATenCXXDoctests",
267+
dependencies: ["ATenCXX"],
268+
path: "Sources/ATenCXXDoctests",
269+
cxxSettings: allAtenCxxDoctestSettings,
270+
linkerSettings: atenDoctestsLinkerSettings
271+
)
272+
)
273+
248274

249275
// ----------------- Swift Targets -----------------
276+
targets += [
250277
.target(
251278
name: "Torch",
252279
dependencies: ["ATenCXX"],
@@ -298,6 +325,9 @@ let package = Package(
298325
swiftSettings: commonSwiftSettings,
299326
linkerSettings: allLinkerSettings
300327
),
301-
],
328+
]
329+
330+
return targets
331+
}(),
302332
cxxLanguageStandard: .cxx17
303333
)

0 commit comments

Comments
 (0)