pedronahum
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.swift-version‎
Lines changed: 1 addition & 1 deletion b/‎.swift-version‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Dockerfile‎
Lines changed: 107 additions & 203 deletions b/‎Dockerfile‎
Lines changed: 107 additions & 203 deletions
diff --git a/‎Examples/ANKI/main.swift‎
Lines changed: 6 additions & 5 deletions b/‎Examples/ANKI/main.swift‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎Examples/KARATE/main.swift‎
Lines changed: 3 additions & 7 deletions b/‎Examples/KARATE/main.swift‎
Lines changed: 3 additions & 7 deletions
diff --git a/‎KNOWN_ISSUES.md‎
Lines changed: 207 additions & 0 deletions b/‎KNOWN_ISSUES.md‎
Lines changed: 207 additions & 0 deletions
diff --git a/‎Package.swift‎
Lines changed: 55 additions & 25 deletions b/‎Package.swift‎
Lines changed: 55 additions & 25 deletions
@@ -12,4 +12,5 @@ DerivedData/
 .clj-kondo/
 .build/
 .cache/
+.claude
 docs/
@@ -1 +1 @@
-main-snapshot-2025-10-02
+main-snapshot-2025-11-03
@@ -125,9 +125,9 @@ func collate(_ group: [EncodedPair], pad: Int, maxSrc: Int, maxTgt: Int) -> Batc
 func noamScale(step: Int, dModel: Int, warmup: Int = 4000) -> Float {
   // d_model^{-0.5} * min(step^{-0.5}, step * warmup^{-1.5})
   let s = Float(step + 1)
-  let dm = powf(Float(dModel), -0.5)
-  let a = powf(s, -0.5)
-  let b = s * powf(Float(warmup), -1.5)
+  let dm = 1.0 / Float(dModel).squareRoot()
+  let a = 1.0 / s.squareRoot()
+  let b = s / (Float(warmup).squareRoot() * Float(warmup))
   return dm * min(a, b)
 }
 
@@ -221,8 +221,9 @@ do {
   )
   print("Model initialized (parameters: \(dModel) dims, \(heads) heads).")
 
-  //var opt = SGD(for: model, learningRate: cfg.learningRate)
-  let opt = Adam(for: model, learningRate: cfg.learningRate)
+  // Note: Using SGD instead of Adam due to keypath issues with complex models on Linux
+  // See KNOWN_ISSUES.md for details
+  var opt = SGD(for: model, learningRate: cfg.learningRate, momentum: 0.9)
   print("Optimizer ready; starting training…")
 
   // Training loop (MNIST-style scaffold)
 
@@ -219,14 +219,10 @@ struct KarateExample {
     let (g, y, trainIdx, testIdx) = buildKarateGraphs()
 
     // Model & optimizer
+    // Note: Using SGD instead of Adam due to keypath issues with complex models on Linux
+    // See KNOWN_ISSUES.md for details
     var model = KarateGNN()
-    let opt = Adam(
-      for: model,
-      learningRate: 0.01,
-      beta1: 0.9,
-      beta2: 0.999,
-      epsilon: 1e-8,
-      weightDecay: 0.0)
+    var opt = SGD(for: model, learningRate: 0.001, momentum: 0.9)
 
     print("Karate Club • nodes: \(g.nNode[0]), edges: \(g.nEdge[0])")
     print("Train: \(trainIdx.count) nodes • Test: \(testIdx.count) nodes")
 
@@ -0,0 +1,207 @@
+# TaylorTorch Known Issues
+
+This document describes known issues encountered when building TaylorTorch on Linux with Swift's automatic differentiation, and the workarounds implemented.
+
+> **Note**: These issues are **specific to Linux (Ubuntu 24.04)**. macOS builds do not experience these problems and can use standard C library math functions without issues.
+
+## Swift SIL Linker Assertion Failures with C Library Math Functions
+
+### Problem
+
+When using C library math functions (`exp`, `log`, `sqrt`, `pow`, `powf`) in code that undergoes Swift automatic differentiation, the Swift compiler crashes with a SIL (Swift Intermediate Language) linker assertion:
+
+```
+Assertion failed: googGV->isDeclaration() && "global variable already has initializer"
+```
+
+or
+
+```
+LLVM ERROR: Global is external, but doesn't have external or weak linkage
+```
+
+These errors occur because the Swift autodiff system generates derivative code that references C library function symbols in ways that conflict with Swift's SIL linker expectations on Linux.
+
+### Affected Functions
+
+- `exp()`, `expf()` - exponential
+- `log()`, `logf()`, `log1p()` - logarithm
+- `sqrt()`, `sqrtf()` - square root
+- `pow()`, `powf()` - power
+
+### Workarounds
+
+#### 1. Replace `sqrt` with `.squareRoot()`
+
+Swift's native `FloatingPoint.squareRoot()` method works correctly with autodiff:
+
+```swift
+// Before (causes SIL crash)
+let a = sqrt(x)
+
+// After (works)
+let a = x.squareRoot()
+```
+
+#### 2. Replace `pow(x, -0.5)` with `1.0 / x.squareRoot()`
+
+```swift
+// Before (causes SIL crash)
+let a = powf(x, -0.5)
+let b = powf(x, -1.5)
+
+// After (works)
+let a = 1.0 / x.squareRoot()
+let b = 1.0 / (x.squareRoot() * x)
+```
+
+#### 3. Replace `exp` with hardcoded constants or Taylor series
+
+For simple cases like `exp(1.0)`:
+```swift
+// Before
+let e = exp(1.0)
+
+// After
+let e = 2.718281828459045  // Euler's number
+```
+
+For test code that needs `exp` computations, use a pure Swift Taylor series:
+```swift
+func swiftExp(_ x: Double) -> Double {
+    var result = 1.0
+    var term = 1.0
+    for i in 1...30 {
+        term *= x / Double(i)
+        result += term
+    }
+    return result
+}
+```
+
+#### 4. Replace `log1p` with Mercator series
+
+```swift
+func swiftLog1p(_ x: Double) -> Double {
+    let y = 1.0 + x
+    if y <= 0 { return -.infinity }
+    var result = 0.0
+    var term = (y - 1) / (y + 1)
+    let term2 = term * term
+    for i in stride(from: 1, through: 31, by: 2) {
+        result += term / Double(i)
+        term *= term2
+    }
+    return 2.0 * result
+}
+```
+
+**Note**: Taylor/Mercator series approximations lose precision for larger values. Tests using these should use looser tolerances (e.g., `1e-4` instead of `1e-6`).
+
+### Files Modified
+
+- `Examples/ANKI/main.swift` - Replaced `powf` with `.squareRoot()`
+- `Sources/Torch/Modules/Initializers.swift` - Replaced `sqrt` with `.squareRoot()`
+- `Tests/TensorTests/TensorMathTests.swift` - Replaced `Foundation.exp(1.0)` with constant
+- `Tests/TorchTests/LossTests.swift` - Added pure Swift `swiftExp` and `swiftLog1p`
+- `Tests/TorchTests/ActivationModulesTests.swift` - Replaced `Foundation.sqrt` with `.squareRoot()`
+
+---
+
+## Swift Autodiff Crash with For-In Loops
+
+### Problem
+
+Swift's automatic differentiation crashes when a `for-in` loop is used inside a `valueWithPullback` closure on Linux:
+
+```
+LLVM ERROR: Global is external, but doesn't have external or weak linkage
+```
+
+### Example
+
+```swift
+// This crashes the compiler
+let (value, pullback) = valueWithPullback(at: input) { tensor in
+    var current = tensor
+    for dim in dims {  // <-- for-in loop causes crash
+        current = current.sum(dim: dim)
+    }
+    return current
+}
+```
+
+### Workaround
+
+Comment out or disable tests that use for-in loops inside differentiated closures. This is a Swift compiler bug that needs to be fixed upstream.
+
+### Files Modified
+
+- `Tests/TensorTests/TensorAxisSugarDifferentiationTests.swift` - Commented out `axisReductionsGradientMatchIntegerVariants` test
+
+---
+
+## Adam Optimizer KeyPath Crashes with Complex Models
+
+### Problem
+
+The Adam optimizer crashes at runtime when used with complex nested models (like Transformers) on Linux. The crash occurs in `recursivelyAllWritableKeyPaths` when iterating over the TangentVector structure.
+
+```
+Swift/KeyPath.swift:1051: Fatal error: Could not extract a String from KeyPath Swift.KeyPath<...>
+```
+
+This appears to be related to how Swift handles KeyPath operations on complex nested generic types on Linux.
+
+### Workaround
+
+Use SGD with momentum instead of Adam for complex models:
+
+```swift
+// Instead of:
+let opt = Adam(for: model, learningRate: 0.01)
+
+// Use:
+var opt = SGD(for: model, learningRate: 0.01, momentum: 0.9)
+```
+
+### Files Modified
+
+- `Examples/ANKI/main.swift` - Switched from Adam to SGD optimizer
+- `Examples/KARATE/main.swift` - Switched from Adam to SGD with LR 0.001 (higher rates cause NaN)
+
+---
+
+## Environment Variables Required for Building
+
+### Problem
+
+Building TaylorTorch fails with `'swift/bridging' file not found` if environment variables are not set.
+
+### Solution
+
+Set these environment variables before building:
+
+```bash
+export SWIFT_TOOLCHAIN_DIR="/path/to/swiftly/toolchains/main-snapshot-2025-11-03/usr"
+export PYTORCH_INSTALL_DIR="/opt/pytorch"
+export PATH="/path/to/swiftly/bin:$PATH"
+```
+
+Or source the environment files created by the install script:
+
+```bash
+source /etc/profile.d/swift.sh
+source /etc/profile.d/pytorch.sh
+```
+
+---
+
+## Platform
+
+These issues are specific to:
+- **OS**: Linux (Ubuntu 24.04)
+- **Swift**: Development snapshots (main-snapshot-2025-11-03)
+- **C++ Standard Library**: libstdc++ (GCC 13)
+
+macOS builds are not affected by most of these issues.
@@ -87,6 +87,19 @@ if let cStandardLibraryModuleMap {
         .unsafeFlags(["-Xcc", "-fmodule-map-file=\(cStandardLibraryModuleMap)"]))
 }
 
+// On Linux, configure Swift to use libstdc++ properly
+#if os(Linux)
+commonSwiftSettings += [
+    // Add libstdc++ include paths before Swift's clang includes
+    .unsafeFlags(["-Xcc", "-isystem/usr/include/c++/13"]),
+    .unsafeFlags(["-Xcc", "-isystem/usr/include/x86_64-linux-gnu/c++/13"]),
+    .unsafeFlags(["-Xcc", "-isystem/usr/include/c++/13/backward"]),
+    .unsafeFlags(["-Xcc", "-isystem/usr/lib/gcc/x86_64-linux-gnu/13/include"]),
+    .unsafeFlags(["-Xcc", "-isystem/usr/include"]),
+    .unsafeFlags(["-Xcc", "-isystem/usr/include/x86_64-linux-gnu"]),
+]
+#endif
+
 // On Linux, use --whole-archive to force inclusion of all PyTorch operator symbols
 // These symbols are in static registration sections that get optimized out without this flag
 #if os(Linux)
@@ -95,7 +108,7 @@ if let cStandardLibraryModuleMap {
         .unsafeFlags([
             "-L", pytorchLibDir,
             "-Xlinker", "-rpath", "-Xlinker", pytorchLibDir,
-            // C++ libraries - using libstdc++ (what PyTorch actually uses in Docker)
+            // C++ libraries - using libstdc++ (what PyTorch is built with)
             "-Xlinker", "-lstdc++",
             "-Xlinker", "-lm",
             // PyTorch libraries in --whole-archive block
@@ -180,17 +193,22 @@ if let cStandardLibraryModuleMap {
 // Platform-specific CXX settings for Linux
 #if os(Linux)
     let platformCxxSettings: [CXXSetting] = [
-        // Use libstdc++ (what PyTorch actually uses in Docker)
-        .unsafeFlags(["-stdlib=libstdc++"]),
-        // Use old ABI (ABI=0) to match Docker PyTorch build
-        .define("_GLIBCXX_USE_CXX11_ABI", to: "0")
+        // libstdc++ headers
+        .unsafeFlags(["-isystem", "/usr/include/c++/13"]),
+        .unsafeFlags(["-isystem", "/usr/include/x86_64-linux-gnu/c++/13"]),
+        .unsafeFlags(["-isystem", "/usr/include/c++/13/backward"]),
+        // GCC internal includes
+        .unsafeFlags(["-isystem", "/usr/lib/gcc/x86_64-linux-gnu/13/include"]),
+        // System C includes
+        .unsafeFlags(["-isystem", "/usr/include"]),
+        .unsafeFlags(["-isystem", "/usr/include/x86_64-linux-gnu"]),
     ]
 #else
     let platformCxxSettings: [CXXSetting] = []
 #endif
 
-// Combined CXX settings
-let allAtenCxxSettings = atenCxxSettings + platformCxxSettings
+// Combined CXX settings - platform settings first for correct include order
+let allAtenCxxSettings = platformCxxSettings + atenCxxSettings
 
 var atenCxxDoctestSettings: [CXXSetting] = [
     .define("DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES"),
@@ -213,8 +231,8 @@ if let cStandardLibraryModuleMap {
     atenCxxDoctestSettings.append(.unsafeFlags(["-fmodule-map-file=\(cStandardLibraryModuleMap)"]))
 }
 
-// Combined CXX doctest settings
-let allAtenCxxDoctestSettings = atenCxxDoctestSettings + platformCxxSettings
+// Combined CXX doctest settings - platform settings first for correct include order
+let allAtenCxxDoctestSettings = platformCxxSettings + atenCxxDoctestSettings
 
 let package = Package(
     name: "TaylorTorch",
@@ -230,23 +248,32 @@ let package = Package(
     dependencies: [
         .package(url: "https://github.com/apple/swift-docc-plugin", from: "1.0.0")
     ],
-    targets: [
-        // ----------------- C++ Targets -----------------
-        .target(
-            name: "ATenCXX",
-            path: "Sources/ATenCXX",
-            publicHeadersPath: "include",
-            cxxSettings: allAtenCxxSettings
-        ),
-        .executableTarget(
-            name: "ATenCXXDoctests",
-            dependencies: ["ATenCXX"],
-            path: "Sources/ATenCXXDoctests",
-            cxxSettings: allAtenCxxDoctestSettings,
-            linkerSettings: atenDoctestsLinkerSettings
-        ),
+    targets: {
+        var targets: [Target] = [
+            // ----------------- C++ Targets -----------------
+            .target(
+                name: "ATenCXX",
+                path: "Sources/ATenCXX",
+                publicHeadersPath: "include",
+                cxxSettings: allAtenCxxSettings
+            ),
+        ]
+
+        // ATenCXXDoctests 
+        
+        targets.append(
+            .executableTarget(
+                name: "ATenCXXDoctests",
+                dependencies: ["ATenCXX"],
+                path: "Sources/ATenCXXDoctests",
+                cxxSettings: allAtenCxxDoctestSettings,
+                linkerSettings: atenDoctestsLinkerSettings
+            )
+        )
+        
 
         // ----------------- Swift Targets -----------------
+        targets += [
         .target(
             name: "Torch",
             dependencies: ["ATenCXX"],
@@ -298,6 +325,9 @@ let package = Package(
             swiftSettings: commonSwiftSettings,
             linkerSettings: allLinkerSettings
         ),
-    ],
+        ]
+
+        return targets
+    }(),
     cxxLanguageStandard: .cxx17
 )
-Original file line number
+Diff line change
 .clj-kondo/
 .build/
 .cache/
 +.claude
 docs/
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-main-snapshot-2025-10-02`
	`1`	`+main-snapshot-2025-11-03`