WIP XeCryptBnQwNeRsaPubCrypt

has207 · has207 · commit 6c370dcbd742 · 2026-03-10T11:17:28.000+09:00
diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_crypt.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_crypt.cc
@@ -16,11 +16,8 @@
 #include "xenia/kernel/xboxkrnl/xboxkrnl_private.h"
 #include "xenia/xbox.h"
 
-#ifdef XE_PLATFORM_WIN32
-#include "xenia/base/platform_win.h"  // for bcrypt.h
-#endif
-
 #include "third_party/crypto/TinySHA1.hpp"
+#include "third_party/crypto/bignum.cpp"
 #include "third_party/crypto/des/des.cpp"
 #include "third_party/crypto/des/des.h"
 #include "third_party/crypto/des/des3.h"
@@ -418,104 +415,44 @@ dword_result_t XeCryptBnQwNeRsaPubCrypt_entry(pointer_t<uint64_t> qw_a,
                                               pointer_t<uint64_t> qw_b,
                                               pointer_t<XECRYPT_RSA> rsa) {
   // 0 indicates failure (but not a BOOL return value)
-#ifndef XE_PLATFORM_WIN32
-  XELOGE(
-      "XeCryptBnQwNeRsaPubCrypt called but no implementation available for "
-      "this platform!");
-  assert_always();
-  return 1;
-#else
-  uint32_t modulus_size = rsa->size * 8;
-
-  // Convert XECRYPT blob into BCrypt format
-  ULONG key_size = sizeof(BCRYPT_RSAKEY_BLOB) + sizeof(uint32_t) + modulus_size;
-  auto key_buf = std::make_unique<uint8_t[]>(key_size);
-  auto* key_header = reinterpret_cast<BCRYPT_RSAKEY_BLOB*>(key_buf.get());
-
-  key_header->Magic = BCRYPT_RSAPUBLIC_MAGIC;
-  key_header->BitLength = modulus_size * 8;
-  key_header->cbPublicExp = sizeof(uint32_t);
-  key_header->cbModulus = modulus_size;
-  key_header->cbPrime1 = key_header->cbPrime2 = 0;
-
-  // Copy in exponent/modulus, luckily these are BE inside BCrypt blob
-  uint32_t* key_exponent = reinterpret_cast<uint32_t*>(&key_header[1]);
-  *key_exponent = rsa->public_exponent.value;
-
-  // ...except modulus needs to be reversed in 64-bit chunks for BCrypt to make
-  // use of it properly for some reason
-  uint64_t* key_modulus = reinterpret_cast<uint64_t*>(&key_exponent[1]);
-  uint64_t* xecrypt_modulus = reinterpret_cast<uint64_t*>(&rsa[1]);
-  std::reverse_copy(xecrypt_modulus, xecrypt_modulus + rsa->size, key_modulus);
-
-  BCRYPT_ALG_HANDLE hAlgorithm = NULL;
-  NTSTATUS status = BCryptOpenAlgorithmProvider(
-      &hAlgorithm, BCRYPT_RSA_ALGORITHM, MS_PRIMITIVE_PROVIDER, 0);
-
-  if (!BCRYPT_SUCCESS(status)) {
-    XELOGE(
-        "XeCryptBnQwNeRsaPubCrypt: BCryptOpenAlgorithmProvider failed with "
-        "status {:#X}!",
-        status);
-    return 0;
+  uint32_t num_qwords = rsa->size;  // caller-supplied, not range-checked here
+  uint32_t modulus_size = num_qwords * 8;  // same length in bytes
+  uint32_t exponent = rsa->public_exponent;
+
+  // The input and modulus are arrays of 64-bit limbs, least-significant limb
+  // first, with big-endian bytes inside each limb. Reversing the limb order
+  // (bytes untouched) therefore yields one flat big-endian byte string.
+  auto input_be = std::vector<uint8_t>(modulus_size);
+  auto mod_be = std::vector<uint8_t>(modulus_size);
+
+  const uint8_t* input_bytes = reinterpret_cast<const uint8_t*>(&qw_a[0]);
+  const uint8_t* mod_bytes =
+      reinterpret_cast<const uint8_t*>(&rsa[1]);  // modulus follows header
+
+  // Reverse qword order to produce big-endian byte arrays
+  for (uint32_t i = 0; i < num_qwords; i++) {
+    std::memcpy(&input_be[i * 8], &input_bytes[(num_qwords - 1 - i) * 8], 8);
+    std::memcpy(&mod_be[i * 8], &mod_bytes[(num_qwords - 1 - i) * 8], 8);
   }
 
-  BCRYPT_KEY_HANDLE hKey = NULL;
-  status = BCryptImportKeyPair(hAlgorithm, NULL, BCRYPT_RSAPUBLIC_BLOB, &hKey,
-                               key_buf.get(), key_size, 0);
+  auto base = bignum::BigNum::from_bytes_be(input_be.data(), modulus_size);
+  auto modulus = bignum::BigNum::from_bytes_be(mod_be.data(), modulus_size);
 
-  if (!BCRYPT_SUCCESS(status)) {
-    XELOGE(
-        "XeCryptBnQwNeRsaPubCrypt: BCryptImportKeyPair failed with status "
-        "{:#X}!",
-        status);
+  auto result = bignum::BigNum::modexp(base, exponent, modulus);
 
-    if (hAlgorithm) {
-      BCryptCloseAlgorithmProvider(hAlgorithm, 0);
-    }
+  // Write result (base^exponent mod modulus) as zero-padded big-endian bytes
+  auto result_be = std::vector<uint8_t>(modulus_size);
+  result.to_bytes_be(result_be.data(), modulus_size);
 
-    return 0;
+  // Back to Xbox qword order; input was copied above, so qw_b may alias qw_a
+  uint8_t* output_bytes = reinterpret_cast<uint8_t*>(&qw_b[0]);
+  for (uint32_t i = 0; i < num_qwords; i++) {
+    std::memcpy(&output_bytes[i * 8], &result_be[(num_qwords - 1 - i) * 8], 8);
   }
 
-  // Byteswap & reverse the input into output, as BCrypt wants MSB first
-  uint64_t* output = qw_b;
-  uint8_t* output_bytes = reinterpret_cast<uint8_t*>(output);
-  xe::copy_and_swap<uint64_t>(output, qw_a, rsa->size);
-  std::reverse(output_bytes, output_bytes + modulus_size);
-
-  // BCryptDecrypt only works with private keys, fortunately BCryptEncrypt
-  // performs the right actions needed for us to decrypt the input
-  ULONG result_size = 0;
-  status =
-      BCryptEncrypt(hKey, output_bytes, modulus_size, nullptr, nullptr, 0,
-                    output_bytes, modulus_size, &result_size, BCRYPT_PAD_NONE);
-
-  assert(result_size == modulus_size);
-
-  if (!BCRYPT_SUCCESS(status)) {
-    XELOGE("XeCryptBnQwNeRsaPubCrypt: BCryptEncrypt failed with status {:#X}!",
-           status);
-  } else {
-    // Reverse data & byteswap again so data is as game expects
-    std::reverse(output_bytes, output_bytes + modulus_size);
-    xe::copy_and_swap(output, output, rsa->size);
-  }
-
-  if (hKey) {
-    BCryptDestroyKey(hKey);
-  }
-  if (hAlgorithm) {
-    BCryptCloseAlgorithmProvider(hAlgorithm, 0);
-  }
-
-  return BCRYPT_SUCCESS(status) ? 1 : 0;
-#endif
+  return 1;  // NOTE(review): unconditional; no path in the new code returns 0
 }
-#ifdef XE_PLATFORM_WIN32
 DECLARE_XBOXKRNL_EXPORT1(XeCryptBnQwNeRsaPubCrypt, kNone, kImplemented);
-#else
-DECLARE_XBOXKRNL_EXPORT1(XeCryptBnQwNeRsaPubCrypt, kNone, kStub);
-#endif
 
 dword_result_t XeCryptBnDwLePkcs1Verify_entry(lpvoid_t hash, lpvoid_t sig,
                                               dword_t size) {
diff --git a/third_party/crypto/bignum.cpp b/third_party/crypto/bignum.cpp
@@ -0,0 +1,241 @@
+#include "third_party/crypto/bignum.h"
+
+namespace bignum {
+
+// Drops zero limbs from the most-significant end (the back of `limbs`) so that
+// equal values have equal lengths. Never shrinks the vector below one limb.
+void BigNum::trim() {
+  for (;;) {
+    const bool can_shrink = limbs.size() > 1 && limbs.back() == 0;
+    if (!can_shrink) return;
+    limbs.pop_back();
+  }
+}
+
+// Builds a BigNum from `len` bytes at `data`, most-significant byte first.
+// Limbs are stored least-significant first, eight bytes per limb, and the
+// result is trimmed before it is returned.
+BigNum BigNum::from_bytes_be(const uint8_t* data, size_t len) {
+  BigNum value;
+  value.limbs.resize((len + 7) / 8, 0);  // ceil(len / 8) limbs
+
+  // `sig` is a byte's significance (0 = least significant), i.e. the mirror
+  // image of its index in the big-endian input.
+  for (size_t sig = 0; sig < len; ++sig) {
+    const uint64_t byte = data[len - 1 - sig];
+    value.limbs[sig / 8] |= byte << (8 * (sig % 8));
+  }
+
+  value.trim();
+  return value;
+}
+
+// Writes the value into `out` as exactly `len` bytes, most-significant byte
+// first. Shorter values are zero-padded on the left; bytes of the value that
+// do not fit in `len` are not written (the high part is silently dropped).
+void BigNum::to_bytes_be(uint8_t* out, size_t len) const {
+  std::memset(out, 0, len);
+
+  // Walk from the least-significant byte upwards; once the limbs run out the
+  // remaining (more significant) bytes keep the zero from the memset.
+  for (size_t sig = 0; sig < len; ++sig) {
+    const size_t limb_index = sig / 8;
+    if (limb_index >= limbs.size()) break;
+    out[len - 1 - sig] =
+        static_cast<uint8_t>(limbs[limb_index] >> (8 * (sig % 8)));
+  }
+}
+
+// Three-way comparison: returns -1 if a < b, 1 if a > b, 0 if equal.
+// Operands may differ in length; missing high limbs are treated as zero, so
+// untrimmed values compare by magnitude.
+int BigNum::compare(const BigNum& a, const BigNum& b) {
+  const size_t a_len = a.limbs.size();
+  const size_t b_len = b.limbs.size();
+
+  // Scan from the most-significant limb down; the first difference decides.
+  size_t idx = std::max(a_len, b_len);
+  while (idx-- > 0) {
+    const uint64_t lhs = idx < a_len ? a.limbs[idx] : 0;
+    const uint64_t rhs = idx < b_len ? b.limbs[idx] : 0;
+    if (lhs != rhs) return lhs < rhs ? -1 : 1;
+  }
+  return 0;
+}
+
+// Returns a - b. The caller must ensure a >= b: limbs of b beyond a's length
+// are ignored and a borrow out of the top limb is dropped.
+BigNum BigNum::sub(const BigNum& a, const BigNum& b) {
+  const size_t a_len = a.limbs.size();
+  const size_t b_len = b.limbs.size();
+
+  BigNum difference;
+  difference.limbs.resize(a_len, 0);
+
+  uint64_t borrow = 0;  // 0 or 1, taken from the next higher limb
+  for (size_t i = 0; i < a_len; ++i) {
+    const uint64_t minuend = a.limbs[i];
+    const uint64_t subtrahend = i < b_len ? b.limbs[i] : 0;
+    const uint64_t partial = minuend - subtrahend;  // wraps on underflow
+    difference.limbs[i] = partial - borrow;
+    // A borrow is needed if either of the two subtractions wrapped.
+    borrow = (minuend < subtrahend || partial < borrow) ? 1 : 0;
+  }
+
+  difference.trim();
+  return difference;
+}
+
+// Schoolbook product of a and b: every limb of a is multiplied by every limb
+// of b and accumulated, with __uint128_t holding each 64x64-bit partial sum.
+// The result is trimmed before it is returned.
+BigNum BigNum::mul(const BigNum& a, const BigNum& b) {
+  const size_t a_len = a.limbs.size();
+  const size_t b_len = b.limbs.size();
+
+  BigNum product;
+  product.limbs.resize(a_len + b_len, 0);
+
+  for (size_t row = 0; row < a_len; ++row) {
+    const __uint128_t multiplier = a.limbs[row];
+    uint64_t carry = 0;
+    for (size_t col = 0; col < b_len; ++col) {
+      uint64_t& slot = product.limbs[row + col];
+      // multiplier * limb + slot + carry is at most 2^128 - 1: no overflow.
+      const __uint128_t acc = multiplier * b.limbs[col] + slot + carry;
+      slot = static_cast<uint64_t>(acc);
+      carry = static_cast<uint64_t>(acc >> 64);
+    }
+    // No earlier row writes this slot, so it only ever receives this carry.
+    product.limbs[row + b_len] += carry;
+  }
+
+  product.trim();
+  return product;
+}
+
+// Returns a mod m via long division on 64-bit limbs (Knuth, TAOCP vol. 2,
+// section 4.3.1, Algorithm D); quotient digits are computed but discarded.
+//
+// - If a < m, a is returned unchanged.
+// - If m is zero or has no limbs, a default-constructed BigNum is returned
+//   rather than dividing by zero.
+// - Leading zero limbs on either operand are skipped, so an untrimmed input
+//   cannot leave a zero in the divisor's top limb.
+//
+// Uses the GCC/Clang extensions __uint128_t and __builtin_clzll.
+BigNum BigNum::mod(const BigNum& a, const BigNum& m) {
+  if (compare(a, m) < 0) return a;
+
+  // Significant limb counts (most-significant zero limbs excluded).
+  size_t n = m.limbs.size();
+  while (n > 0 && m.limbs[n - 1] == 0) n--;
+  size_t total = a.limbs.size();
+  while (total > 0 && a.limbs[total - 1] == 0) total--;
+
+  if (n == 0) {
+    return BigNum();  // division by zero guard
+  }
+  // From here on a >= m > 0, hence total >= n >= 1.
+
+  // Single-limb divisor fast path
+  if (n == 1) {
+    const uint64_t d = m.limbs[0];
+    uint64_t rem = 0;
+    for (size_t i = total; i > 0; i--) {
+      const __uint128_t cur =
+          (static_cast<__uint128_t>(rem) << 64) | a.limbs[i - 1];
+      rem = static_cast<uint64_t>(cur % d);
+    }
+    BigNum r;
+    r.limbs = {rem};
+    return r;
+  }
+
+  // Normalize: shift both operands left so that the MSB of the divisor's top
+  // limb is set. The top limb is non-zero here, so shift is in [0, 63]; a
+  // shift of 0 goes through the same code and simply copies the limbs.
+  const int shift = __builtin_clzll(m.limbs[n - 1]);
+
+  BigNum u, v;
+  // u = a << shift, with one extra high limb for the bits shifted out.
+  u.limbs.resize(total + 1, 0);
+  {
+    uint64_t carry = 0;
+    for (size_t i = 0; i < total; i++) {
+      const __uint128_t val =
+          (static_cast<__uint128_t>(a.limbs[i]) << shift) | carry;
+      u.limbs[i] = static_cast<uint64_t>(val);
+      carry = static_cast<uint64_t>(val >> 64);
+    }
+    u.limbs[total] = carry;
+  }
+
+  // v = m << shift; nothing is shifted out because the top limb has exactly
+  // `shift` leading zero bits.
+  v.limbs.resize(n, 0);
+  {
+    uint64_t carry = 0;
+    for (size_t i = 0; i < n; i++) {
+      const __uint128_t val =
+          (static_cast<__uint128_t>(m.limbs[i]) << shift) | carry;
+      v.limbs[i] = static_cast<uint64_t>(val);
+      carry = static_cast<uint64_t>(val >> 64);
+    }
+  }
+
+  const uint64_t vn_1 = v.limbs[n - 1];
+  const uint64_t vn_2 = v.limbs[n - 2];  // n >= 2 on this path
+
+  // Main loop: j is the index of the top limb of the current (n + 1)-limb
+  // window of u; one quotient digit is produced per iteration.
+  for (size_t j = total; j >= n; j--) {
+    // Estimate the quotient digit from the top two limbs of the window.
+    const __uint128_t num_top =
+        (static_cast<__uint128_t>(u.limbs[j]) << 64) | u.limbs[j - 1];
+    __uint128_t qhat = num_top / vn_1;
+    __uint128_t rhat = num_top % vn_1;
+
+    // Refine the estimate using the divisor's second limb; after this loop
+    // qhat fits in 64 bits and is at most one too large.
+    while (qhat > 0xFFFFFFFFFFFFFFFFULL ||
+           qhat * vn_2 > ((rhat << 64) | u.limbs[j - 2])) {
+      qhat--;
+      rhat += vn_1;
+      if (rhat > 0xFFFFFFFFFFFFFFFFULL) break;
+    }
+
+    // Multiply and subtract: u[j-n..j] -= qhat * v[0..n-1].
+    // `borrow` carries both the high product limb and the subtraction borrow.
+    const uint64_t q = static_cast<uint64_t>(qhat);
+    uint64_t borrow = 0;
+    for (size_t i = 0; i < n; i++) {
+      const __uint128_t prod = static_cast<__uint128_t>(q) * v.limbs[i] + borrow;
+      const uint64_t prod_lo = static_cast<uint64_t>(prod);
+      borrow = static_cast<uint64_t>(prod >> 64);
+      const uint64_t u_val = u.limbs[j - n + i];
+      u.limbs[j - n + i] = u_val - prod_lo;
+      if (u_val < prod_lo) borrow++;
+    }
+
+    // Fold the final borrow into the top limb. The test is done on the
+    // unsigned values: casting both to int64_t and checking the sign of the
+    // difference is signed overflow (undefined behaviour) once the operands
+    // straddle 2^63, and lets the compiler drop a required add-back.
+    const bool overshoot = u.limbs[j] < borrow;
+    u.limbs[j] -= borrow;
+
+    // If qhat was one too large the window went negative: add v back once.
+    if (overshoot) {
+      uint64_t carry = 0;
+      for (size_t i = 0; i < n; i++) {
+        const __uint128_t sum =
+            static_cast<__uint128_t>(u.limbs[j - n + i]) + v.limbs[i] + carry;
+        u.limbs[j - n + i] = static_cast<uint64_t>(sum);
+        carry = static_cast<uint64_t>(sum >> 64);
+      }
+      u.limbs[j] += carry;
+    }
+  }
+
+  // Remainder is u[0..n-1] >> shift (un-normalize), walking from the top limb
+  // down so each limb can pick up the low bits of the limb above it.
+  BigNum r;
+  r.limbs.resize(n, 0);
+  const uint64_t low_mask = (1ULL << shift) - 1;
+  uint64_t spill = 0;
+  for (size_t i = n; i > 0; i--) {
+    const __uint128_t val =
+        (static_cast<__uint128_t>(spill) << 64) | u.limbs[i - 1];
+    r.limbs[i - 1] = static_cast<uint64_t>(val >> shift);
+    spill = u.limbs[i - 1] & low_mask;
+  }
+
+  r.trim();
+  return r;
+}
+
+// Computes base^exp mod mod_val by left-to-right binary exponentiation:
+// square once per exponent bit, then multiply by the base when the bit is set.
+// Every intermediate product is reduced with mod().
+BigNum BigNum::modexp(const BigNum& base, uint32_t exp, const BigNum& mod_val) {
+  BigNum acc;
+  acc.limbs = {1};
+
+  // x^0 is 1; it is still reduced so that a modulus of 1 yields 0.
+  if (exp == 0) {
+    return mod(acc, mod_val);
+  }
+
+  const BigNum reduced_base = mod(base, mod_val);
+
+  // Locate the most-significant set bit; exp != 0, so the scan terminates.
+  uint32_t bit = 0x80000000u;
+  while ((exp & bit) == 0) {
+    bit >>= 1;
+  }
+
+  for (; bit != 0; bit >>= 1) {
+    acc = mod(mul(acc, acc), mod_val);
+    if ((exp & bit) != 0) {
+      acc = mod(mul(acc, reduced_base), mod_val);
+    }
+  }
+
+  return acc;
+}
+
+}  // namespace bignum
diff --git a/third_party/crypto/bignum.h b/third_party/crypto/bignum.h