Skip to content

Commit 6c370dc

Browse files
committed
WIP XeCryptBnQwNeRsaPubCrypt
1 parent 32eee83 commit 6c370dc

File tree

3 files changed

+304
-93
lines changed

3 files changed

+304
-93
lines changed

src/xenia/kernel/xboxkrnl/xboxkrnl_crypt.cc

Lines changed: 30 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,8 @@
1616
#include "xenia/kernel/xboxkrnl/xboxkrnl_private.h"
1717
#include "xenia/xbox.h"
1818

19-
#ifdef XE_PLATFORM_WIN32
20-
#include "xenia/base/platform_win.h" // for bcrypt.h
21-
#endif
22-
2319
#include "third_party/crypto/TinySHA1.hpp"
20+
#include "third_party/crypto/bignum.cpp"
2421
#include "third_party/crypto/des/des.cpp"
2522
#include "third_party/crypto/des/des.h"
2623
#include "third_party/crypto/des/des3.h"
@@ -418,104 +415,44 @@ dword_result_t XeCryptBnQwNeRsaPubCrypt_entry(pointer_t<uint64_t> qw_a,
418415
pointer_t<uint64_t> qw_b,
419416
pointer_t<XECRYPT_RSA> rsa) {
420417
// 0 indicates failure (but not a BOOL return value)
421-
#ifndef XE_PLATFORM_WIN32
422-
XELOGE(
423-
"XeCryptBnQwNeRsaPubCrypt called but no implementation available for "
424-
"this platform!");
425-
assert_always();
426-
return 1;
427-
#else
428-
uint32_t modulus_size = rsa->size * 8;
429-
430-
// Convert XECRYPT blob into BCrypt format
431-
ULONG key_size = sizeof(BCRYPT_RSAKEY_BLOB) + sizeof(uint32_t) + modulus_size;
432-
auto key_buf = std::make_unique<uint8_t[]>(key_size);
433-
auto* key_header = reinterpret_cast<BCRYPT_RSAKEY_BLOB*>(key_buf.get());
434-
435-
key_header->Magic = BCRYPT_RSAPUBLIC_MAGIC;
436-
key_header->BitLength = modulus_size * 8;
437-
key_header->cbPublicExp = sizeof(uint32_t);
438-
key_header->cbModulus = modulus_size;
439-
key_header->cbPrime1 = key_header->cbPrime2 = 0;
440-
441-
// Copy in exponent/modulus, luckily these are BE inside BCrypt blob
442-
uint32_t* key_exponent = reinterpret_cast<uint32_t*>(&key_header[1]);
443-
*key_exponent = rsa->public_exponent.value;
444-
445-
// ...except modulus needs to be reversed in 64-bit chunks for BCrypt to make
446-
// use of it properly for some reason
447-
uint64_t* key_modulus = reinterpret_cast<uint64_t*>(&key_exponent[1]);
448-
uint64_t* xecrypt_modulus = reinterpret_cast<uint64_t*>(&rsa[1]);
449-
std::reverse_copy(xecrypt_modulus, xecrypt_modulus + rsa->size, key_modulus);
450-
451-
BCRYPT_ALG_HANDLE hAlgorithm = NULL;
452-
NTSTATUS status = BCryptOpenAlgorithmProvider(
453-
&hAlgorithm, BCRYPT_RSA_ALGORITHM, MS_PRIMITIVE_PROVIDER, 0);
454-
455-
if (!BCRYPT_SUCCESS(status)) {
456-
XELOGE(
457-
"XeCryptBnQwNeRsaPubCrypt: BCryptOpenAlgorithmProvider failed with "
458-
"status {:#X}!",
459-
status);
460-
return 0;
418+
uint32_t num_qwords = rsa->size;
419+
uint32_t modulus_size = num_qwords * 8;
420+
uint32_t exponent = rsa->public_exponent;
421+
422+
// Xbox stores bignums as BE uint64 limbs in LE limb order.
423+
// To get a flat big-endian byte array, reverse the qword order.
424+
// The bytes within each qword are already BE (raw byte layout preserved).
425+
auto input_be = std::vector<uint8_t>(modulus_size);
426+
auto mod_be = std::vector<uint8_t>(modulus_size);
427+
428+
const uint8_t* input_bytes = reinterpret_cast<const uint8_t*>(&qw_a[0]);
429+
const uint8_t* mod_bytes =
430+
reinterpret_cast<const uint8_t*>(&rsa[1]); // modulus follows header
431+
432+
// Reverse qword order to produce big-endian byte arrays
433+
for (uint32_t i = 0; i < num_qwords; i++) {
434+
std::memcpy(&input_be[i * 8], &input_bytes[(num_qwords - 1 - i) * 8], 8);
435+
std::memcpy(&mod_be[i * 8], &mod_bytes[(num_qwords - 1 - i) * 8], 8);
461436
}
462437

463-
BCRYPT_KEY_HANDLE hKey = NULL;
464-
status = BCryptImportKeyPair(hAlgorithm, NULL, BCRYPT_RSAPUBLIC_BLOB, &hKey,
465-
key_buf.get(), key_size, 0);
438+
auto base = bignum::BigNum::from_bytes_be(input_be.data(), modulus_size);
439+
auto modulus = bignum::BigNum::from_bytes_be(mod_be.data(), modulus_size);
466440

467-
if (!BCRYPT_SUCCESS(status)) {
468-
XELOGE(
469-
"XeCryptBnQwNeRsaPubCrypt: BCryptImportKeyPair failed with status "
470-
"{:#X}!",
471-
status);
441+
auto result = bignum::BigNum::modexp(base, exponent, modulus);
472442

473-
if (hAlgorithm) {
474-
BCryptCloseAlgorithmProvider(hAlgorithm, 0);
475-
}
443+
// Convert result back to big-endian bytes
444+
auto result_be = std::vector<uint8_t>(modulus_size);
445+
result.to_bytes_be(result_be.data(), modulus_size);
476446

477-
return 0;
447+
// Convert back to Xbox format: reverse qword order
448+
uint8_t* output_bytes = reinterpret_cast<uint8_t*>(&qw_b[0]);
449+
for (uint32_t i = 0; i < num_qwords; i++) {
450+
std::memcpy(&output_bytes[i * 8], &result_be[(num_qwords - 1 - i) * 8], 8);
478451
}
479452

480-
// Byteswap & reverse the input into output, as BCrypt wants MSB first
481-
uint64_t* output = qw_b;
482-
uint8_t* output_bytes = reinterpret_cast<uint8_t*>(output);
483-
xe::copy_and_swap<uint64_t>(output, qw_a, rsa->size);
484-
std::reverse(output_bytes, output_bytes + modulus_size);
485-
486-
// BCryptDecrypt only works with private keys, fortunately BCryptEncrypt
487-
// performs the right actions needed for us to decrypt the input
488-
ULONG result_size = 0;
489-
status =
490-
BCryptEncrypt(hKey, output_bytes, modulus_size, nullptr, nullptr, 0,
491-
output_bytes, modulus_size, &result_size, BCRYPT_PAD_NONE);
492-
493-
assert(result_size == modulus_size);
494-
495-
if (!BCRYPT_SUCCESS(status)) {
496-
XELOGE("XeCryptBnQwNeRsaPubCrypt: BCryptEncrypt failed with status {:#X}!",
497-
status);
498-
} else {
499-
// Reverse data & byteswap again so data is as game expects
500-
std::reverse(output_bytes, output_bytes + modulus_size);
501-
xe::copy_and_swap(output, output, rsa->size);
502-
}
503-
504-
if (hKey) {
505-
BCryptDestroyKey(hKey);
506-
}
507-
if (hAlgorithm) {
508-
BCryptCloseAlgorithmProvider(hAlgorithm, 0);
509-
}
510-
511-
return BCRYPT_SUCCESS(status) ? 1 : 0;
512-
#endif
453+
return 1;
513454
}
514-
#ifdef XE_PLATFORM_WIN32
515455
DECLARE_XBOXKRNL_EXPORT1(XeCryptBnQwNeRsaPubCrypt, kNone, kImplemented);
516-
#else
517-
DECLARE_XBOXKRNL_EXPORT1(XeCryptBnQwNeRsaPubCrypt, kNone, kStub);
518-
#endif
519456

520457
dword_result_t XeCryptBnDwLePkcs1Verify_entry(lpvoid_t hash, lpvoid_t sig,
521458
dword_t size) {

third_party/crypto/bignum.cpp

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
#include "third_party/crypto/bignum.h"
2+
3+
namespace bignum {
4+
5+
void BigNum::trim() {
6+
while (limbs.size() > 1 && limbs.back() == 0) {
7+
limbs.pop_back();
8+
}
9+
}
10+
11+
// Builds a BigNum from `len` big-endian bytes at `data`.
// Limbs are stored least-significant first, so byte k counted from the END of
// the buffer lands in limb k / 8 at bit offset 8 * (k % 8).
BigNum BigNum::from_bytes_be(const uint8_t* data, size_t len) {
  BigNum value;
  // Enough 8-byte limbs to hold every input byte (rounded up).
  value.limbs.resize((len + 7) / 8, 0);

  // Walk from the least-significant byte (end of the buffer) upwards.
  for (size_t pos = 0; pos < len; ++pos) {
    const uint64_t byte = data[len - 1 - pos];
    value.limbs[pos / 8] |= byte << (8 * (pos % 8));
  }

  value.trim();
  return value;
}
27+
28+
void BigNum::to_bytes_be(uint8_t* out, size_t len) const {
29+
std::memset(out, 0, len);
30+
for (size_t i = 0; i < len; i++) {
31+
size_t byte_pos = len - 1 - i; // position from LSB
32+
size_t li = byte_pos / 8;
33+
if (li < limbs.size()) {
34+
out[i] = static_cast<uint8_t>(limbs[li] >> (8 * (byte_pos % 8)));
35+
}
36+
}
37+
}
38+
39+
int BigNum::compare(const BigNum& a, const BigNum& b) {
40+
size_t an = a.limbs.size(), bn = b.limbs.size();
41+
size_t n = std::max(an, bn);
42+
for (size_t i = n; i > 0; i--) {
43+
uint64_t al = (i - 1 < an) ? a.limbs[i - 1] : 0;
44+
uint64_t bl = (i - 1 < bn) ? b.limbs[i - 1] : 0;
45+
if (al < bl) return -1;
46+
if (al > bl) return 1;
47+
}
48+
return 0;
49+
}
50+
51+
// Returns a - b.
// Assumes a >= b; if that does not hold the result wraps modulo
// 2^(64 * a.limbs.size()). Limbs of b beyond a's length are ignored.
//
// The borrow is tracked with plain 64-bit arithmetic so this routine does not
// depend on the compiler-specific 128-bit integer extension.
BigNum BigNum::sub(const BigNum& a, const BigNum& b) {
  BigNum difference;
  const size_t count = a.limbs.size();
  difference.limbs.resize(count, 0);

  uint64_t borrow = 0;
  for (size_t i = 0; i < count; i++) {
    const uint64_t minuend = a.limbs[i];
    const uint64_t subtrahend = (i < b.limbs.size()) ? b.limbs[i] : 0;

    // Unsigned subtraction wraps; a borrow out occurs if either step
    // (minuend - subtrahend, then - borrow) wrapped around.
    const uint64_t partial = minuend - subtrahend;
    const uint64_t borrow_out =
        (minuend < subtrahend || partial < borrow) ? 1 : 0;

    difference.limbs[i] = partial - borrow;
    borrow = borrow_out;
  }

  difference.trim();
  return difference;
}
67+
68+
// Schoolbook multiplication: returns a * b.
// Each limb of `a` is multiplied by every limb of `b` and accumulated into
// the result at the matching offset, using a 128-bit intermediate to capture
// the carry.
BigNum BigNum::mul(const BigNum& a, const BigNum& b) {
  const size_t a_len = a.limbs.size();
  const size_t b_len = b.limbs.size();

  BigNum product;
  product.limbs.resize(a_len + b_len, 0);

  for (size_t row = 0; row < a_len; ++row) {
    const __uint128_t multiplicand = a.limbs[row];
    uint64_t high = 0;
    for (size_t col = 0; col < b_len; ++col) {
      uint64_t& slot = product.limbs[row + col];
      // limb * limb + previous contents + carry always fits in 128 bits.
      const __uint128_t acc = multiplicand * b.limbs[col] + slot + high;
      slot = static_cast<uint64_t>(acc);
      high = static_cast<uint64_t>(acc >> 64);
    }
    // This slot has not been written by earlier rows, so the final carry of
    // the row simply lands here.
    product.limbs[row + b_len] += high;
  }

  product.trim();
  return product;
}
87+
88+
// Knuth Algorithm D: multi-precision division, returns remainder
89+
BigNum BigNum::mod(const BigNum& a, const BigNum& m) {
90+
if (compare(a, m) < 0) return a;
91+
92+
size_t n = m.limbs.size();
93+
size_t total = a.limbs.size();
94+
95+
if (n == 0 || (n == 1 && m.limbs[0] == 0)) {
96+
return BigNum(); // division by zero guard
97+
}
98+
99+
// Single-limb divisor fast path
100+
if (n == 1) {
101+
uint64_t d = m.limbs[0];
102+
uint64_t rem = 0;
103+
for (size_t i = total; i > 0; i--) {
104+
__uint128_t cur = (static_cast<__uint128_t>(rem) << 64) | a.limbs[i - 1];
105+
rem = static_cast<uint64_t>(cur % d);
106+
}
107+
BigNum r;
108+
r.limbs = {rem};
109+
r.trim();
110+
return r;
111+
}
112+
113+
// Normalize: shift so that the MSB of the divisor's top limb is set
114+
int shift = 0;
115+
uint64_t top = m.limbs[n - 1];
116+
if (top != 0) {
117+
shift = __builtin_clzll(top);
118+
}
119+
120+
// Create normalized copies
121+
BigNum u, v;
122+
// u = a << shift, with one extra limb
123+
u.limbs.resize(total + 1, 0);
124+
if (shift > 0) {
125+
uint64_t carry = 0;
126+
for (size_t i = 0; i < total; i++) {
127+
__uint128_t val = (static_cast<__uint128_t>(a.limbs[i]) << shift) | carry;
128+
u.limbs[i] = static_cast<uint64_t>(val);
129+
carry = static_cast<uint64_t>(val >> 64);
130+
}
131+
u.limbs[total] = carry;
132+
} else {
133+
for (size_t i = 0; i < total; i++) u.limbs[i] = a.limbs[i];
134+
u.limbs[total] = 0;
135+
}
136+
137+
v.limbs.resize(n, 0);
138+
if (shift > 0) {
139+
uint64_t carry = 0;
140+
for (size_t i = 0; i < n; i++) {
141+
__uint128_t val = (static_cast<__uint128_t>(m.limbs[i]) << shift) | carry;
142+
v.limbs[i] = static_cast<uint64_t>(val);
143+
carry = static_cast<uint64_t>(val >> 64);
144+
}
145+
} else {
146+
v.limbs = m.limbs;
147+
}
148+
149+
uint64_t vn_1 = v.limbs[n - 1];
150+
uint64_t vn_2 = (n >= 2) ? v.limbs[n - 2] : 0;
151+
152+
// Main loop: for each quotient digit position
153+
for (size_t j = total; j >= n; j--) {
154+
// Estimate quotient digit
155+
__uint128_t num_top =
156+
(static_cast<__uint128_t>(u.limbs[j]) << 64) | u.limbs[j - 1];
157+
__uint128_t qhat = num_top / vn_1;
158+
__uint128_t rhat = num_top % vn_1;
159+
160+
// Refine estimate
161+
while (qhat > 0xFFFFFFFFFFFFFFFFULL ||
162+
qhat * vn_2 >
163+
((rhat << 64) | u.limbs[j - 2])) {
164+
qhat--;
165+
rhat += vn_1;
166+
if (rhat > 0xFFFFFFFFFFFFFFFFULL) break;
167+
}
168+
169+
// Multiply and subtract: u[j-n..j] -= qhat * v[0..n-1]
170+
uint64_t carry = 0;
171+
for (size_t i = 0; i < n; i++) {
172+
__uint128_t prod =
173+
static_cast<__uint128_t>(static_cast<uint64_t>(qhat)) * v.limbs[i] +
174+
carry;
175+
uint64_t prod_lo = static_cast<uint64_t>(prod);
176+
carry = static_cast<uint64_t>(prod >> 64);
177+
uint64_t u_val = u.limbs[j - n + i];
178+
u.limbs[j - n + i] = u_val - prod_lo;
179+
if (u_val < prod_lo) carry++;
180+
}
181+
int64_t final_diff =
182+
static_cast<int64_t>(u.limbs[j]) - static_cast<int64_t>(carry);
183+
u.limbs[j] = static_cast<uint64_t>(final_diff);
184+
185+
// If we subtracted too much, add back
186+
if (final_diff < 0) {
187+
uint64_t carry = 0;
188+
for (size_t i = 0; i < n; i++) {
189+
__uint128_t sum = static_cast<__uint128_t>(u.limbs[j - n + i]) +
190+
v.limbs[i] + carry;
191+
u.limbs[j - n + i] = static_cast<uint64_t>(sum);
192+
carry = static_cast<uint64_t>(sum >> 64);
193+
}
194+
u.limbs[j] += carry;
195+
}
196+
}
197+
198+
// Remainder is u[0..n-1] >> shift (un-normalize)
199+
BigNum r;
200+
r.limbs.resize(n, 0);
201+
if (shift > 0) {
202+
uint64_t carry = 0;
203+
for (size_t i = n; i > 0; i--) {
204+
__uint128_t val =
205+
(static_cast<__uint128_t>(carry) << 64) | u.limbs[i - 1];
206+
r.limbs[i - 1] = static_cast<uint64_t>(val >> shift);
207+
carry = u.limbs[i - 1] & ((1ULL << shift) - 1);
208+
}
209+
} else {
210+
for (size_t i = 0; i < n; i++) r.limbs[i] = u.limbs[i];
211+
}
212+
213+
r.trim();
214+
return r;
215+
}
216+
217+
// Computes (base ^ exp) mod mod_val using left-to-right binary
// square-and-multiply.
//
// base:    value to exponentiate; reduced modulo mod_val before use.
// exp:     32-bit exponent.
// mod_val: modulus. Zero-modulus handling is whatever mod() does for a zero
//          divisor.
BigNum BigNum::modexp(const BigNum& base, uint32_t exp, const BigNum& mod_val) {
  BigNum result;
  result.limbs = {1};

  // x^0 == 1, still reduced so that a modulus of 1 yields 0.
  if (exp == 0) {
    return mod(result, mod_val);
  }

  // Find the highest set bit with a portable scan (exp != 0 here, so the
  // loop terminates) rather than a compiler-specific builtin.
  int highest_bit = 31;
  while (((exp >> highest_bit) & 1u) == 0) {
    highest_bit--;
  }

  const BigNum b = mod(base, mod_val);

  // For every exponent bit from the top: square, then multiply by the base
  // when the bit is set. Reducing after each step bounds the operand size.
  for (int i = highest_bit; i >= 0; i--) {
    result = mod(mul(result, result), mod_val);
    if ((exp >> i) & 1) {
      result = mod(mul(result, b), mod_val);
    }
  }

  return result;
}
240+
241+
} // namespace bignum

0 commit comments

Comments
 (0)