Skip to content

Commit 158484f

Browse files
committed
Fix more corner cases in base64 decoder
1 parent fb17e34 commit 158484f

File tree

2 files changed

+41
-10
lines changed

2 files changed

+41
-10
lines changed

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_base64.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
*graalpython.lib-python.3.test.test_base64.BaseXYTestCase.test_b32decode_error
1111
*graalpython.lib-python.3.test.test_base64.BaseXYTestCase.test_b32encode
1212
*graalpython.lib-python.3.test.test_base64.BaseXYTestCase.test_b64decode
13+
*graalpython.lib-python.3.test.test_base64.BaseXYTestCase.test_b64decode_invalid_chars
14+
*graalpython.lib-python.3.test.test_base64.BaseXYTestCase.test_b64decode_padding_error
1315
*graalpython.lib-python.3.test.test_base64.BaseXYTestCase.test_b64encode
1416
*graalpython.lib-python.3.test.test_base64.BaseXYTestCase.test_b85_padding
1517
*graalpython.lib-python.3.test.test_base64.BaseXYTestCase.test_b85decode

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BinasciiModuleBuiltins.java

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -171,18 +171,47 @@ PBytes doConvert(VirtualFrame frame, Object buffer,
171171
@TruffleBoundary
172172
private ByteSequenceStorage b64decode(byte[] data, int dataLen) {
173173
try {
174-
// Remove superfluous padding, Java refuses it but CPython ignores it
175-
int end;
176-
for (end = dataLen - 1; end >= 0; end--) {
177-
if (data[end] != '=') {
178-
break;
174+
/*
175+
* The JDK decoder behaves differently from CPython in some corner cases. It is more restrictive
176+
* regarding superfluous padding. On the other hand, it's more permissive when it
177+
* comes to lack of padding. We compute the expected padding ourselves to cover
178+
* these two cases manually.
179+
*/
180+
// Compute the expected and real padding
181+
int base64chars = 0;
182+
int lastBase64Char = -1;
183+
int padding = 0;
184+
for (int i = 0; i < dataLen; i++) {
185+
byte c = data[i];
186+
if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '+' || c == '/') {
187+
lastBase64Char = i;
188+
base64chars++;
189+
padding = 0;
190+
}
191+
if (c == '=') {
192+
padding++;
193+
}
194+
}
195+
int expectedPadding = 0;
196+
if (base64chars % 4 == 1) {
197+
throw PRaiseNode.raiseUncached(this, BinasciiError, "Invalid base64-encoded string: number of data characters (1) cannot be 1 more than a multiple of 4");
198+
} else if (base64chars % 4 == 2) {
199+
expectedPadding = 2;
200+
} else if (base64chars % 4 == 3) {
201+
expectedPadding = 1;
202+
}
203+
if (padding < expectedPadding) {
204+
throw PRaiseNode.raiseUncached(this, BinasciiError, "Incorrect padding");
205+
}
206+
// Find the end of the expected padding, if any
207+
int decodeLen = lastBase64Char + 1;
208+
int correctedPadding = 0;
209+
for (int i = decodeLen; correctedPadding < expectedPadding && i < dataLen; i++) {
210+
if (data[i] == '=') {
211+
correctedPadding++;
212+
decodeLen = i + 1;
179213
}
180214
}
181-
// The length without the padding
182-
int unpaddedLen = end + 1;
183-
// Round up to a multiple of 4 to get correct minimal padding. Clamp to dataLen in
184-
// case the data is not padded correctly
185-
int decodeLen = Math.min((unpaddedLen + 3) & ~3, dataLen);
186215
// Using MIME decoder because that one skips over anything that is not in the base64 alphabet,
187216
// just like CPython does
188217
ByteBuffer result = Base64.getMimeDecoder().decode(ByteBuffer.wrap(data, 0, decodeLen));

0 commit comments

Comments
 (0)