Skip to content

Commit c2358e9

Browse files
committed
Add separate methods for strict Standard and URL Safe Base64 decoding
1 parent fde5c6b commit c2358e9

File tree

2 files changed

+413
-7
lines changed

2 files changed

+413
-7
lines changed

src/main/java/org/apache/commons/codec/binary/Base64.java

Lines changed: 307 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,58 @@ public Builder setUrlSafe(final boolean urlSafe) {
127127
return setEncodeTable(toUrlSafeEncodeTable(urlSafe));
128128
}
129129

130+
/**
131+
* Sets the format of the decoding table.
132+
* This method allows to explicitly state whether a "standard" or "URL Safe" Base64 decoding is expected.
133+
* <p>
134+
* Note: By default, the implementation uses the MIXED approach, allowing a seamless handling of
135+
* both URL_SAFE and STANDARD base64.
136+
* </p>
137+
*
138+
* @param format table format to be used on Base64 decoding.
139+
* @return {@code this} instance.
140+
*/
141+
public Builder setDecodeTableFormat(final DecodeTableFormat format) {
142+
switch (format) {
143+
case STANDARD:
144+
return super.setDecodeTableRaw(STANDARD_DECODE_TABLE);
145+
case URL_SAFE:
146+
return super.setDecodeTableRaw(URL_SAFE_DECODE_TABLE);
147+
case MIXED:
148+
default:
149+
return super.setDecodeTableRaw(DECODE_TABLE);
150+
}
151+
}
152+
153+
}
154+
155+
/**
 * Enumerates the Base64 table formats that can be selected for decoding.
 * <p>
 * Note: {@link #MIXED} is the default; it allows a seamless handling of both URL_SAFE and STANDARD base64.
 * </p>
 */
public enum DecodeTableFormat {

    /**
     * The "standard" Base64 coding table, as specified in Table 1 of RFC 2045.
     */
    STANDARD,

    /**
     * The "URL Safe" Base64 coding table, as specified in Table 2 of RFC 4648.
     */
    URL_SAFE,

    /**
     * A joint table that decodes both character sets, i.e. input following either Table 1 of RFC 2045
     * or Table 2 of RFC 4648.
     * <p>
     * Note: This decoding table is used by default.
     * </p>
     */
    MIXED

}
131183

132184
/**
@@ -170,7 +222,7 @@ public Builder setUrlSafe(final boolean urlSafe) {
170222
/**
171223
* This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
172224
* in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
173-
* alphabet but fall within the bounds of the array are translated to -1.
225+
* or Base64 URL Safe alphabets but fall within the bounds of the array are translated to -1.
174226
* <p>
175227
* Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
176228
* URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
@@ -181,7 +233,7 @@ public Builder setUrlSafe(final boolean urlSafe) {
181233
* </p>
182234
*/
183235
private static final byte[] DECODE_TABLE = {
184-
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
236+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
185237
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
186238
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
187239
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
@@ -192,6 +244,49 @@ public Builder setUrlSafe(final boolean urlSafe) {
192244
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
193245
};
194246

247+
/**
248+
* This array is a lookup table that translates characters drawn from the "Base64 Alphabet" (as specified
249+
* in Table 1 of RFC 2045) into their 6-bit non-negative integer equivalents. Characters that are not in the
250+
* Base64 alphabet but fall within the bounds of the array are translated to -1.
251+
* <p>
252+
* Note: This decoding table handles only the "standard" base64 characters, such as '+' and '/'.
253+
* The "url-safe" characters such as '-' and '_' are not supported by the table and map to -1.
254+
* </p>
* <p>
* The table only covers the indices 0x00 through 0x7a, so a byte must be range-checked before it is
* used as an index.
* </p>
255+
*/
256+
private static final byte[] STANDARD_DECODE_TABLE = {
257+
// Column header: low nibble of the character code.
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
258+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
259+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
260+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, // 20-2f + / ('-' at 0x2d is rejected)
261+
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
262+
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
263+
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 50-5f P-Z ('_' at 0x5f is rejected)
264+
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
265+
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
266+
};
267+
268+
/**
269+
* This array is a lookup table that translates characters drawn from the "Base64 URL Safe Alphabet"
270+
* (as specified in Table 2 of RFC 4648) into their 6-bit non-negative integer equivalents.
271+
* Characters that are not in the Base64 URL Safe alphabet but fall within the bounds of the array
272+
* are translated to -1.
273+
* <p>
274+
* Note: This decoding table handles only the "URL Safe" base64 characters, such as '-' and '_'.
275+
* The "standard" characters such as '+' and '/' are not supported by the table and map to -1.
276+
* </p>
* <p>
* The table only covers the indices 0x00 through 0x7a, so a byte must be range-checked before it is
* used as an index.
* </p>
277+
*/
278+
private static final byte[] URL_SAFE_DECODE_TABLE = {
279+
// Column header: low nibble of the character code.
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
280+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
281+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
282+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, // 20-2f - ('+' at 0x2b and '/' at 0x2f are rejected)
283+
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
284+
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
285+
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
286+
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
287+
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
288+
};
289+
195290
/**
196291
* Base64 uses 6-bit fields.
197292
*/
@@ -251,6 +346,11 @@ private static byte[] calculateDecodeTable(final byte[] encodeTable) {
251346
* Decodes Base64 data into octets.
252347
* <p>
253348
* <strong>Note:</strong> this method seamlessly handles data encoded in URL-safe or normal mode.
349+
* For enforcing verification against strict standard Base64 or Base64 URL Safe tables,
350+
* please use the {@link #decodeBase64Standard(byte[])} or {@link #decodeBase64Url(byte[])} methods respectively.
351+
* </p>
352+
* <p>
353+
* <strong>Note 2:</strong> this method skips any unknown or not supported bytes.
254354
* </p>
255355
*
256356
* @param base64Data
@@ -265,6 +365,11 @@ public static byte[] decodeBase64(final byte[] base64Data) {
265365
* Decodes a Base64 String into octets.
266366
* <p>
267367
* <strong>Note:</strong> this method seamlessly handles data encoded in URL-safe or normal mode.
368+
* For enforcing verification against strict standard Base64 or Base64 URL Safe tables,
369+
* please use the {@link #decodeBase64Standard(String)} or {@link #decodeBase64Url(String)} methods respectively.
370+
* </p>
371+
* <p>
372+
* <strong>Note 2:</strong> this method skips any unknown or not supported characters.
268373
* </p>
269374
*
270375
* @param base64String
@@ -276,6 +381,78 @@ public static byte[] decodeBase64(final String base64String) {
276381
return new Base64().decode(base64String);
277382
}
278383

384+
/**
385+
* Decodes standard Base64 data into octets.
386+
* <p>
387+
* Note: implementation of this method is aligned with the Table 1 of RFC 2045.
388+
* </p>
389+
* <p>
390+
* Note 2 this method skips any unknown or not supported bytes.
391+
* </p>
392+
*
393+
* @param base64Data
394+
* Byte array containing Base64 data
395+
* @return Array containing decoded data.
396+
* @since 1.21
397+
*/
398+
public static byte[] decodeBase64Standard(final byte[] base64Data) {
399+
return builder().setDecodeTableFormat(DecodeTableFormat.STANDARD).get().decode(base64Data);
400+
}
401+
402+
/**
403+
* Decodes a standard Base64 String into octets.
404+
* <p>
405+
* Note: implementation of this method is aligned with the Table 1 of RFC 2045.
406+
* </p>
407+
* <p>
408+
* Note 2: this method skips any unknown or not supported characters.
409+
* </p>
410+
*
411+
* @param base64String
412+
* String containing Base64 data
413+
* @return Array containing decoded data.
414+
* @since 1.21
415+
*/
416+
public static byte[] decodeBase64Standard(final String base64String) {
417+
return builder().setDecodeTableFormat(DecodeTableFormat.STANDARD).get().decode(base64String);
418+
}
419+
420+
/**
421+
* Decodes URL Safe Base64 data into octets.
422+
* <p>
423+
* Note: implementation of this method is aligned with the Table 2 of RFC 4648.
424+
* </p>
425+
* <p>
426+
* Note 2 this method skips any unknown or not supported bytes.
427+
* </p>
428+
*
429+
* @param base64Data
430+
* Byte array containing Base64 data
431+
* @return Array containing decoded data.
432+
* @since 1.21
433+
*/
434+
public static byte[] decodeBase64Url(final byte[] base64Data) {
435+
return builder().setDecodeTableFormat(DecodeTableFormat.URL_SAFE).get().decode(base64Data);
436+
}
437+
438+
/**
439+
* Decodes a URL Safe Base64 String into octets.
440+
* <p>
441+
* Note: implementation of this method is aligned with the Table 2 of RFC 4648.
442+
* </p>
443+
* <p>
444+
* Note 2 this method skips any unknown or not supported characters.
445+
* </p>
446+
*
447+
* @param base64String
448+
* String containing Base64 data
449+
* @return Array containing decoded data.
450+
* @since 1.21
451+
*/
452+
public static byte[] decodeBase64Url(final String base64String) {
453+
return builder().setDecodeTableFormat(DecodeTableFormat.URL_SAFE).get().decode(base64String);
454+
}
455+
279456
/**
280457
* Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
281458
*
@@ -452,6 +629,11 @@ public static boolean isArrayByteBase64(final byte[] arrayOctet) {
452629

453630
/**
454631
* Returns whether or not the {@code octet} is in the base 64 alphabet.
632+
* <p>
633+
* Note: this method treats the characters '+', '/', '-' and '_' all as valid base64 characters.
634+
* For enforcing verification against strict standard Base64 or Base64 URL Safe tables,
635+
* please use the {@link #isBase64Standard(byte)} or {@link #isBase64Url(byte)} methods respectively.
636+
* </p>
455637
*
456638
* @param octet
457639
* The value to test
@@ -465,6 +647,11 @@ public static boolean isBase64(final byte octet) {
465647
/**
466648
* Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
467649
* method treats whitespace as valid.
650+
* <p>
651+
* Note: this method treats the characters '+', '/', '-' and '_' all as valid base64 characters.
652+
* For enforcing verification against strict standard Base64 or Base64 URL Safe tables,
653+
* please use the {@link #isBase64Standard(byte[])} or {@link #isBase64Url(byte[])} methods respectively.
654+
* </p>
468655
*
469656
* @param arrayOctet
470657
* byte array to test
@@ -484,17 +671,134 @@ public static boolean isBase64(final byte[] arrayOctet) {
484671
/**
485672
* Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
486673
* method treats whitespace as valid.
674+
* <p>
675+
* Note: this method treats the characters '+', '/', '-' and '_' all as valid base64 characters.
676+
* For enforcing verification against strict standard Base64 or Base64 URL Safe tables,
677+
* please use the {@link #isBase64Standard(String)} or {@link #isBase64Url(String)} methods respectively.
678+
* </p>
487679
*
488680
* @param base64
489681
* String to test
490682
* @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if
491683
* the String is empty; {@code false}, otherwise
492-
* @since 1.5
684+
* @since 1.5
493685
*/
494686
public static boolean isBase64(final String base64) {
495687
return isBase64(StringUtils.getBytesUtf8(base64));
496688
}
497689

690+
/**
691+
* Returns whether or not the {@code octet} is in the standard base 64 alphabet.
692+
* <p>
693+
* Note: implementation of this method is aligned with the Table 1 of RFC 2045.
694+
* </p>
695+
*
696+
* @param octet
697+
* The value to test
698+
* @return {@code true} if the value is defined in the standard base 64 alphabet,
699+
* {@code false} otherwise.
700+
* @since 1.21
701+
*/
702+
public static boolean isBase64Standard(final byte octet) {
703+
return octet == PAD_DEFAULT || octet >= 0 && octet < STANDARD_DECODE_TABLE.length && STANDARD_DECODE_TABLE[octet] != -1;
704+
}
705+
706+
/**
707+
* Tests a given byte array to see if it contains only valid characters within the standard Base64 alphabet.
708+
* The method treats whitespace as valid.
709+
* <p>
710+
* Note: implementation of this method is aligned with the Table 1 of RFC 2045.
711+
* </p>
712+
*
713+
* @param arrayOctet
714+
* byte array to test
715+
* @return {@code true} if all bytes are valid characters in the standard Base64 alphabet.
716+
* {@code false}, otherwise
717+
* @since 1.21
718+
*/
719+
public static boolean isBase64Standard(final byte[] arrayOctet) {
720+
for (final byte element : arrayOctet) {
721+
if (!isBase64Standard(element) && !Character.isWhitespace(element)) {
722+
return false;
723+
}
724+
}
725+
return true;
726+
}
727+
728+
/**
729+
* Tests a given String to see if it contains only valid characters within the standard Base64 alphabet.
730+
* The method treats whitespace as valid.
731+
* <p>
732+
* Note: implementation of this method is aligned with the Table 1 of RFC 2045.
733+
* </p>
734+
*
735+
* @param base64
736+
* String to test
737+
* @return {@code true} if all characters in the String are valid characters in the standard Base64 alphabet or
738+
* if the String is empty;
739+
* {@code false}, otherwise
740+
* @since 1.21
741+
*/
742+
public static boolean isBase64Standard(final String base64) {
743+
return isBase64Standard(StringUtils.getBytesUtf8(base64));
744+
}
745+
746+
/**
747+
* Returns whether or not the {@code octet} is in the url safe base 64 alphabet.
748+
* <p>
749+
* Note: implementation of this method is aligned with the Table 2 of RFC 4648.
750+
* </p>
751+
*
752+
* @param octet
753+
* The value to test
754+
* @return {@code true} if the value is defined in the url safe base 64 alphabet,
755+
* {@code false} otherwise.
756+
* @since 1.21
757+
*/
758+
public static boolean isBase64Url(final byte octet) {
759+
return octet == PAD_DEFAULT || octet >= 0 && octet < URL_SAFE_DECODE_TABLE.length && URL_SAFE_DECODE_TABLE[octet] != -1;
760+
}
761+
762+
/**
763+
* Tests a given byte array to see if it contains only valid characters within the URL Safe Base64 alphabet.
764+
* The method treats whitespace as valid.
765+
* <p>
766+
* Note: implementation of this method is aligned with the Table 2 of RFC 4648.
767+
* </p>
768+
*
769+
* @param arrayOctet
770+
* byte array to test
771+
* @return {@code true} if all bytes are valid characters in the URL Safe Base64 alphabet.
772+
* {@code false}, otherwise
773+
* @since 1.21
774+
*/
775+
public static boolean isBase64Url(final byte[] arrayOctet) {
776+
for (final byte element : arrayOctet) {
777+
if (!isBase64Url(element) && !Character.isWhitespace(element)) {
778+
return false;
779+
}
780+
}
781+
return true;
782+
}
783+
784+
/**
785+
* Tests a given String to see if it contains only valid characters within the URL Safe Base64 alphabet.
786+
* The method treats whitespace as valid.
787+
* <p>
788+
* Note: implementation of this method is aligned with the Table 2 of RFC 4648.
789+
* </p>
790+
*
791+
* @param base64
792+
* String to test
793+
* @return {@code true} if all characters in the String are valid characters in the URL Safe Base64 alphabet or
794+
* if the String is empty;
795+
* {@code false}, otherwise
796+
* @since 1.21
797+
*/
798+
public static boolean isBase64Url(final String base64) {
799+
return isBase64Url(StringUtils.getBytesUtf8(base64));
800+
}
801+
498802
/**
499803
* Returns a byte-array representation of a {@code BigInteger} without sign bit.
500804
*

0 commit comments

Comments
 (0)