From 3d575e2ae5f6a34ddff6ee633aef9e904251e6ad Mon Sep 17 00:00:00 2001 From: RatanKokal Date: Wed, 1 Apr 2026 00:53:33 +0530 Subject: [PATCH 1/7] feat: add base64 support Signed-off-by: RatanKokal --- src/strings/CMakeLists.txt | 1 + src/strings/stdlib_base64.fypp | 388 ++++++++++++++++++++++++++++++++ src/strings/stdlib_strings.fypp | 2 + test/string/CMakeLists.txt | 1 + test/string/test_base64.f90 | 155 +++++++++++++ 5 files changed, 547 insertions(+) create mode 100644 src/strings/stdlib_base64.fypp create mode 100644 test/string/test_base64.f90 diff --git a/src/strings/CMakeLists.txt b/src/strings/CMakeLists.txt index ad654dc9f..bc821866b 100644 --- a/src/strings/CMakeLists.txt +++ b/src/strings/CMakeLists.txt @@ -1,4 +1,5 @@ set(strings_fppFiles + stdlib_base64.fypp stdlib_string_type.fypp stdlib_string_type_constructor.fypp stdlib_str2num.fypp diff --git a/src/strings/stdlib_base64.fypp b/src/strings/stdlib_base64.fypp new file mode 100644 index 000000000..12750b108 --- /dev/null +++ b/src/strings/stdlib_base64.fypp @@ -0,0 +1,388 @@ +! SPDX-Identifier: MIT +#:include "common.fypp" + +module stdlib_base64 + use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk, c_bool + implicit none + private + + public :: base64_encode, base64_decode + + character(len=*), parameter :: B64_ALPHABET = & + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" + + interface base64_encode +#:for k1, t1, _, _ in REAL_KINDS_TYPES + module procedure :: base64_encode_real_${k1}$ +#:endfor +#:for k1, t1, _, _ in INT_KINDS_TYPES + module procedure :: base64_encode_int_${k1}$ +#:endfor +#:for k1, t1, _ in CMPLX_KINDS_TYPES + module procedure :: base64_encode_cmplx_${k1}$ +#:endfor +#:for k1, t1 in LOG_KINDS_TYPES + module procedure :: base64_encode_logical_${k1}$ +#:endfor + end interface base64_encode + +contains + +#:for k1, t1, _, _ in REAL_KINDS_TYPES + module function base64_encode_real_${k1}$(data) result(str) + ${t1}$, intent(in) :: data(..) + character(len=:), allocatable :: str + + ${t1}$, allocatable :: flat(:) + integer(int8), allocatable :: bytes(:) + integer :: nbytes + + select rank(d => data) + rank(0) + allocate(flat(1)) + flat(1) = d + rank(1) + flat = d +#:for r in range(2, MAXRANK + 1) + rank(${r}$) + flat = pack(d, .true.) +#:endfor + rank default + str = "" + return + end select + + if (size(flat) == 0) then + str = "" + return + end if + + nbytes = size(flat) * storage_size(flat(1)) / 8 + bytes = transfer(flat, [0_int8], nbytes) + str = base64_encode_bytes(bytes) + end function base64_encode_real_${k1}$ + +#:endfor +#:for k1, t1, _, _ in INT_KINDS_TYPES + module function base64_encode_int_${k1}$(data) result(str) + ${t1}$, intent(in) :: data(..) + character(len=:), allocatable :: str + + ${t1}$, allocatable :: flat(:) + integer(int8), allocatable :: bytes(:) + integer :: nbytes + + select rank(d => data) + rank(0) + allocate(flat(1)) + flat(1) = d + rank(1) + flat = d +#:for r in range(2, MAXRANK + 1) + rank(${r}$) + flat = pack(d, .true.) +#:endfor + rank default + str = "" + return + end select + + if (size(flat) == 0) then + str = "" + return + end if + + nbytes = size(flat) * storage_size(flat(1)) / 8 + bytes = transfer(flat, [0_int8], nbytes) + str = base64_encode_bytes(bytes) + end function base64_encode_int_${k1}$ + +#:endfor +#:for k1, t1, _ in CMPLX_KINDS_TYPES + module function base64_encode_cmplx_${k1}$(data) result(str) + ${t1}$, intent(in) :: data(..) + character(len=:), allocatable :: str + + ${t1}$, allocatable :: flat(:) + integer(int8), allocatable :: bytes(:) + integer :: nbytes + + select rank(d => data) + rank(0) + allocate(flat(1)) + flat(1) = d + rank(1) + flat = d +#:for r in range(2, MAXRANK + 1) + rank(${r}$) + flat = pack(d, .true.) +#:endfor + rank default + str = "" + return + end select + + if (size(flat) == 0) then + str = "" + return + end if + + nbytes = size(flat) * storage_size(flat(1)) / 8 + bytes = transfer(flat, [0_int8], nbytes) + str = base64_encode_bytes(bytes) + end function base64_encode_cmplx_${k1}$ + +#:endfor +#:for k1, t1 in LOG_KINDS_TYPES + module function base64_encode_logical_${k1}$(data) result(str) + ${t1}$, intent(in) :: data(..) + character(len=:), allocatable :: str + + ${t1}$, allocatable :: flat(:) + integer(int8), allocatable :: bytes(:) + integer :: nbytes + + select rank(d => data) + rank(0) + allocate(flat(1)) + flat(1) = d + rank(1) + flat = d +#:for r in range(2, MAXRANK + 1) + rank(${r}$) + flat = pack(d, .true.) +#:endfor + rank default + str = "" + return + end select + + if (size(flat) == 0) then + str = "" + return + end if + + nbytes = size(flat) * storage_size(flat(1)) / 8 + bytes = transfer(flat, [0_int8], nbytes) + str = base64_encode_bytes(bytes) + end function base64_encode_logical_${k1}$ + +#:endfor + function base64_encode_bytes(bytes) result(str) + integer(int8), intent(in) :: bytes(:) + character(len=:), allocatable :: str + + integer :: nbytes, out_len, i, o, rem + integer(int32) :: b0, b1, b2, s0, s1, s2, s3 + + nbytes = size(bytes) + if (nbytes == 0) then + str = "" + return + end if + + out_len = 4 * ((nbytes + 2) / 3) + allocate(character(len=out_len) :: str) + + i = 1 + o = 1 + do while (i <= nbytes) + rem = nbytes - i + 1 + + b0 = iand(int(bytes(i), int32), int(z'FF', int32)) + if (rem >= 2) then + b1 = iand(int(bytes(i + 1), int32), int(z'FF', int32)) + else + b1 = 0_int32 + end if + if (rem >= 3) then + b2 = iand(int(bytes(i + 2), int32), int(z'FF', int32)) + else + b2 = 0_int32 + end if + + s0 = ishft(b0, -2) + s1 = ior(ishft(iand(b0, 3_int32), 4), ishft(b1, -4)) + s2 = ior(ishft(iand(b1, 15_int32), 2), ishft(b2, -6)) + s3 = iand(b2, 63_int32) + + str(o:o) = B64_ALPHABET(s0 + 1:s0 + 1) + str(o + 1:o + 1) = B64_ALPHABET(s1 + 1:s1 + 1) + + if (rem == 1) then + str(o + 2:o + 2) = "=" + str(o + 3:o + 3) = "=" + else if (rem == 2) then + str(o + 2:o + 2) = B64_ALPHABET(s2 + 1:s2 + 1) + str(o + 3:o + 3) = "=" + else + str(o + 2:o + 2) = B64_ALPHABET(s2 + 1:s2 + 1) + str(o + 3:o + 3) = B64_ALPHABET(s3 + 1:s3 + 1) + end if + + i = i + 3 + o = o + 4 + end do + end function base64_encode_bytes + + function base64_decode(str) result(res) + character(len=*), intent(in) :: str + character(len=:), allocatable :: res + + character(len=:), allocatable :: packed + integer(int8), allocatable :: bytes(:) + integer :: n, i, p, pad, out_len + integer(int32) :: s0, s1, s2, s3, b0, b1, b2 + character :: c2, c3, c4 + + n = 0 + do i = 1, len(str) + if (.not. is_b64_whitespace(str(i:i))) n = n + 1 + end do + + if (n == 0) then + res = "" + return + end if + + allocate(character(len=n) :: packed) + p = 0 + do i = 1, len(str) + if (.not. is_b64_whitespace(str(i:i))) then + p = p + 1 + packed(p:p) = str(i:i) + end if + end do + + if (mod(n, 4) /= 0) then + res = "" + return + end if + + pad = 0 + if (packed(n:n) == "=") pad = pad + 1 + if (packed(n - 1:n - 1) == "=") pad = pad + 1 + + if (n - pad > 0) then + if (index(packed(:n - pad), "=") /= 0) then + res = "" + return + end if + end if + + out_len = 3 * (n / 4) - pad + if (out_len == 0) then + res = "" + return + end if + + allocate(bytes(out_len)) + p = 1 + + do i = 1, n, 4 + s0 = b64_value(packed(i:i)) + s1 = b64_value(packed(i + 1:i + 1)) + c3 = packed(i + 2:i + 2) + c4 = packed(i + 3:i + 3) + + if (s0 < 0 .or. s1 < 0) then + res = "" + return + end if + + if (c3 == "=") then + if (c4 /= "=") then + res = "" + return + end if + if (i + 3 /= n) then + res = "" + return + end if + + b0 = ior(ishft(s0, 2), ishft(s1, -4)) + if (b0 > 127_int32) then + bytes(p) = int(b0 - 256_int32, int8) + else + bytes(p) = int(b0, int8) + end if + p = p + 1 + cycle + end if + + s2 = b64_value(c3) + if (s2 < 0) then + res = "" + return + end if + + b0 = ior(ishft(s0, 2), ishft(s1, -4)) + b1 = ior(ishft(iand(s1, 15_int32), 4), ishft(s2, -2)) + + if (b0 > 127_int32) then + bytes(p) = int(b0 - 256_int32, int8) + else + bytes(p) = int(b0, int8) + end if + p = p + 1 + if (p <= out_len) then + if (b1 > 127_int32) then + bytes(p) = int(b1 - 256_int32, int8) + else + bytes(p) = int(b1, int8) + end if + p = p + 1 + end if + + if (c4 == "=") then + if (i + 3 /= n) then + res = "" + return + end if + cycle + end if + + s3 = b64_value(c4) + if (s3 < 0) then + res = "" + return + end if + + b2 = ior(ishft(iand(s2, 3_int32), 6), s3) + if (p <= out_len) then + if (b2 > 127_int32) then + bytes(p) = int(b2 - 256_int32, int8) + else + bytes(p) = int(b2, int8) + end if + p = p + 1 + end if + end do + + allocate(character(len=out_len) :: res) + do i = 1, out_len + ! Masking ensures we treat the byte as unsigned 0-255 + res(i:i) = achar(iand(int(bytes(i), int32), 255_int32)) + end do + end function base64_decode + + elemental integer function b64_value(ch) result(val) + character, intent(in) :: ch + integer :: pos + + pos = index(B64_ALPHABET, ch) + if (pos == 0) then + val = -1 + else + val = pos - 1 + end if + end function b64_value + + elemental logical function is_b64_whitespace(ch) result(tf) + character, intent(in) :: ch + + tf = ch == " " .or. ch == achar(9) .or. ch == achar(10) .or. & + ch == achar(13) .or. ch == achar(11) .or. ch == achar(12) + end function is_b64_whitespace + +end module stdlib_base64 diff --git a/src/strings/stdlib_strings.fypp b/src/strings/stdlib_strings.fypp index 9cacb1f72..1f449e235 100644 --- a/src/strings/stdlib_strings.fypp +++ b/src/strings/stdlib_strings.fypp @@ -5,6 +5,7 @@ !> The specification of this module is available [here](../page/specs/stdlib_strings.html). module stdlib_strings use stdlib_ascii, only: whitespace + use stdlib_base64, only: base64_encode, base64_decode use stdlib_string_type, only: string_type, char, verify, repeat, len, len_trim, move use stdlib_optval, only: optval use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk, c_bool, c_char @@ -13,6 +14,7 @@ module stdlib_strings private public :: to_string + public :: base64_encode, base64_decode public :: to_c_char public :: strip, chomp public :: starts_with, ends_with diff --git a/test/string/CMakeLists.txt b/test/string/CMakeLists.txt index 833471c9e..a22145a2d 100644 --- a/test/string/CMakeLists.txt +++ b/test/string/CMakeLists.txt @@ -14,6 +14,7 @@ ADDTEST(string_intrinsic) ADDTEST(string_match) ADDTEST(string_derivedtype_io) ADDTEST(string_functions) +ADDTEST(base64) ADDTEST(string_strip_chomp) ADDTEST(string_to_number) ADDTEST(string_to_string) diff --git a/test/string/test_base64.f90 b/test/string/test_base64.f90 new file mode 100644 index 000000000..115a1bef1 --- /dev/null +++ b/test/string/test_base64.f90 @@ -0,0 +1,155 @@ +! SPDX-Identifier: MIT +module test_base64 + use testdrive, only : new_unittest, unittest_type, error_type, check + use stdlib_base64, only : base64_encode, base64_decode + use stdlib_kinds, only : int8, int32, dp, lk + implicit none + +contains + + subroutine collect_base64(testsuite) + type(unittest_type), allocatable, intent(out) :: testsuite(:) + + testsuite = [ & + new_unittest("base64-known-vectors", test_known_vectors), & + new_unittest("base64-decode-whitespace", test_decode_whitespace), & + new_unittest("base64-decode-invalid", test_decode_invalid), & + new_unittest("base64-roundtrip-int32", test_roundtrip_int32), & + new_unittest("base64-roundtrip-real", test_roundtrip_real), & + new_unittest("base64-roundtrip-complex", test_roundtrip_complex), & + new_unittest("base64-roundtrip-logical", test_roundtrip_logical), & + new_unittest("base64-rank0", test_rank0_encode) & + ] + end subroutine collect_base64 + + subroutine test_known_vectors(error) + type(error_type), allocatable, intent(out) :: error + + call check(error, base64_encode([int(77, int8), int(97, int8), int(110, int8)]) == "TWFu") + if (allocated(error)) return + call check(error, base64_encode([int(77, int8), int(97, int8)]) == "TWE=") + if (allocated(error)) return + call check(error, base64_encode([int(77, int8)]) == "TQ==") + if (allocated(error)) return + + call check(error, base64_decode("TWFu") == "Man") + if (allocated(error)) return + call check(error, base64_decode("TWE=") == "Ma") + if (allocated(error)) return + call check(error, base64_decode("TQ==") == "M") + end subroutine test_known_vectors + + subroutine test_decode_whitespace(error) + type(error_type), allocatable, intent(out) :: error + + call check(error, base64_decode("T W" // new_line("a") // "Fu") == "Man") + end subroutine test_decode_whitespace + + subroutine test_decode_invalid(error) + type(error_type), allocatable, intent(out) :: error + + call check(error, base64_decode("abc") == "") + if (allocated(error)) return + call check(error, base64_decode("A===") == "") + if (allocated(error)) return + call check(error, base64_decode("AA=A") == "") + end subroutine test_decode_invalid + + subroutine test_roundtrip_int32(error) + type(error_type), allocatable, intent(out) :: error + + integer(int32) :: vals(4), got(4) + character(len=:), allocatable :: enc, dec + + vals = [1_int32, -2_int32, 1024_int32, -4096_int32] + + enc = base64_encode(vals) + dec = base64_decode(enc) + got = transfer(dec, got) + + call check(error, all(got == vals)) + end subroutine test_roundtrip_int32 + + subroutine test_roundtrip_real(error) + type(error_type), allocatable, intent(out) :: error + + real(dp) :: vals(4), got(4) + character(len=:), allocatable :: enc, dec + + vals = [1.5_dp, -2.25_dp, 0.125_dp, 9.0_dp] + + enc = base64_encode(vals) + dec = base64_decode(enc) + got = transfer(dec, got) + + call check(error, all(got == vals)) + end subroutine test_roundtrip_real + + subroutine test_roundtrip_complex(error) + type(error_type), allocatable, intent(out) :: error + + complex(dp) :: vals(3), got(3) + character(len=:), allocatable :: enc, dec + + vals = [cmplx(1.0_dp, 2.0_dp, dp), cmplx(-3.0_dp, 0.5_dp, dp), cmplx(0.0_dp, -4.0_dp, dp)] + + enc = base64_encode(vals) + dec = base64_decode(enc) + got = transfer(dec, got) + + call check(error, all(got == vals)) + end subroutine test_roundtrip_complex + + subroutine test_roundtrip_logical(error) + type(error_type), allocatable, intent(out) :: error + logical(lk) :: vals(5), got(5) + integer(int8) :: temp_bytes(5 * storage_size(.true._lk)/8) ! Buffer + character(len=:), allocatable :: enc, dec + + vals = [.true._lk, .false._lk, .true._lk, .true._lk, .false._lk] + + enc = base64_encode(vals) + dec = base64_decode(enc) + + got = transfer(dec, got) + + call check(error, all((vals .neqv. .false._lk) .eqv. (got .neqv. .false._lk))) + end subroutine test_roundtrip_logical + + subroutine test_rank0_encode(error) + type(error_type), allocatable, intent(out) :: error + + integer(int32) :: v + + v = 42_int32 + call check(error, len(base64_encode(v)) > 0) + end subroutine test_rank0_encode + +end module test_base64 + + +program tester + use, intrinsic :: iso_fortran_env, only : error_unit + use testdrive, only : run_testsuite, new_testsuite, testsuite_type + use test_base64, only : collect_base64 + implicit none + integer :: stat, is + type(testsuite_type), allocatable :: testsuites(:) + character(len=*), parameter :: fmt = '("#", *(1x, a))' + + stat = 0 + + testsuites = [ & + new_testsuite("base64", collect_base64) & + ] + + do is = 1, size(testsuites) + write(error_unit, fmt) "Testing:", testsuites(is)%name + call run_testsuite(testsuites(is)%collect, error_unit, stat) + end do + + if (stat > 0) then + write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" + error stop + end if +end program tester From 3cb5be3845e7178b9f86f61246feb7eb682ba88c Mon Sep 17 00:00:00 2001 From: RatanKokal Date: Wed, 1 Apr 2026 01:31:26 +0530 Subject: [PATCH 2/7] perf: optimize base64 decoder with O(1) LUT and single-pass bit accumulator Replaced O(N) string searches with an instant Look-Up Table and implemented a 32-bit accumulator to process data in a single pass. Eliminated multi-pass string packing, removed helper function overhead (dead code elimination), and pre-allocated memory to prevent expensive reallocations. Signed-off-by: RatanKokal --- src/strings/stdlib_base64.fypp | 228 ++++++++++++--------------------- test/string/test_base64.f90 | 1 - 2 files changed, 82 insertions(+), 147 deletions(-) diff --git a/src/strings/stdlib_base64.fypp b/src/strings/stdlib_base64.fypp index 12750b108..6cb89e61f 100644 --- a/src/strings/stdlib_base64.fypp +++ b/src/strings/stdlib_base64.fypp @@ -63,6 +63,7 @@ contains end function base64_encode_real_${k1}$ #:endfor + #:for k1, t1, _, _ in INT_KINDS_TYPES module function base64_encode_int_${k1}$(data) result(str) ${t1}$, intent(in) :: data(..) @@ -98,6 +99,7 @@ contains end function base64_encode_int_${k1}$ #:endfor + #:for k1, t1, _ in CMPLX_KINDS_TYPES module function base64_encode_cmplx_${k1}$(data) result(str) ${t1}$, intent(in) :: data(..) @@ -133,6 +135,7 @@ contains end function base64_encode_cmplx_${k1}$ #:endfor + #:for k1, t1 in LOG_KINDS_TYPES module function base64_encode_logical_${k1}$(data) result(str) ${t1}$, intent(in) :: data(..) @@ -168,6 +171,7 @@ contains end function base64_encode_logical_${k1}$ #:endfor + function base64_encode_bytes(bytes) result(str) integer(int8), intent(in) :: bytes(:) character(len=:), allocatable :: str @@ -189,14 +193,14 @@ contains do while (i <= nbytes) rem = nbytes - i + 1 - b0 = iand(int(bytes(i), int32), int(z'FF', int32)) + b0 = iand(int(bytes(i), int32), 255_int32) if (rem >= 2) then - b1 = iand(int(bytes(i + 1), int32), int(z'FF', int32)) + b1 = iand(int(bytes(i + 1), int32), 255_int32) else b1 = 0_int32 end if if (rem >= 3) then - b2 = iand(int(bytes(i + 2), int32), int(z'FF', int32)) + b2 = iand(int(bytes(i + 2), int32), 255_int32) else b2 = 0_int32 end if @@ -206,18 +210,17 @@ contains s2 = ior(ishft(iand(b1, 15_int32), 2), ishft(b2, -6)) s3 = iand(b2, 63_int32) - str(o:o) = B64_ALPHABET(s0 + 1:s0 + 1) - str(o + 1:o + 1) = B64_ALPHABET(s1 + 1:s1 + 1) + str(o:o) = B64_ALPHABET(s0 + 1:s0 + 1) + str(o+1:o+1) = B64_ALPHABET(s1 + 1:s1 + 1) if (rem == 1) then - str(o + 2:o + 2) = "=" - str(o + 3:o + 3) = "=" + str(o+2:o+3) = "==" else if (rem == 2) then - str(o + 2:o + 2) = B64_ALPHABET(s2 + 1:s2 + 1) - str(o + 3:o + 3) = "=" + str(o+2:o+2) = B64_ALPHABET(s2 + 1:s2 + 1) + str(o+3:o+3) = "=" else - str(o + 2:o + 2) = B64_ALPHABET(s2 + 1:s2 + 1) - str(o + 3:o + 3) = B64_ALPHABET(s3 + 1:s3 + 1) + str(o+2:o+2) = B64_ALPHABET(s2 + 1:s2 + 1) + str(o+3:o+3) = B64_ALPHABET(s3 + 1:s3 + 1) end if i = i + 3 @@ -228,161 +231,94 @@ contains function base64_decode(str) result(res) character(len=*), intent(in) :: str character(len=:), allocatable :: res - - character(len=:), allocatable :: packed + integer(int8), allocatable :: bytes(:) - integer :: n, i, p, pad, out_len - integer(int32) :: s0, s1, s2, s3, b0, b1, b2 - character :: c2, c3, c4 - - n = 0 - do i = 1, len(str) - if (.not. is_b64_whitespace(str(i:i))) n = n + 1 - end do - - if (n == 0) then - res = "" - return - end if - - allocate(character(len=n) :: packed) - p = 0 - do i = 1, len(str) - if (.not. is_b64_whitespace(str(i:i))) then - p = p + 1 - packed(p:p) = str(i:i) - end if - end do - - if (mod(n, 4) /= 0) then - res = "" - return - end if - - pad = 0 - if (packed(n:n) == "=") pad = pad + 1 - if (packed(n - 1:n - 1) == "=") pad = pad + 1 - - if (n - pad > 0) then - if (index(packed(:n - pad), "=") /= 0) then - res = "" - return - end if - end if - - out_len = 3 * (n / 4) - pad - if (out_len == 0) then + integer :: i, p, val, bits + integer :: n_chars, pad_chars ! Added counters for strict validation + integer(int32) :: buffer + + ! Look-Up Table: 0-63=Values, -1=Invalid, -2=Whitespace, -3=Padding + integer(int32), parameter :: DEC_LUT(0:255) = [ & + -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -1, -1, & ! 0-15 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & ! 16-31 + -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, & ! 32-47 (+, /) + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -3, -1, -1, & ! 48-63 (0-9, =) + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, & ! 64-79 (A-O) + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, & ! 80-95 (P-Z) + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, & ! 96-111 (a-o) + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, & ! 112-127 (p-z) + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & ! 128+ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ] + + if (len(str) == 0) then res = "" return end if - allocate(bytes(out_len)) + allocate(bytes((len(str) * 3) / 4 + 3)) + + buffer = 0_int32 + bits = 0 p = 1 + n_chars = 0 + pad_chars = 0 - do i = 1, n, 4 - s0 = b64_value(packed(i:i)) - s1 = b64_value(packed(i + 1:i + 1)) - c3 = packed(i + 2:i + 2) - c4 = packed(i + 3:i + 3) - - if (s0 < 0 .or. s1 < 0) then - res = "" - return - end if - - if (c3 == "=") then - if (c4 /= "=") then - res = "" - return - end if - if (i + 3 /= n) then + do i = 1, len(str) + val = DEC_LUT(iachar(str(i:i))) + + if (val >= 0) then + ! If we see a valid char AFTER padding has started (e.g., "AA=A"), it's invalid + if (pad_chars > 0) then res = "" return end if - - b0 = ior(ishft(s0, 2), ishft(s1, -4)) - if (b0 > 127_int32) then - bytes(p) = int(b0 - 256_int32, int8) - else - bytes(p) = int(b0, int8) - end if - p = p + 1 - cycle - end if - - s2 = b64_value(c3) - if (s2 < 0) then - res = "" - return - end if - - b0 = ior(ishft(s0, 2), ishft(s1, -4)) - b1 = ior(ishft(iand(s1, 15_int32), 4), ishft(s2, -2)) - - if (b0 > 127_int32) then - bytes(p) = int(b0 - 256_int32, int8) - else - bytes(p) = int(b0, int8) - end if - p = p + 1 - if (p <= out_len) then - if (b1 > 127_int32) then - bytes(p) = int(b1 - 256_int32, int8) - else - bytes(p) = int(b1, int8) + + n_chars = n_chars + 1 + buffer = ior(ishft(buffer, 6), val) + bits = bits + 6 + + if (bits >= 8) then + bits = bits - 8 + bytes(p) = int(iand(ishft(buffer, -bits), 255_int32), int8) + p = p + 1 end if - p = p + 1 - end if - - if (c4 == "=") then - if (i + 3 /= n) then + + else if (val == -3) then + ! Count padding characters + pad_chars = pad_chars + 1 + n_chars = n_chars + 1 + + ! More than 2 padding chars (e.g., "A===") is mathematically invalid in Base64 + if (pad_chars > 2) then res = "" return end if - cycle - end if - - s3 = b64_value(c4) - if (s3 < 0) then + + else if (val == -1) then + ! Invalid character encountered res = "" return end if - - b2 = ior(ishft(iand(s2, 3_int32), 6), s3) - if (p <= out_len) then - if (b2 > 127_int32) then - bytes(p) = int(b2 - 256_int32, int8) - else - bytes(p) = int(b2, int8) - end if - p = p + 1 - end if - end do - - allocate(character(len=out_len) :: res) - do i = 1, out_len - ! Masking ensures we treat the byte as unsigned 0-255 - res(i:i) = achar(iand(int(bytes(i), int32), 255_int32)) end do - end function base64_decode - elemental integer function b64_value(ch) result(val) - character, intent(in) :: ch - integer :: pos - - pos = index(B64_ALPHABET, ch) - if (pos == 0) then - val = -1 - else - val = pos - 1 + ! Strict Validation: Total non-whitespace chars must be a multiple of 4 (e.g., "abc" fails) + if (n_chars == 0 .or. mod(n_chars, 4) /= 0) then + res = "" + return end if - end function b64_value - elemental logical function is_b64_whitespace(ch) result(tf) - character, intent(in) :: ch + p = p - 1 + allocate(character(len=p) :: res) + do i = 1, p + res(i:i) = achar(iand(int(bytes(i), int32), 255_int32)) + end do - tf = ch == " " .or. ch == achar(9) .or. ch == achar(10) .or. & - ch == achar(13) .or. ch == achar(11) .or. ch == achar(12) - end function is_b64_whitespace + end function base64_decode -end module stdlib_base64 +end module stdlib_base64 \ No newline at end of file diff --git a/test/string/test_base64.f90 b/test/string/test_base64.f90 index 115a1bef1..172021f58 100644 --- a/test/string/test_base64.f90 +++ b/test/string/test_base64.f90 @@ -103,7 +103,6 @@ end subroutine test_roundtrip_complex subroutine test_roundtrip_logical(error) type(error_type), allocatable, intent(out) :: error logical(lk) :: vals(5), got(5) - integer(int8) :: temp_bytes(5 * storage_size(.true._lk)/8) ! Buffer character(len=:), allocatable :: enc, dec vals = [.true._lk, .false._lk, .true._lk, .true._lk, .false._lk] From d6d3103b2a631fcd623ffb466c03c947f766d933 Mon Sep 17 00:00:00 2001 From: RatanKokal Date: Sun, 5 Apr 2026 02:21:42 +0530 Subject: [PATCH 3/7] refactor(strings): harden Base64 scalar fallback and align APIs This commit addresses reviewer feedback regarding memory safety, type portability, and documentation for the pure Fortran implementation. Key changes: - API Alignment: Updated base64_encode_into to mirror the decoder API by returning encoded_len and error_flag. - Memory Safety: Added explicit blank-filling and strict bounds checking to the encoder to prevent silent buffer overflows. - Branchless Bounds Checking: Implemented an iand(..., 255_int32) mask in the decoder to safely handle non-ASCII codepoints. - Strict Portability: Enforced int32 type consistency across all bitwise intrinsics and literal masks. - Documentation: Added FORD headers to the umbrella module. Signed-off-by: RatanKokal --- src/strings/CMakeLists.txt | 2 + src/strings/stdlib_base64.fypp | 330 ++------------------------ src/strings/stdlib_base64_decode.fypp | 207 ++++++++++++++++ src/strings/stdlib_base64_encode.fypp | 314 ++++++++++++++++++++++++ 4 files changed, 537 insertions(+), 316 deletions(-) create mode 100644 src/strings/stdlib_base64_decode.fypp create mode 100644 src/strings/stdlib_base64_encode.fypp diff --git a/src/strings/CMakeLists.txt b/src/strings/CMakeLists.txt index bc821866b..e12790797 100644 --- a/src/strings/CMakeLists.txt +++ b/src/strings/CMakeLists.txt @@ -1,4 +1,6 @@ set(strings_fppFiles + stdlib_base64_encode.fypp + stdlib_base64_decode.fypp stdlib_base64.fypp stdlib_string_type.fypp stdlib_string_type_constructor.fypp diff --git a/src/strings/stdlib_base64.fypp b/src/strings/stdlib_base64.fypp index 6cb89e61f..2dc9ad1de 100644 --- a/src/strings/stdlib_base64.fypp +++ b/src/strings/stdlib_base64.fypp @@ -1,324 +1,22 @@ ! SPDX-Identifier: MIT -#:include "common.fypp" + +!> Base64 encoding and decoding algorithms +!> +!> This module provides procedures to safely encode and decode data +!> using the standard Base64 encoding scheme defined in RFC 4648. +!> +!> @note +!> **Experimental:** This API is currently considered experimental and +!> may be subject to change in future releases of `stdlib`. module stdlib_base64 - use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk, c_bool + use stdlib_base64_encode, only: base64_encode, base64_encode_into + use stdlib_base64_decode, only: base64_decode, base64_decode_into + implicit none private - public :: base64_encode, base64_decode - - character(len=*), parameter :: B64_ALPHABET = & - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" - - interface base64_encode -#:for k1, t1, _, _ in REAL_KINDS_TYPES - module procedure :: base64_encode_real_${k1}$ -#:endfor -#:for k1, t1, _, _ in INT_KINDS_TYPES - module procedure :: base64_encode_int_${k1}$ -#:endfor -#:for k1, t1, _ in CMPLX_KINDS_TYPES - module procedure :: base64_encode_cmplx_${k1}$ -#:endfor -#:for k1, t1 in LOG_KINDS_TYPES - module procedure :: base64_encode_logical_${k1}$ -#:endfor - end interface base64_encode - -contains - -#:for k1, t1, _, _ in REAL_KINDS_TYPES - module function base64_encode_real_${k1}$(data) result(str) - ${t1}$, intent(in) :: data(..) - character(len=:), allocatable :: str - - ${t1}$, allocatable :: flat(:) - integer(int8), allocatable :: bytes(:) - integer :: nbytes - - select rank(d => data) - rank(0) - allocate(flat(1)) - flat(1) = d - rank(1) - flat = d -#:for r in range(2, MAXRANK + 1) - rank(${r}$) - flat = pack(d, .true.) -#:endfor - rank default - str = "" - return - end select - - if (size(flat) == 0) then - str = "" - return - end if - - nbytes = size(flat) * storage_size(flat(1)) / 8 - bytes = transfer(flat, [0_int8], nbytes) - str = base64_encode_bytes(bytes) - end function base64_encode_real_${k1}$ - -#:endfor - -#:for k1, t1, _, _ in INT_KINDS_TYPES - module function base64_encode_int_${k1}$(data) result(str) - ${t1}$, intent(in) :: data(..) - character(len=:), allocatable :: str - - ${t1}$, allocatable :: flat(:) - integer(int8), allocatable :: bytes(:) - integer :: nbytes - - select rank(d => data) - rank(0) - allocate(flat(1)) - flat(1) = d - rank(1) - flat = d -#:for r in range(2, MAXRANK + 1) - rank(${r}$) - flat = pack(d, .true.) -#:endfor - rank default - str = "" - return - end select - - if (size(flat) == 0) then - str = "" - return - end if - - nbytes = size(flat) * storage_size(flat(1)) / 8 - bytes = transfer(flat, [0_int8], nbytes) - str = base64_encode_bytes(bytes) - end function base64_encode_int_${k1}$ - -#:endfor - -#:for k1, t1, _ in CMPLX_KINDS_TYPES - module function base64_encode_cmplx_${k1}$(data) result(str) - ${t1}$, intent(in) :: data(..) - character(len=:), allocatable :: str - - ${t1}$, allocatable :: flat(:) - integer(int8), allocatable :: bytes(:) - integer :: nbytes - - select rank(d => data) - rank(0) - allocate(flat(1)) - flat(1) = d - rank(1) - flat = d -#:for r in range(2, MAXRANK + 1) - rank(${r}$) - flat = pack(d, .true.) -#:endfor - rank default - str = "" - return - end select - - if (size(flat) == 0) then - str = "" - return - end if - - nbytes = size(flat) * storage_size(flat(1)) / 8 - bytes = transfer(flat, [0_int8], nbytes) - str = base64_encode_bytes(bytes) - end function base64_encode_cmplx_${k1}$ - -#:endfor - -#:for k1, t1 in LOG_KINDS_TYPES - module function base64_encode_logical_${k1}$(data) result(str) - ${t1}$, intent(in) :: data(..) - character(len=:), allocatable :: str - - ${t1}$, allocatable :: flat(:) - integer(int8), allocatable :: bytes(:) - integer :: nbytes - - select rank(d => data) - rank(0) - allocate(flat(1)) - flat(1) = d - rank(1) - flat = d -#:for r in range(2, MAXRANK + 1) - rank(${r}$) - flat = pack(d, .true.) -#:endfor - rank default - str = "" - return - end select - - if (size(flat) == 0) then - str = "" - return - end if - - nbytes = size(flat) * storage_size(flat(1)) / 8 - bytes = transfer(flat, [0_int8], nbytes) - str = base64_encode_bytes(bytes) - end function base64_encode_logical_${k1}$ - -#:endfor - - function base64_encode_bytes(bytes) result(str) - integer(int8), intent(in) :: bytes(:) - character(len=:), allocatable :: str - - integer :: nbytes, out_len, i, o, rem - integer(int32) :: b0, b1, b2, s0, s1, s2, s3 - - nbytes = size(bytes) - if (nbytes == 0) then - str = "" - return - end if - - out_len = 4 * ((nbytes + 2) / 3) - allocate(character(len=out_len) :: str) - - i = 1 - o = 1 - do while (i <= nbytes) - rem = nbytes - i + 1 - - b0 = iand(int(bytes(i), int32), 255_int32) - if (rem >= 2) then - b1 = iand(int(bytes(i + 1), int32), 255_int32) - else - b1 = 0_int32 - end if - if (rem >= 3) then - b2 = iand(int(bytes(i + 2), int32), 255_int32) - else - b2 = 0_int32 - end if - - s0 = ishft(b0, -2) - s1 = ior(ishft(iand(b0, 3_int32), 4), ishft(b1, -4)) - s2 = ior(ishft(iand(b1, 15_int32), 2), ishft(b2, -6)) - s3 = iand(b2, 63_int32) - - str(o:o) = B64_ALPHABET(s0 + 1:s0 + 1) - str(o+1:o+1) = B64_ALPHABET(s1 + 1:s1 + 1) - - if (rem == 1) then - str(o+2:o+3) = "==" - else if (rem == 2) then - str(o+2:o+2) = B64_ALPHABET(s2 + 1:s2 + 1) - str(o+3:o+3) = "=" - else - str(o+2:o+2) = B64_ALPHABET(s2 + 1:s2 + 1) - str(o+3:o+3) = B64_ALPHABET(s3 + 1:s3 + 1) - end if - - i = i + 3 - o = o + 4 - end do - end function base64_encode_bytes - - function base64_decode(str) result(res) - character(len=*), intent(in) :: str - character(len=:), allocatable :: res - - integer(int8), allocatable :: bytes(:) - integer :: i, p, val, bits - integer :: n_chars, pad_chars ! Added counters for strict validation - integer(int32) :: buffer - - ! Look-Up Table: 0-63=Values, -1=Invalid, -2=Whitespace, -3=Padding - integer(int32), parameter :: DEC_LUT(0:255) = [ & - -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -1, -1, & ! 0-15 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & ! 16-31 - -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, & ! 32-47 (+, /) - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -3, -1, -1, & ! 48-63 (0-9, =) - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, & ! 64-79 (A-O) - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, & ! 80-95 (P-Z) - -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, & ! 96-111 (a-o) - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, & ! 112-127 (p-z) - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & ! 128+ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, & - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ] - - if (len(str) == 0) then - res = "" - return - end if - - allocate(bytes((len(str) * 3) / 4 + 3)) - - buffer = 0_int32 - bits = 0 - p = 1 - n_chars = 0 - pad_chars = 0 - - do i = 1, len(str) - val = DEC_LUT(iachar(str(i:i))) - - if (val >= 0) then - ! If we see a valid char AFTER padding has started (e.g., "AA=A"), it's invalid - if (pad_chars > 0) then - res = "" - return - end if - - n_chars = n_chars + 1 - buffer = ior(ishft(buffer, 6), val) - bits = bits + 6 - - if (bits >= 8) then - bits = bits - 8 - bytes(p) = int(iand(ishft(buffer, -bits), 255_int32), int8) - p = p + 1 - end if - - else if (val == -3) then - ! Count padding characters - pad_chars = pad_chars + 1 - n_chars = n_chars + 1 - - ! More than 2 padding chars (e.g., "A===") is mathematically invalid in Base64 - if (pad_chars > 2) then - res = "" - return - end if - - else if (val == -1) then - ! Invalid character encountered - res = "" - return - end if - end do - - ! Strict Validation: Total non-whitespace chars must be a multiple of 4 (e.g., "abc" fails) - if (n_chars == 0 .or. mod(n_chars, 4) /= 0) then - res = "" - return - end if - - p = p - 1 - allocate(character(len=p) :: res) - do i = 1, p - res(i:i) = achar(iand(int(bytes(i), int32), 255_int32)) - end do - - end function base64_decode + public :: base64_encode, base64_encode_into + public :: base64_decode, base64_decode_into end module stdlib_base64 \ No newline at end of file diff --git a/src/strings/stdlib_base64_decode.fypp b/src/strings/stdlib_base64_decode.fypp new file mode 100644 index 000000000..086813876 --- /dev/null +++ b/src/strings/stdlib_base64_decode.fypp @@ -0,0 +1,207 @@ +! SPDX-Identifier: MIT + +module stdlib_base64_decode + use stdlib_kinds, only: int8, int32 + implicit none + private + + public :: base64_decode + public :: base64_decode_into + + integer(int8), parameter :: DT(0:255) = int( [ & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, & + 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, & + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, & + 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, & + -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, & + 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 & + ], int8) + + ! 0 for whitespace/control chars, 1 for valid chars + integer(int32), parameter :: IS_VAL(0:255) = int([ & + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, & + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, & + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 & + ], int32) + +contains + + ! POWER-USER API (Zero-Copy Subroutines) + subroutine base64_decode_into(str, res, decoded_len, error_flag, skip_despace) + character(len=*), intent(in) :: str + character(len=*), intent(out) :: res + integer, intent(out) :: decoded_len + logical, intent(out) :: error_flag + logical, intent(in), optional :: skip_despace + + integer(int8), allocatable :: filtered(:) + logical :: clean_mode + + integer(int32) :: v0, v1, v2, v3, c + integer :: i, j, slen, n_pad + integer(int32) :: error_accum + + error_flag = .false. + decoded_len = 0 + + if (len(str) == 0) return + + clean_mode = .false. + if (present(skip_despace)) clean_mode = skip_despace + + if (clean_mode) then + slen = len(str) + if (mod(slen, 4) /= 0) then; error_flag = .true.; return; end if + + n_pad = 0 + if (str(slen:slen) == '=') then + n_pad = 1 + if (str(slen-1:slen-1) == '=') n_pad = 2 + end if + + decoded_len = (slen / 4) * 3 - n_pad + if (len(res) < decoded_len) then; error_flag = .true.; decoded_len = 0; return; end if + else + allocate(filtered(len(str))) + j = 0 + do i = 1, len(str) + c = iand(int(iachar(str(i:i)), int32), 255_int32) + + ! ALWAYS write the byte to the next available slot + filtered(j + 1) = int(c, int8) + + ! Add 0 (if space) or 1 (if valid) to the counter + j = j + IS_VAL(c) + end do + slen = j + + if (slen == 0 .or. mod(slen, 4) /= 0) then; error_flag = .true.; return; end if + + n_pad = 0 + if (filtered(slen) == 61_int8) then + n_pad = 1 + if (filtered(slen-1) == 61_int8) n_pad = 2 + end if + + decoded_len = (slen / 4) * 3 - n_pad + if (len(res) < decoded_len) then; error_flag = .true.; decoded_len = 0; return; end if + end if + + error_accum = 0 + + ! Fortran Scalar Decoder Loop + do i = 1, slen - 7, 4 + if (clean_mode) then + v0 = int(DT(iand(int(iachar(str(i:i)), int32), 255_int32)), int32) + v1 = int(DT(iand(int(iachar(str(i+1:i+1)), int32), 255_int32)), int32) + v2 = int(DT(iand(int(iachar(str(i+2:i+2)), int32), 255_int32)), int32) + v3 = int(DT(iand(int(iachar(str(i+3:i+3)), int32), 255_int32)), int32) + else + v0 = int(DT(iand(int(filtered(i), int32), 255_int32)), int32) + v1 = int(DT(iand(int(filtered(i+1), int32), 255_int32)), int32) + v2 = int(DT(iand(int(filtered(i+2), int32), 255_int32)), int32) + v3 = int(DT(iand(int(filtered(i+3), int32), 255_int32)), int32) + end if + + error_accum = ior(error_accum, ior(v0, ior(v1, ior(v2, v3)))) + + j = (i - 1) / 4 * 3 + 1 + res(j:j) = char(ior(ishft(v0, 2), ishft(v1, -4))) + res(j+1:j+1) = char(ior(ishft(iand(v1, 15_int32), 4), ishft(v2, -2))) + res(j+2:j+2) = char(ior(ishft(iand(v2, 3_int32), 6), v3)) + end do + + if (error_accum < 0) then + error_flag = .true. + decoded_len = 0 + return + end if + + ! Fortran Tail Padding Logic + i = slen - 3 + j = (i - 1) / 4 * 3 + 1 + + if (clean_mode) then + c = iand(int(iachar(str(i:i)), int32), 255_int32) + v0 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + c = iand(int(iachar(str(i+1:i+1)), int32), 255_int32) + v1 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + c = iand(int(iachar(str(i+2:i+2)), int32), 255_int32) + v2 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + c = iand(int(iachar(str(i+3:i+3)), int32), 255_int32) + v3 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + else + c = iand(int(filtered(i), int32), 255_int32) + v0 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + c = iand(int(filtered(i+1), int32), 255_int32) + v1 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + c = iand(int(filtered(i+2), int32), 255_int32) + v2 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + c = iand(int(filtered(i+3), int32), 255_int32) + v3 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + end if + + if (v0 < 0 .or. v1 < 0) then; error_flag = .true.; decoded_len = 0; return; end if + res(j:j) = char(ior(ishft(v0, 2), ishft(v1, -4))) + + if (n_pad < 2) then + if (v2 < 0) then; error_flag = .true.; decoded_len = 0; return; end if + res(j+1:j+1) = char(ior(ishft(iand(v1, 15_int32), 4), ishft(v2, -2))) + end if + + if (n_pad == 0) then + if (v3 < 0) then; error_flag = .true.; decoded_len = 0; return; end if + res(j+2:j+2) = char(ior(ishft(iand(v2, 3_int32), 6), v3)) + end if + end subroutine base64_decode_into + + ! ERGONOMIC API (Returns allocatable strings) + function base64_decode(str) result(res) + character(len=*), intent(in) :: str + character(len=:), allocatable :: res + + integer :: decoded_len + logical :: error_flag + + if (len(str) == 0) then + allocate(character(len=0) :: res) + return + end if + + ! Pre-allocate maximum possible size needed + allocate(character(len=(len(str) / 4) * 3 + 3) :: res) + + call base64_decode_into(str, res, decoded_len, error_flag) + + if (error_flag) then + deallocate(res) + allocate(character(len=0) :: res) + else + res = res(1:decoded_len) + end if + end function base64_decode + +end module stdlib_base64_decode \ No newline at end of file diff --git a/src/strings/stdlib_base64_encode.fypp b/src/strings/stdlib_base64_encode.fypp new file mode 100644 index 000000000..3224f3dff --- /dev/null +++ b/src/strings/stdlib_base64_encode.fypp @@ -0,0 +1,314 @@ +! SPDX-Identifier: MIT +#:include "common.fypp" + +module stdlib_base64_encode + use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk + use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer, c_loc, c_size_t, c_int8_t, c_bool + implicit none + private + + public :: base64_encode, base64_encode_into + + character(len=64), parameter :: ALPHABET = & + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" + + ! ERGONOMIC API (Returns allocatable strings) + interface base64_encode +#:for k1, t1, _, _ in REAL_KINDS_TYPES + module procedure :: base64_encode_real_${k1}$ +#:endfor +#:for k1, t1, _, _ in INT_KINDS_TYPES + module procedure :: base64_encode_int_${k1}$ +#:endfor +#:for k1, t1, _ in CMPLX_KINDS_TYPES + module procedure :: base64_encode_cmplx_${k1}$ +#:endfor +#:for k1, t1 in LOG_KINDS_TYPES + module procedure :: base64_encode_logical_${k1}$ +#:endfor + end interface base64_encode + + + ! POWER-USER API (Zero-Copy Subroutines) + interface base64_encode_into +#:for k1, t1, _, _ in REAL_KINDS_TYPES + module procedure :: base64_encode_into_real_${k1}$ +#:endfor +#:for k1, t1, _, _ in INT_KINDS_TYPES + module procedure :: base64_encode_into_int_${k1}$ +#:endfor +#:for k1, t1, _ in CMPLX_KINDS_TYPES + module procedure :: base64_encode_into_cmplx_${k1}$ +#:endfor +#:for k1, t1 in LOG_KINDS_TYPES + module procedure :: base64_encode_into_logical_${k1}$ +#:endfor + end interface base64_encode_into + + +contains + +#:for k1, t1, _, _ in REAL_KINDS_TYPES + function base64_encode_real_${k1}$(data) result(str) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=:), allocatable, target :: str + integer(int8), pointer :: raw(:) + integer(int64) :: nbytes + nbytes = int(size(data), int64) * storage_size(data) / 8 + if (nbytes == 0) then; str = ""; return; end if + call c_f_pointer(c_loc(data), raw, [nbytes]) + str = base64_encode_bytes(raw) + end function base64_encode_real_${k1}$ +#:endfor + +#:for k1, t1, _, _ in INT_KINDS_TYPES + function base64_encode_int_${k1}$(data) result(str) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=:), allocatable, target :: str + integer(int8), pointer :: raw(:) + integer(int64) :: nbytes + nbytes = int(size(data), int64) * storage_size(data) / 8 + if (nbytes == 0) then; str = ""; return; end if + call c_f_pointer(c_loc(data), raw, [nbytes]) + str = base64_encode_bytes(raw) + end function base64_encode_int_${k1}$ +#:endfor + +#:for k1, t1, _ in CMPLX_KINDS_TYPES + function base64_encode_cmplx_${k1}$(data) result(str) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=:), allocatable, target :: str + integer(int8), pointer :: raw(:) + integer(int64) :: nbytes + nbytes = int(size(data), int64) * storage_size(data) / 8 + if (nbytes == 0) then; str = ""; return; end if + call c_f_pointer(c_loc(data), raw, [nbytes]) + str = base64_encode_bytes(raw) + end function base64_encode_cmplx_${k1}$ +#:endfor + +#:for k1, t1 in LOG_KINDS_TYPES + function base64_encode_logical_${k1}$(data) result(str) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=:), allocatable, target :: str + integer(int8), pointer :: raw(:) + integer(int64) :: nbytes + nbytes = int(size(data), int64) * storage_size(data) / 8 + if (nbytes == 0) then; str = ""; return; end if + call c_f_pointer(c_loc(data), raw, [nbytes]) + str = base64_encode_bytes(raw) + end function base64_encode_logical_${k1}$ +#:endfor + +#:for k1, t1, _, _ in REAL_KINDS_TYPES + subroutine base64_encode_into_real_${k1}$(data, str, encoded_len, error_flag) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=*), intent(out), target :: str + integer, intent(out), optional :: encoded_len + logical, intent(out), optional :: error_flag + + integer(int8), pointer :: raw(:) + integer(int64) :: nbytes + integer :: elen + logical :: err + + nbytes = int(size(data), int64) * storage_size(data) / 8 + if (nbytes == 0) then + str = '' + if (present(encoded_len)) encoded_len = 0 + if (present(error_flag)) error_flag = .false. + return + end if + + call c_f_pointer(c_loc(data), raw, [nbytes]) + call base64_encode_bytes_into(raw, str, elen, err) + + if (present(encoded_len)) encoded_len = elen + if (present(error_flag)) error_flag = err + end subroutine base64_encode_into_real_${k1}$ +#:endfor + +#:for k1, t1, _, _ in INT_KINDS_TYPES + subroutine base64_encode_into_int_${k1}$(data, str, encoded_len, error_flag) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=*), intent(out), target :: str + integer, intent(out), optional :: encoded_len + logical, intent(out), optional :: error_flag + + integer(int8), pointer :: raw(:) + integer(int64) :: nbytes + integer :: elen + logical :: err + + nbytes = int(size(data), int64) * storage_size(data) / 8 + if (nbytes == 0) then + str = '' + if (present(encoded_len)) encoded_len = 0 + if (present(error_flag)) error_flag = .false. + return + end if + + call c_f_pointer(c_loc(data), raw, [nbytes]) + call base64_encode_bytes_into(raw, str, elen, err) + + if (present(encoded_len)) encoded_len = elen + if (present(error_flag)) error_flag = err + end subroutine base64_encode_into_int_${k1}$ +#:endfor + +#:for k1, t1, _ in CMPLX_KINDS_TYPES + subroutine base64_encode_into_cmplx_${k1}$(data, str, encoded_len, error_flag) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=*), intent(out), target :: str + integer, intent(out), optional :: encoded_len + logical, intent(out), optional :: error_flag + + integer(int8), pointer :: raw(:) + integer(int64) :: nbytes + integer :: elen + logical :: err + + nbytes = int(size(data), int64) * storage_size(data) / 8 + if (nbytes == 0) then + str = '' + if (present(encoded_len)) encoded_len = 0 + if (present(error_flag)) error_flag = .false. + return + end if + + call c_f_pointer(c_loc(data), raw, [nbytes]) + call base64_encode_bytes_into(raw, str, elen, err) + + if (present(encoded_len)) encoded_len = elen + if (present(error_flag)) error_flag = err + end subroutine base64_encode_into_cmplx_${k1}$ +#:endfor + +#:for k1, t1 in LOG_KINDS_TYPES + subroutine base64_encode_into_logical_${k1}$(data, str, encoded_len, error_flag) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=*), intent(out), target :: str + integer, intent(out), optional :: encoded_len + logical, intent(out), optional :: error_flag + + integer(int8), pointer :: raw(:) + integer(int64) :: nbytes + integer :: elen + logical :: err + + nbytes = int(size(data), int64) * storage_size(data) / 8 + if (nbytes == 0) then + str = '' + if (present(encoded_len)) encoded_len = 0 + if (present(error_flag)) error_flag = .false. + return + end if + + call c_f_pointer(c_loc(data), raw, [nbytes]) + call base64_encode_bytes_into(raw, str, elen, err) + + if (present(encoded_len)) encoded_len = elen + if (present(error_flag)) error_flag = err + end subroutine base64_encode_into_logical_${k1}$ +#:endfor + + function base64_encode_bytes(bytes) result(str) + integer(int8), intent(in), target, contiguous :: bytes(:) + character(len=:), allocatable, target :: str + integer(c_size_t) :: nbytes + integer :: str_len + integer :: i, j + integer(int32) :: triplet + + nbytes = size(bytes, kind=c_size_t) + str_len = 4 * ((int(nbytes) + 2) / 3) + + allocate(character(len=str_len) :: str) + if (nbytes == 0) return + + j = 1 + do i = 1, int(nbytes) - 2, 3 + triplet = ior(ishft(iand(int(bytes(i), int32), 255_int32), 16_int32), & + ior(ishft(iand(int(bytes(i+1), int32), 255_int32), 8_int32), & + iand(int(bytes(i+2), int32), 255_int32))) + + str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+2) = ALPHABET(iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) + str(j+3:j+3) = ALPHABET(iand(triplet, 63_int32) + 1 : iand(triplet, 63_int32) + 1) + j = j + 4 + end do + + if (mod(nbytes, 3_c_size_t) == 1) then + triplet = ishft(iand(int(bytes(int(nbytes)), int32), 255_int32), 16_int32) + str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+3) = "==" + else if (mod(nbytes, 3_c_size_t) == 2) then + triplet = ior(ishft(iand(int(bytes(int(nbytes)-1), int32), 255_int32), 16_int32), & + ishft(iand(int(bytes(int(nbytes)), int32), 255_int32), 8_int32)) + str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+2) = ALPHABET(iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) + str(j+3:j+3) = "=" + end if + + end function base64_encode_bytes + + subroutine base64_encode_bytes_into(bytes, str, encoded_len, error_flag) + integer(int8), intent(in), target, contiguous :: bytes(:) + character(len=*), intent(out), target :: str + integer, intent(out) :: encoded_len + logical, intent(out) :: error_flag + + integer(c_size_t) :: nbytes + integer :: str_len + integer :: i, j + integer(int32) :: triplet + + str = '' + encoded_len = 0 + error_flag = .false. + + nbytes = size(bytes, kind=c_size_t) + if (nbytes == 0) return + + str_len = 4 * ((int(nbytes) + 2) / 3) + + if (len(str) < str_len) then + error_flag = .true. + return + end if + + encoded_len = str_len + + j = 1 + do i = 1, int(nbytes) - 2, 3 + triplet = ior(ishft(iand(int(bytes(i), int32), 255_int32), 16_int32), & + ior(ishft(iand(int(bytes(i+1), int32), 255_int32), 8_int32), & + iand(int(bytes(i+2), int32), 255_int32))) + + str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+2) = ALPHABET(iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) + str(j+3:j+3) = ALPHABET(iand(triplet, 63_int32) + 1 : iand(triplet, 63_int32) + 1) + j = j + 4 + end do + + if (mod(nbytes, 3_c_size_t) == 1) then + triplet = ishft(iand(int(bytes(int(nbytes)), int32), 255_int32), 16_int32) + str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+3) = "==" + else if (mod(nbytes, 3_c_size_t) == 2) then + triplet = ior(ishft(iand(int(bytes(int(nbytes)-1), int32), 255_int32), 16_int32), & + ishft(iand(int(bytes(int(nbytes)), int32), 255_int32), 8_int32)) + str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+2) = ALPHABET(iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) + str(j+3:j+3) = "=" + end if + + end subroutine base64_encode_bytes_into + +end module stdlib_base64_encode \ No newline at end of file From 70e3540892f559fec34822214995a1908c230bcd Mon Sep 17 00:00:00 2001 From: RatanKokal Date: Sun, 5 Apr 2026 03:19:00 +0530 Subject: [PATCH 4/7] string: improve base64 standard compliance and performance This commit hardens the base64 implementation by addressing standard compliance issues and optimizing the hot paths for the zero-copy API. Key changes: * Compliance: Replaced non-conforming 'c_loc' and 'c_f_pointer' usage with 'transfer' and 'select rank' blocks to ensure compatibility with non-C-interoperable types. * Correctness: Fixed a Unicode aliasing bug in the decoder by implementing a bitwise-OR 'unicode_accum' check to reject non-ASCII characters (>255). * Performance: Optimized 'base64_encode_into' by moving string blank-filling ('str = ""') off the hot path, preventing unnecessary memory operations during high-throughput encoding. * API Refinement: Restricted the 'base64_encode_into' power-user API to accept only 'integer(int8)' arrays to guarantee zero-copy performance while remaining standard-compliant. * Ergonomics: Added an optional 'error_flag' to 'base64_decode' to improve error handling for the allocating API. * Testing: Expanded unit tests to include coverage for the new subroutine signatures and error states. Signed-off-by: RatanKokal --- src/strings/stdlib_base64_decode.fypp | 65 ++++++--- src/strings/stdlib_base64_encode.fypp | 191 ++++++-------------------- test/string/test_base64.f90 | 65 ++++++++- 3 files changed, 144 insertions(+), 177 deletions(-) diff --git a/src/strings/stdlib_base64_decode.fypp b/src/strings/stdlib_base64_decode.fypp index 086813876..61a694f50 100644 --- a/src/strings/stdlib_base64_decode.fypp +++ b/src/strings/stdlib_base64_decode.fypp @@ -60,12 +60,13 @@ contains integer(int8), allocatable :: filtered(:) logical :: clean_mode - integer(int32) :: v0, v1, v2, v3, c + integer(int32) :: v0, v1, v2, v3, c, raw_c integer :: i, j, slen, n_pad - integer(int32) :: error_accum + integer(int32) :: error_accum, unicode_accum error_flag = .false. decoded_len = 0 + unicode_accum = 0 if (len(str) == 0) return @@ -88,17 +89,21 @@ contains allocate(filtered(len(str))) j = 0 do i = 1, len(str) - c = iand(int(iachar(str(i:i)), int32), 255_int32) + raw_c = int(iachar(str(i:i)), int32) + unicode_accum = ior(unicode_accum, raw_c) + c = iand(raw_c, 255_int32) ! ALWAYS write the byte to the next available slot filtered(j + 1) = int(c, int8) - - ! Add 0 (if space) or 1 (if valid) to the counter j = j + IS_VAL(c) end do slen = j - if (slen == 0 .or. mod(slen, 4) /= 0) then; error_flag = .true.; return; end if + ! Catch Unicode overflow before proceeding + if (unicode_accum > 255_int32 .or. slen == 0 .or. mod(slen, 4) /= 0) then + error_flag = .true. + return + end if n_pad = 0 if (filtered(slen) == 61_int8) then @@ -115,10 +120,18 @@ contains ! Fortran Scalar Decoder Loop do i = 1, slen - 7, 4 if (clean_mode) then - v0 = int(DT(iand(int(iachar(str(i:i)), int32), 255_int32)), int32) - v1 = int(DT(iand(int(iachar(str(i+1:i+1)), int32), 255_int32)), int32) - v2 = int(DT(iand(int(iachar(str(i+2:i+2)), int32), 255_int32)), int32) - v3 = int(DT(iand(int(iachar(str(i+3:i+3)), int32), 255_int32)), int32) + raw_c = int(iachar(str(i:i)), int32) + unicode_accum = ior(unicode_accum, raw_c) + v0 = int(DT(iand(raw_c, 255_int32)), int32) + raw_c = int(iachar(str(i+1:i+1)), int32) + unicode_accum = ior(unicode_accum, raw_c) + v1 = int(DT(iand(raw_c, 255_int32)), int32) + raw_c = int(iachar(str(i+2:i+2)), int32) + unicode_accum = ior(unicode_accum, raw_c) + v2 = int(DT(iand(raw_c, 255_int32)), int32) + raw_c = int(iachar(str(i+3:i+3)), int32) + unicode_accum = ior(unicode_accum, raw_c) + v3 = int(DT(iand(raw_c, 255_int32)), int32) else v0 = int(DT(iand(int(filtered(i), int32), 255_int32)), int32) v1 = int(DT(iand(int(filtered(i+1), int32), 255_int32)), int32) @@ -134,7 +147,7 @@ contains res(j+2:j+2) = char(ior(ishft(iand(v2, 3_int32), 6), v3)) end do - if (error_accum < 0) then + if (error_accum < 0 .or. unicode_accum > 255_int32) then error_flag = .true. decoded_len = 0 return @@ -145,13 +158,21 @@ contains j = (i - 1) / 4 * 3 + 1 if (clean_mode) then - c = iand(int(iachar(str(i:i)), int32), 255_int32) + raw_c = int(iachar(str(i:i)), int32) + unicode_accum = ior(unicode_accum, raw_c) + c = iand(raw_c, 255_int32) v0 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) - c = iand(int(iachar(str(i+1:i+1)), int32), 255_int32) + raw_c = int(iachar(str(i+1:i+1)), int32) + unicode_accum = ior(unicode_accum, raw_c) + c = iand(raw_c, 255_int32) v1 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) - c = iand(int(iachar(str(i+2:i+2)), int32), 255_int32) + raw_c = int(iachar(str(i+2:i+2)), int32) + unicode_accum = ior(unicode_accum, raw_c) + c = iand(raw_c, 255_int32) v2 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) - c = iand(int(iachar(str(i+3:i+3)), int32), 255_int32) + raw_c = int(iachar(str(i+3:i+3)), int32) + unicode_accum = ior(unicode_accum, raw_c) + c = iand(raw_c, 255_int32) v3 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) else c = iand(int(filtered(i), int32), 255_int32) @@ -164,7 +185,7 @@ contains v3 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) end if - if (v0 < 0 .or. v1 < 0) then; error_flag = .true.; decoded_len = 0; return; end if + if (v0 < 0 .or. v1 < 0 .or. unicode_accum > 255_int32) then; error_flag = .true.; decoded_len = 0; return; end if res(j:j) = char(ior(ishft(v0, 2), ishft(v1, -4))) if (n_pad < 2) then @@ -179,29 +200,33 @@ contains end subroutine base64_decode_into ! ERGONOMIC API (Returns allocatable strings) - function base64_decode(str) result(res) + function base64_decode(str, error_flag) result(res) character(len=*), intent(in) :: str + logical, intent(out), optional :: error_flag character(len=:), allocatable :: res integer :: decoded_len - logical :: error_flag + logical :: err if (len(str) == 0) then allocate(character(len=0) :: res) + if (present(error_flag)) error_flag = .false. return end if ! Pre-allocate maximum possible size needed allocate(character(len=(len(str) / 4) * 3 + 3) :: res) - call base64_decode_into(str, res, decoded_len, error_flag) + call base64_decode_into(str, res, decoded_len, err) - if (error_flag) then + if (err) then deallocate(res) allocate(character(len=0) :: res) else res = res(1:decoded_len) end if + + if (present(error_flag)) error_flag = err end function base64_decode end module stdlib_base64_decode \ No newline at end of file diff --git a/src/strings/stdlib_base64_encode.fypp b/src/strings/stdlib_base64_encode.fypp index 3224f3dff..acac97021 100644 --- a/src/strings/stdlib_base64_encode.fypp +++ b/src/strings/stdlib_base64_encode.fypp @@ -3,7 +3,7 @@ module stdlib_base64_encode use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk - use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer, c_loc, c_size_t, c_int8_t, c_bool + use, intrinsic :: iso_c_binding, only: c_size_t, c_bool implicit none private @@ -28,190 +28,75 @@ module stdlib_base64_encode #:endfor end interface base64_encode - - ! POWER-USER API (Zero-Copy Subroutines) - interface base64_encode_into -#:for k1, t1, _, _ in REAL_KINDS_TYPES - module procedure :: base64_encode_into_real_${k1}$ -#:endfor -#:for k1, t1, _, _ in INT_KINDS_TYPES - module procedure :: base64_encode_into_int_${k1}$ -#:endfor -#:for k1, t1, _ in CMPLX_KINDS_TYPES - module procedure :: base64_encode_into_cmplx_${k1}$ -#:endfor -#:for k1, t1 in LOG_KINDS_TYPES - module procedure :: base64_encode_into_logical_${k1}$ -#:endfor - end interface base64_encode_into - - contains +#:set RANKS = range(16) + #:for k1, t1, _, _ in REAL_KINDS_TYPES function base64_encode_real_${k1}$(data) result(str) ${t1}$, intent(in), target, contiguous :: data(..) - character(len=:), allocatable, target :: str - integer(int8), pointer :: raw(:) + character(len=:), allocatable :: str integer(int64) :: nbytes nbytes = int(size(data), int64) * storage_size(data) / 8 if (nbytes == 0) then; str = ""; return; end if - call c_f_pointer(c_loc(data), raw, [nbytes]) - str = base64_encode_bytes(raw) + ! Standard-compliant conversion using transfer + select rank(data) + #:for r in RANKS + rank(${r}$) + str = base64_encode_bytes(transfer(data, [0_int8], int(nbytes))) + #:endfor + end select end function base64_encode_real_${k1}$ #:endfor #:for k1, t1, _, _ in INT_KINDS_TYPES function base64_encode_int_${k1}$(data) result(str) ${t1}$, intent(in), target, contiguous :: data(..) - character(len=:), allocatable, target :: str - integer(int8), pointer :: raw(:) + character(len=:), allocatable :: str integer(int64) :: nbytes nbytes = int(size(data), int64) * storage_size(data) / 8 if (nbytes == 0) then; str = ""; return; end if - call c_f_pointer(c_loc(data), raw, [nbytes]) - str = base64_encode_bytes(raw) + select rank(data) + #:for r in RANKS + rank(${r}$) + str = base64_encode_bytes(transfer(data, [0_int8], int(nbytes))) + #:endfor + end select end function base64_encode_int_${k1}$ #:endfor #:for k1, t1, _ in CMPLX_KINDS_TYPES function base64_encode_cmplx_${k1}$(data) result(str) ${t1}$, intent(in), target, contiguous :: data(..) - character(len=:), allocatable, target :: str - integer(int8), pointer :: raw(:) + character(len=:), allocatable :: str integer(int64) :: nbytes nbytes = int(size(data), int64) * storage_size(data) / 8 if (nbytes == 0) then; str = ""; return; end if - call c_f_pointer(c_loc(data), raw, [nbytes]) - str = base64_encode_bytes(raw) + select rank(data) + #:for r in RANKS + rank(${r}$) + str = base64_encode_bytes(transfer(data, [0_int8], int(nbytes))) + #:endfor + end select end function base64_encode_cmplx_${k1}$ #:endfor #:for k1, t1 in LOG_KINDS_TYPES function base64_encode_logical_${k1}$(data) result(str) ${t1}$, intent(in), target, contiguous :: data(..) - character(len=:), allocatable, target :: str - integer(int8), pointer :: raw(:) + character(len=:), allocatable :: str integer(int64) :: nbytes nbytes = int(size(data), int64) * storage_size(data) / 8 if (nbytes == 0) then; str = ""; return; end if - call c_f_pointer(c_loc(data), raw, [nbytes]) - str = base64_encode_bytes(raw) + select rank(data) + #:for r in RANKS + rank(${r}$) + str = base64_encode_bytes(transfer(data, [0_int8], int(nbytes))) + #:endfor + end select end function base64_encode_logical_${k1}$ #:endfor -#:for k1, t1, _, _ in REAL_KINDS_TYPES - subroutine base64_encode_into_real_${k1}$(data, str, encoded_len, error_flag) - ${t1}$, intent(in), target, contiguous :: data(..) - character(len=*), intent(out), target :: str - integer, intent(out), optional :: encoded_len - logical, intent(out), optional :: error_flag - - integer(int8), pointer :: raw(:) - integer(int64) :: nbytes - integer :: elen - logical :: err - - nbytes = int(size(data), int64) * storage_size(data) / 8 - if (nbytes == 0) then - str = '' - if (present(encoded_len)) encoded_len = 0 - if (present(error_flag)) error_flag = .false. - return - end if - - call c_f_pointer(c_loc(data), raw, [nbytes]) - call base64_encode_bytes_into(raw, str, elen, err) - - if (present(encoded_len)) encoded_len = elen - if (present(error_flag)) error_flag = err - end subroutine base64_encode_into_real_${k1}$ -#:endfor - -#:for k1, t1, _, _ in INT_KINDS_TYPES - subroutine base64_encode_into_int_${k1}$(data, str, encoded_len, error_flag) - ${t1}$, intent(in), target, contiguous :: data(..) - character(len=*), intent(out), target :: str - integer, intent(out), optional :: encoded_len - logical, intent(out), optional :: error_flag - - integer(int8), pointer :: raw(:) - integer(int64) :: nbytes - integer :: elen - logical :: err - - nbytes = int(size(data), int64) * storage_size(data) / 8 - if (nbytes == 0) then - str = '' - if (present(encoded_len)) encoded_len = 0 - if (present(error_flag)) error_flag = .false. - return - end if - - call c_f_pointer(c_loc(data), raw, [nbytes]) - call base64_encode_bytes_into(raw, str, elen, err) - - if (present(encoded_len)) encoded_len = elen - if (present(error_flag)) error_flag = err - end subroutine base64_encode_into_int_${k1}$ -#:endfor - -#:for k1, t1, _ in CMPLX_KINDS_TYPES - subroutine base64_encode_into_cmplx_${k1}$(data, str, encoded_len, error_flag) - ${t1}$, intent(in), target, contiguous :: data(..) - character(len=*), intent(out), target :: str - integer, intent(out), optional :: encoded_len - logical, intent(out), optional :: error_flag - - integer(int8), pointer :: raw(:) - integer(int64) :: nbytes - integer :: elen - logical :: err - - nbytes = int(size(data), int64) * storage_size(data) / 8 - if (nbytes == 0) then - str = '' - if (present(encoded_len)) encoded_len = 0 - if (present(error_flag)) error_flag = .false. - return - end if - - call c_f_pointer(c_loc(data), raw, [nbytes]) - call base64_encode_bytes_into(raw, str, elen, err) - - if (present(encoded_len)) encoded_len = elen - if (present(error_flag)) error_flag = err - end subroutine base64_encode_into_cmplx_${k1}$ -#:endfor - -#:for k1, t1 in LOG_KINDS_TYPES - subroutine base64_encode_into_logical_${k1}$(data, str, encoded_len, error_flag) - ${t1}$, intent(in), target, contiguous :: data(..) - character(len=*), intent(out), target :: str - integer, intent(out), optional :: encoded_len - logical, intent(out), optional :: error_flag - - integer(int8), pointer :: raw(:) - integer(int64) :: nbytes - integer :: elen - logical :: err - - nbytes = int(size(data), int64) * storage_size(data) / 8 - if (nbytes == 0) then - str = '' - if (present(encoded_len)) encoded_len = 0 - if (present(error_flag)) error_flag = .false. - return - end if - - call c_f_pointer(c_loc(data), raw, [nbytes]) - call base64_encode_bytes_into(raw, str, elen, err) - - if (present(encoded_len)) encoded_len = elen - if (present(error_flag)) error_flag = err - end subroutine base64_encode_into_logical_${k1}$ -#:endfor - function base64_encode_bytes(bytes) result(str) integer(int8), intent(in), target, contiguous :: bytes(:) character(len=:), allocatable, target :: str @@ -255,7 +140,8 @@ contains end function base64_encode_bytes - subroutine base64_encode_bytes_into(bytes, str, encoded_len, error_flag) + ! POWER-USER API (Zero-Copy Subroutine) + subroutine base64_encode_into(bytes, str, encoded_len, error_flag) integer(int8), intent(in), target, contiguous :: bytes(:) character(len=*), intent(out), target :: str integer, intent(out) :: encoded_len @@ -266,16 +152,19 @@ contains integer :: i, j integer(int32) :: triplet - str = '' encoded_len = 0 error_flag = .false. nbytes = size(bytes, kind=c_size_t) - if (nbytes == 0) return + if (nbytes == 0) then + str = '' ! Blank-fill safely on edge case + return + end if str_len = 4 * ((int(nbytes) + 2) / 3) if (len(str) < str_len) then + str = '' ! Wipe partial data on error error_flag = .true. return end if @@ -309,6 +198,6 @@ contains str(j+3:j+3) = "=" end if - end subroutine base64_encode_bytes_into + end subroutine base64_encode_into end module stdlib_base64_encode \ No newline at end of file diff --git a/test/string/test_base64.f90 b/test/string/test_base64.f90 index 172021f58..0fd5ff257 100644 --- a/test/string/test_base64.f90 +++ b/test/string/test_base64.f90 @@ -1,7 +1,7 @@ ! SPDX-Identifier: MIT module test_base64 use testdrive, only : new_unittest, unittest_type, error_type, check - use stdlib_base64, only : base64_encode, base64_decode + use stdlib_base64, only : base64_encode, base64_decode, base64_encode_into, base64_decode_into use stdlib_kinds, only : int8, int32, dp, lk implicit none @@ -18,7 +18,9 @@ subroutine collect_base64(testsuite) new_unittest("base64-roundtrip-real", test_roundtrip_real), & new_unittest("base64-roundtrip-complex", test_roundtrip_complex), & new_unittest("base64-roundtrip-logical", test_roundtrip_logical), & - new_unittest("base64-rank0", test_rank0_encode) & + new_unittest("base64-rank0", test_rank0_encode), & + new_unittest("base64-encode-into", test_encode_into), & + new_unittest("base64-decode-into", test_decode_into) & ] end subroutine collect_base64 @@ -47,9 +49,13 @@ end subroutine test_decode_whitespace subroutine test_decode_invalid(error) type(error_type), allocatable, intent(out) :: error + logical :: err_flag - call check(error, base64_decode("abc") == "") + call check(error, base64_decode("abc", err_flag) == "") if (allocated(error)) return + call check(error, err_flag) + if (allocated(error)) return + call check(error, base64_decode("A===") == "") if (allocated(error)) return call check(error, base64_decode("AA=A") == "") @@ -109,7 +115,6 @@ subroutine test_roundtrip_logical(error) enc = base64_encode(vals) dec = base64_decode(enc) - got = transfer(dec, got) call check(error, all((vals .neqv. .false._lk) .eqv. (got .neqv. .false._lk))) @@ -124,8 +129,56 @@ subroutine test_rank0_encode(error) call check(error, len(base64_encode(v)) > 0) end subroutine test_rank0_encode -end module test_base64 + subroutine test_encode_into(error) + type(error_type), allocatable, intent(out) :: error + integer(int8) :: bytes(3) + character(len=4) :: str + character(len=2) :: small_str + integer :: elen + logical :: err + + bytes = [int(77, int8), int(97, int8), int(110, int8)] + ! Test successful zero-copy encode + call base64_encode_into(bytes, str, elen, err) + call check(error, .not. err) + if (allocated(error)) return + call check(error, elen == 4) + if (allocated(error)) return + call check(error, str == "TWFu") + if (allocated(error)) return + + ! Test buffer too small (Expect Error) + call base64_encode_into(bytes, small_str, elen, err) + call check(error, err) + if (allocated(error)) return + call check(error, small_str == "") + end subroutine test_encode_into + + subroutine test_decode_into(error) + type(error_type), allocatable, intent(out) :: error + character(len=4) :: str = "TWFu" + character(len=3) :: res + character(len=1) :: small_res + integer :: dlen + logical :: err + + ! Test successful decode + call base64_decode_into(str, res, dlen, err) + call check(error, .not. err) + if (allocated(error)) return + call check(error, dlen == 3) + if (allocated(error)) return + call check(error, res == "Man") + if (allocated(error)) return + + ! Test buffer too small (Expect Error) + call base64_decode_into(str, small_res, dlen, err) + call check(error, err) + if (allocated(error)) return + end subroutine test_decode_into + +end module test_base64 program tester use, intrinsic :: iso_fortran_env, only : error_unit @@ -151,4 +204,4 @@ program tester write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" error stop end if -end program tester +end program tester \ No newline at end of file From e21a325d1132dff39f88c18be697e5fbbed60045 Mon Sep 17 00:00:00 2001 From: RatanKokal Date: Sun, 5 Apr 2026 14:58:56 +0530 Subject: [PATCH 5/7] ref: base64 architecture to submodules and apply stdlib standards This commit addresses the architectural and stylistic feedback for the Base64 implementation: * Architecture: Consolidated interfaces into a single stdlib_base64 parent module and moved the high-performance logic into encode and decode submodules to prevent compilation cascades. * Standards: Replaced the logical error flag with type(state_type) from stdlib_error for consistent library-wide error handling. * Optimization: Marked the core _into subroutines as pure to guarantee no side effects and allow for aggressive compiler optimizations. * Dependency: Imported base64_alphabet directly from stdlib_ascii to reduce redundancy. * Documentation: Added explanatory comments for the branchless DT and IS_VAL lookup tables. * Formatting: Expanded semicolon-separated multi-statement lines into single lines to improve debuggability and prevent CI truncation errors. Signed-off-by: RatanKokal --- src/core/stdlib_ascii.fypp | 2 + src/strings/stdlib_base64.fypp | 161 +++++++++++++++++++++++++- src/strings/stdlib_base64_decode.fypp | 158 ++++++++++++------------- src/strings/stdlib_base64_encode.fypp | 156 +++++++++++++------------ test/string/test_base64.f90 | 17 +-- 5 files changed, 320 insertions(+), 174 deletions(-) diff --git a/src/core/stdlib_ascii.fypp b/src/core/stdlib_ascii.fypp index fa062900d..b6341703a 100644 --- a/src/core/stdlib_ascii.fypp +++ b/src/core/stdlib_ascii.fypp @@ -65,6 +65,8 @@ module stdlib_ascii character(len=*), public, parameter :: letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" !! A .. Za .. z character(len=*), public, parameter :: uppercase = letters(1:26) !! A .. Z character(len=*), public, parameter :: lowercase = letters(27:) !! a .. z + character(len=*), public, parameter :: base64_alphabet = & + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" !! RFC 4648 Base64 alphabet character(len=*), public, parameter :: whitespace = " "//TAB//VT//CR//LF//FF !! ASCII _whitespace diff --git a/src/strings/stdlib_base64.fypp b/src/strings/stdlib_base64.fypp index 2dc9ad1de..38297ef99 100644 --- a/src/strings/stdlib_base64.fypp +++ b/src/strings/stdlib_base64.fypp @@ -1,4 +1,5 @@ ! SPDX-Identifier: MIT +#:include "common.fypp" !> Base64 encoding and decoding algorithms !> @@ -10,13 +11,167 @@ !> may be subject to change in future releases of `stdlib`. module stdlib_base64 - use stdlib_base64_encode, only: base64_encode, base64_encode_into - use stdlib_base64_decode, only: base64_decode, base64_decode_into - + use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk + use, intrinsic :: iso_c_binding, only: c_size_t, c_bool + use stdlib_ascii, only: base64_alphabet + use stdlib_error, only: state_type + implicit none private public :: base64_encode, base64_encode_into public :: base64_decode, base64_decode_into + ! Branchless RFC 4648 decode map: byte -> 6-bit value, invalid -> -1. + ! The main loop OR-reduces values and checks once at the end. + integer(int8), parameter :: DT(0:255) = int( [ & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, & + 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, & + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, & + 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, & + -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, & + 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 & + ], int8) + + ! Branchless stream-compaction mask for despace step: keep(1) / drop(0). + integer(int32), parameter :: IS_VAL(0:255) = int([ & + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, & + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, & + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 & + ], int32) + + !> Encodes intrinsic arrays into Base64 text. + !> + !> This is the ergonomic API for encoding. It allocates and returns the + !> output string automatically. + !> + !> Use this form when convenience matters more than avoiding allocations. + !> + !> Example: + !>```fortran + !> character(len=:), allocatable :: encoded + !> integer(int8) :: bytes(3) = [77_int8, 97_int8, 110_int8] + !> encoded = base64_encode(bytes) ! "TWFu" + !>``` + interface base64_encode +#:for k1, t1, _, _ in REAL_KINDS_TYPES + module procedure :: base64_encode_real_${k1}$ +#:endfor +#:for k1, t1, _, _ in INT_KINDS_TYPES + module procedure :: base64_encode_int_${k1}$ +#:endfor +#:for k1, t1, _ in CMPLX_KINDS_TYPES + module procedure :: base64_encode_cmplx_${k1}$ +#:endfor +#:for k1, t1 in LOG_KINDS_TYPES + module procedure :: base64_encode_logical_${k1}$ +#:endfor + end interface base64_encode + + interface +#:for k1, t1, _, _ in REAL_KINDS_TYPES + module function base64_encode_real_${k1}$(data) result(str) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=:), allocatable :: str + end function base64_encode_real_${k1}$ +#:endfor + +#:for k1, t1, _, _ in INT_KINDS_TYPES + module function base64_encode_int_${k1}$(data) result(str) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=:), allocatable :: str + end function base64_encode_int_${k1}$ +#:endfor + +#:for k1, t1, _ in CMPLX_KINDS_TYPES + module function base64_encode_cmplx_${k1}$(data) result(str) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=:), allocatable :: str + end function base64_encode_cmplx_${k1}$ +#:endfor + +#:for k1, t1 in LOG_KINDS_TYPES + module function base64_encode_logical_${k1}$(data) result(str) + ${t1}$, intent(in), target, contiguous :: data(..) + character(len=:), allocatable :: str + end function base64_encode_logical_${k1}$ +#:endfor + + module function base64_encode_bytes(bytes) result(str) + integer(int8), intent(in), target, contiguous :: bytes(:) + character(len=:), allocatable, target :: str + end function base64_encode_bytes + + !> Encodes bytes into a caller-provided output buffer. + !> + !> This is the preallocated API for throughput-sensitive workflows. + !> It does not allocate and reports status through `err_state`. + !> + !> On success, `err_state%ok()` is `.true.` and `encoded_len` is the + !> number of meaningful characters written into `str`. + pure module subroutine base64_encode_into(bytes, str, encoded_len, err_state) + integer(int8), intent(in), target, contiguous :: bytes(:) + character(len=*), intent(out), target :: str + integer, intent(out) :: encoded_len + type(state_type), intent(out) :: err_state + end subroutine base64_encode_into + + !> Decodes Base64 text into a caller-provided output buffer. + !> + !> This is the preallocated API for throughput-sensitive workflows. + !> It does not allocate and reports status through `err_state`. + !> + !> The optional `skip_despace` input can be used when the input is + !> already whitespace-free. + pure module subroutine base64_decode_into(str, res, decoded_len, err_state, skip_despace) + character(len=*), intent(in) :: str + character(len=*), intent(out) :: res + integer, intent(out) :: decoded_len + type(state_type), intent(out) :: err_state + logical, intent(in), optional :: skip_despace + end subroutine base64_decode_into + + !> Decodes Base64 text and returns an allocated byte-string. + !> + !> This is the ergonomic API for decoding. It allocates and returns + !> the result automatically. + !> + !> On error, an empty result is returned. If `err_state` is present, + !> details are stored there. + !> + !> Example: + !>```fortran + !> character(len=:), allocatable :: decoded + !> decoded = base64_decode("TWFu") ! "Man" + !>``` + module function base64_decode(str, err_state) result(res) + character(len=*), intent(in) :: str + type(state_type), intent(out), optional :: err_state + character(len=:), allocatable :: res + end function base64_decode + end interface + end module stdlib_base64 \ No newline at end of file diff --git a/src/strings/stdlib_base64_decode.fypp b/src/strings/stdlib_base64_decode.fypp index 61a694f50..fb6377390 100644 --- a/src/strings/stdlib_base64_decode.fypp +++ b/src/strings/stdlib_base64_decode.fypp @@ -1,70 +1,25 @@ ! SPDX-Identifier: MIT -module stdlib_base64_decode - use stdlib_kinds, only: int8, int32 - implicit none - private - - public :: base64_decode - public :: base64_decode_into - - integer(int8), parameter :: DT(0:255) = int( [ & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, & - 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, & - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, & - 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, & - -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, & - 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, & - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 & - ], int8) - - ! 0 for whitespace/control chars, 1 for valid chars - integer(int32), parameter :: IS_VAL(0:255) = int([ & - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, & - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, & - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, & - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 & - ], int32) +submodule(stdlib_base64) stdlib_base64_decode + use stdlib_error, only: state_type, STDLIB_SUCCESS, STDLIB_VALUE_ERROR contains - ! POWER-USER API (Zero-Copy Subroutines) - subroutine base64_decode_into(str, res, decoded_len, error_flag, skip_despace) + pure module subroutine base64_decode_into(str, res, decoded_len, err_state, skip_despace) character(len=*), intent(in) :: str character(len=*), intent(out) :: res - integer, intent(out) :: decoded_len - logical, intent(out) :: error_flag + integer, intent(out) :: decoded_len + type(state_type), intent(out) :: err_state logical, intent(in), optional :: skip_despace integer(int8), allocatable :: filtered(:) logical :: clean_mode - + integer(int32) :: v0, v1, v2, v3, c, raw_c integer :: i, j, slen, n_pad integer(int32) :: error_accum, unicode_accum - error_flag = .false. + err_state = state_type(STDLIB_SUCCESS) decoded_len = 0 unicode_accum = 0 @@ -75,7 +30,11 @@ contains if (clean_mode) then slen = len(str) - if (mod(slen, 4) /= 0) then; error_flag = .true.; return; end if + if (mod(slen, 4) /= 0) then + err_state = state_type("base64_decode_into", STDLIB_VALUE_ERROR, & + "Input length must be a multiple of 4") + return + end if n_pad = 0 if (str(slen:slen) == '=') then @@ -84,7 +43,12 @@ contains end if decoded_len = (slen / 4) * 3 - n_pad - if (len(res) < decoded_len) then; error_flag = .true.; decoded_len = 0; return; end if + if (len(res) < decoded_len) then + err_state = state_type("base64_decode_into", STDLIB_VALUE_ERROR, & + "Output buffer too small") + decoded_len = 0 + return + end if else allocate(filtered(len(str))) j = 0 @@ -92,16 +56,14 @@ contains raw_c = int(iachar(str(i:i)), int32) unicode_accum = ior(unicode_accum, raw_c) c = iand(raw_c, 255_int32) - - ! ALWAYS write the byte to the next available slot filtered(j + 1) = int(c, int8) j = j + IS_VAL(c) end do slen = j - - ! Catch Unicode overflow before proceeding + if (unicode_accum > 255_int32 .or. slen == 0 .or. mod(slen, 4) /= 0) then - error_flag = .true. + err_state = state_type("base64_decode_into", STDLIB_VALUE_ERROR, & + "Invalid Base64 byte stream") return end if @@ -112,7 +74,12 @@ contains end if decoded_len = (slen / 4) * 3 - n_pad - if (len(res) < decoded_len) then; error_flag = .true.; decoded_len = 0; return; end if + if (len(res) < decoded_len) then + err_state = state_type("base64_decode_into", STDLIB_VALUE_ERROR, & + "Output buffer too small") + decoded_len = 0 + return + end if end if error_accum = 0 @@ -138,7 +105,7 @@ contains v2 = int(DT(iand(int(filtered(i+2), int32), 255_int32)), int32) v3 = int(DT(iand(int(filtered(i+3), int32), 255_int32)), int32) end if - + error_accum = ior(error_accum, ior(v0, ior(v1, ior(v2, v3)))) j = (i - 1) / 4 * 3 + 1 @@ -148,7 +115,8 @@ contains end do if (error_accum < 0 .or. unicode_accum > 255_int32) then - error_flag = .true. + err_state = state_type("base64_decode_into", STDLIB_VALUE_ERROR, & + "Invalid Base64 alphabet or non-ASCII input") decoded_len = 0 return end if @@ -161,72 +129,88 @@ contains raw_c = int(iachar(str(i:i)), int32) unicode_accum = ior(unicode_accum, raw_c) c = iand(raw_c, 255_int32) - v0 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + v0 = int(DT(c), int32) raw_c = int(iachar(str(i+1:i+1)), int32) unicode_accum = ior(unicode_accum, raw_c) c = iand(raw_c, 255_int32) - v1 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + v1 = int(DT(c), int32) raw_c = int(iachar(str(i+2:i+2)), int32) unicode_accum = ior(unicode_accum, raw_c) c = iand(raw_c, 255_int32) - v2 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + v2 = int(DT(c), int32) raw_c = int(iachar(str(i+3:i+3)), int32) unicode_accum = ior(unicode_accum, raw_c) c = iand(raw_c, 255_int32) - v3 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + v3 = int(DT(c), int32) else c = iand(int(filtered(i), int32), 255_int32) - v0 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + v0 = int(DT(c), int32) c = iand(int(filtered(i+1), int32), 255_int32) - v1 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + v1 = int(DT(c), int32) c = iand(int(filtered(i+2), int32), 255_int32) - v2 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + v2 = int(DT(c), int32) c = iand(int(filtered(i+3), int32), 255_int32) - v3 = merge(-1_int32, int(DT(min(c,127)), int32), c > 127_int32) + v3 = int(DT(c), int32) + end if + + if (v0 < 0 .or. v1 < 0 .or. unicode_accum > 255_int32) then + err_state = state_type("base64_decode_into", STDLIB_VALUE_ERROR, & + "Invalid final quartet") + decoded_len = 0 + return end if - if (v0 < 0 .or. v1 < 0 .or. unicode_accum > 255_int32) then; error_flag = .true.; decoded_len = 0; return; end if res(j:j) = char(ior(ishft(v0, 2), ishft(v1, -4))) if (n_pad < 2) then - if (v2 < 0) then; error_flag = .true.; decoded_len = 0; return; end if + if (v2 < 0) then + err_state = state_type("base64_decode_into", STDLIB_VALUE_ERROR, & + "Invalid final quartet") + decoded_len = 0 + return + end if res(j+1:j+1) = char(ior(ishft(iand(v1, 15_int32), 4), ishft(v2, -2))) end if if (n_pad == 0) then - if (v3 < 0) then; error_flag = .true.; decoded_len = 0; return; end if + if (v3 < 0) then + err_state = state_type("base64_decode_into", STDLIB_VALUE_ERROR, & + "Invalid final quartet") + decoded_len = 0 + return + end if res(j+2:j+2) = char(ior(ishft(iand(v2, 3_int32), 6), v3)) end if end subroutine base64_decode_into - ! ERGONOMIC API (Returns allocatable strings) - function base64_decode(str, error_flag) result(res) - character(len=*), intent(in) :: str - logical, intent(out), optional :: error_flag + module function base64_decode(str, err_state) result(res) + character(len=*), intent(in) :: str + type(state_type), intent(out), optional :: err_state character(len=:), allocatable :: res - + integer :: decoded_len - logical :: err - + type(state_type) :: local_state + if (len(str) == 0) then allocate(character(len=0) :: res) - if (present(error_flag)) error_flag = .false. + local_state = state_type(STDLIB_SUCCESS) + if (present(err_state)) err_state = local_state return end if ! Pre-allocate maximum possible size needed allocate(character(len=(len(str) / 4) * 3 + 3) :: res) - - call base64_decode_into(str, res, decoded_len, err) - - if (err) then + + call base64_decode_into(str, res, decoded_len, local_state) + + if (.not. local_state%ok()) then deallocate(res) allocate(character(len=0) :: res) else res = res(1:decoded_len) end if - if (present(error_flag)) error_flag = err + if (present(err_state)) err_state = local_state end function base64_decode -end module stdlib_base64_decode \ No newline at end of file +end submodule stdlib_base64_decode \ No newline at end of file diff --git a/src/strings/stdlib_base64_encode.fypp b/src/strings/stdlib_base64_encode.fypp index acac97021..471bb6e2b 100644 --- a/src/strings/stdlib_base64_encode.fypp +++ b/src/strings/stdlib_base64_encode.fypp @@ -1,44 +1,23 @@ ! SPDX-Identifier: MIT #:include "common.fypp" -module stdlib_base64_encode - use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk - use, intrinsic :: iso_c_binding, only: c_size_t, c_bool - implicit none - private - - public :: base64_encode, base64_encode_into - - character(len=64), parameter :: ALPHABET = & - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" - - ! ERGONOMIC API (Returns allocatable strings) - interface base64_encode -#:for k1, t1, _, _ in REAL_KINDS_TYPES - module procedure :: base64_encode_real_${k1}$ -#:endfor -#:for k1, t1, _, _ in INT_KINDS_TYPES - module procedure :: base64_encode_int_${k1}$ -#:endfor -#:for k1, t1, _ in CMPLX_KINDS_TYPES - module procedure :: base64_encode_cmplx_${k1}$ -#:endfor -#:for k1, t1 in LOG_KINDS_TYPES - module procedure :: base64_encode_logical_${k1}$ -#:endfor - end interface base64_encode +submodule(stdlib_base64) stdlib_base64_encode + use stdlib_error, only: state_type, STDLIB_SUCCESS, STDLIB_VALUE_ERROR contains #:set RANKS = range(16) #:for k1, t1, _, _ in REAL_KINDS_TYPES - function base64_encode_real_${k1}$(data) result(str) + module function base64_encode_real_${k1}$(data) result(str) ${t1}$, intent(in), target, contiguous :: data(..) character(len=:), allocatable :: str integer(int64) :: nbytes nbytes = int(size(data), int64) * storage_size(data) / 8 - if (nbytes == 0) then; str = ""; return; end if + if (nbytes == 0) then + str = "" + return + end if ! Standard-compliant conversion using transfer select rank(data) #:for r in RANKS @@ -50,12 +29,15 @@ contains #:endfor #:for k1, t1, _, _ in INT_KINDS_TYPES - function base64_encode_int_${k1}$(data) result(str) + module function base64_encode_int_${k1}$(data) result(str) ${t1}$, intent(in), target, contiguous :: data(..) character(len=:), allocatable :: str integer(int64) :: nbytes nbytes = int(size(data), int64) * storage_size(data) / 8 - if (nbytes == 0) then; str = ""; return; end if + if (nbytes == 0) then + str = "" + return + end if select rank(data) #:for r in RANKS rank(${r}$) @@ -66,12 +48,15 @@ contains #:endfor #:for k1, t1, _ in CMPLX_KINDS_TYPES - function base64_encode_cmplx_${k1}$(data) result(str) + module function base64_encode_cmplx_${k1}$(data) result(str) ${t1}$, intent(in), target, contiguous :: data(..) character(len=:), allocatable :: str integer(int64) :: nbytes nbytes = int(size(data), int64) * storage_size(data) / 8 - if (nbytes == 0) then; str = ""; return; end if + if (nbytes == 0) then + str = "" + return + end if select rank(data) #:for r in RANKS rank(${r}$) @@ -82,12 +67,15 @@ contains #:endfor #:for k1, t1 in LOG_KINDS_TYPES - function base64_encode_logical_${k1}$(data) result(str) + module function base64_encode_logical_${k1}$(data) result(str) ${t1}$, intent(in), target, contiguous :: data(..) character(len=:), allocatable :: str integer(int64) :: nbytes nbytes = int(size(data), int64) * storage_size(data) / 8 - if (nbytes == 0) then; str = ""; return; end if + if (nbytes == 0) then + str = "" + return + end if select rank(data) #:for r in RANKS rank(${r}$) @@ -97,17 +85,17 @@ contains end function base64_encode_logical_${k1}$ #:endfor - function base64_encode_bytes(bytes) result(str) + module function base64_encode_bytes(bytes) result(str) integer(int8), intent(in), target, contiguous :: bytes(:) character(len=:), allocatable, target :: str integer(c_size_t) :: nbytes integer :: str_len integer :: i, j - integer(int32) :: triplet + integer(int32) :: triplet nbytes = size(bytes, kind=c_size_t) str_len = 4 * ((int(nbytes) + 2) / 3) - + allocate(character(len=str_len) :: str) if (nbytes == 0) return @@ -116,59 +104,67 @@ contains triplet = ior(ishft(iand(int(bytes(i), int32), 255_int32), 16_int32), & ior(ishft(iand(int(bytes(i+1), int32), 255_int32), 8_int32), & iand(int(bytes(i+2), int32), 255_int32))) - - str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) - str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) - str(j+2:j+2) = ALPHABET(iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) - str(j+3:j+3) = ALPHABET(iand(triplet, 63_int32) + 1 : iand(triplet, 63_int32) + 1) + + str(j:j) = base64_alphabet( & + ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = base64_alphabet( & + iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+2) = base64_alphabet( & + iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) + str(j+3:j+3) = base64_alphabet(iand(triplet, 63_int32) + 1 : iand(triplet, 63_int32) + 1) j = j + 4 end do - + if (mod(nbytes, 3_c_size_t) == 1) then triplet = ishft(iand(int(bytes(int(nbytes)), int32), 255_int32), 16_int32) - str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) - str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j:j) = base64_alphabet( & + ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = base64_alphabet( & + iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) str(j+2:j+3) = "==" else if (mod(nbytes, 3_c_size_t) == 2) then triplet = ior(ishft(iand(int(bytes(int(nbytes)-1), int32), 255_int32), 16_int32), & ishft(iand(int(bytes(int(nbytes)), int32), 255_int32), 8_int32)) - str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) - str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) - str(j+2:j+2) = ALPHABET(iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) + str(j:j) = base64_alphabet( & + ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = base64_alphabet( & + iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+2) = base64_alphabet( & + iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) str(j+3:j+3) = "=" end if - + end function base64_encode_bytes - ! POWER-USER API (Zero-Copy Subroutine) - subroutine base64_encode_into(bytes, str, encoded_len, error_flag) + pure module subroutine base64_encode_into(bytes, str, encoded_len, err_state) integer(int8), intent(in), target, contiguous :: bytes(:) character(len=*), intent(out), target :: str - integer, intent(out) :: encoded_len - logical, intent(out) :: error_flag - + integer, intent(out) :: encoded_len + type(state_type), intent(out) :: err_state + integer(c_size_t) :: nbytes integer :: str_len integer :: i, j - integer(int32) :: triplet + integer(int32) :: triplet encoded_len = 0 - error_flag = .false. + err_state = state_type(STDLIB_SUCCESS) nbytes = size(bytes, kind=c_size_t) if (nbytes == 0) then - str = '' ! Blank-fill safely on edge case + str = '' return end if - + str_len = 4 * ((int(nbytes) + 2) / 3) - + if (len(str) < str_len) then - str = '' ! Wipe partial data on error - error_flag = .true. - return + str = '' + err_state = state_type("base64_encode_into", STDLIB_VALUE_ERROR, & + "Output buffer too small") + return end if - + encoded_len = str_len j = 1 @@ -176,28 +172,36 @@ contains triplet = ior(ishft(iand(int(bytes(i), int32), 255_int32), 16_int32), & ior(ishft(iand(int(bytes(i+1), int32), 255_int32), 8_int32), & iand(int(bytes(i+2), int32), 255_int32))) - - str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) - str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) - str(j+2:j+2) = ALPHABET(iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) - str(j+3:j+3) = ALPHABET(iand(triplet, 63_int32) + 1 : iand(triplet, 63_int32) + 1) + + str(j:j) = base64_alphabet( & + ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = base64_alphabet( & + iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+2) = base64_alphabet( & + iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) + str(j+3:j+3) = base64_alphabet(iand(triplet, 63_int32) + 1 : iand(triplet, 63_int32) + 1) j = j + 4 end do - + if (mod(nbytes, 3_c_size_t) == 1) then triplet = ishft(iand(int(bytes(int(nbytes)), int32), 255_int32), 16_int32) - str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) - str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j:j) = base64_alphabet( & + ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = base64_alphabet( & + iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) str(j+2:j+3) = "==" else if (mod(nbytes, 3_c_size_t) == 2) then triplet = ior(ishft(iand(int(bytes(int(nbytes)-1), int32), 255_int32), 16_int32), & ishft(iand(int(bytes(int(nbytes)), int32), 255_int32), 8_int32)) - str(j:j) = ALPHABET(ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) - str(j+1:j+1) = ALPHABET(iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) - str(j+2:j+2) = ALPHABET(iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) + str(j:j) = base64_alphabet( & + ishft(triplet, -18_int32) + 1 : ishft(triplet, -18_int32) + 1) + str(j+1:j+1) = base64_alphabet( & + iand(ishft(triplet, -12_int32), 63_int32) + 1 : iand(ishft(triplet, -12_int32), 63_int32) + 1) + str(j+2:j+2) = base64_alphabet( & + iand(ishft(triplet, -6_int32), 63_int32) + 1 : iand(ishft(triplet, -6_int32), 63_int32) + 1) str(j+3:j+3) = "=" end if - + end subroutine base64_encode_into -end module stdlib_base64_encode \ No newline at end of file +end submodule stdlib_base64_encode \ No newline at end of file diff --git a/test/string/test_base64.f90 b/test/string/test_base64.f90 index 0fd5ff257..e35836702 100644 --- a/test/string/test_base64.f90 +++ b/test/string/test_base64.f90 @@ -3,6 +3,7 @@ module test_base64 use testdrive, only : new_unittest, unittest_type, error_type, check use stdlib_base64, only : base64_encode, base64_decode, base64_encode_into, base64_decode_into use stdlib_kinds, only : int8, int32, dp, lk + use stdlib_error, only : state_type implicit none contains @@ -49,11 +50,11 @@ end subroutine test_decode_whitespace subroutine test_decode_invalid(error) type(error_type), allocatable, intent(out) :: error - logical :: err_flag + type(state_type) :: err_flag call check(error, base64_decode("abc", err_flag) == "") if (allocated(error)) return - call check(error, err_flag) + call check(error, .not. err_flag%ok()) if (allocated(error)) return call check(error, base64_decode("A===") == "") @@ -135,13 +136,13 @@ subroutine test_encode_into(error) character(len=4) :: str character(len=2) :: small_str integer :: elen - logical :: err + type(state_type) :: err bytes = [int(77, int8), int(97, int8), int(110, int8)] ! Test successful zero-copy encode call base64_encode_into(bytes, str, elen, err) - call check(error, .not. err) + call check(error, err%ok()) if (allocated(error)) return call check(error, elen == 4) if (allocated(error)) return @@ -150,7 +151,7 @@ subroutine test_encode_into(error) ! Test buffer too small (Expect Error) call base64_encode_into(bytes, small_str, elen, err) - call check(error, err) + call check(error, .not. err%ok()) if (allocated(error)) return call check(error, small_str == "") end subroutine test_encode_into @@ -161,11 +162,11 @@ subroutine test_decode_into(error) character(len=3) :: res character(len=1) :: small_res integer :: dlen - logical :: err + type(state_type) :: err ! Test successful decode call base64_decode_into(str, res, dlen, err) - call check(error, .not. err) + call check(error, err%ok()) if (allocated(error)) return call check(error, dlen == 3) if (allocated(error)) return @@ -174,7 +175,7 @@ subroutine test_decode_into(error) ! Test buffer too small (Expect Error) call base64_decode_into(str, small_res, dlen, err) - call check(error, err) + call check(error, .not. err%ok()) if (allocated(error)) return end subroutine test_decode_into From 6a013bf569cd3b7f2738bf6d399aec1661d0abbf Mon Sep 17 00:00:00 2001 From: RatanKokal Date: Mon, 13 Apr 2026 14:26:41 +0530 Subject: [PATCH 6/7] fix(base64): address review feedback for portability and style - use MAXRANK-based rank generation (including rank-0 scalar support) - align c_bool import with stdlib_kinds and keep iso_c_binding for c_size_t - derive base64 alphabet from letters and digits constants - simplify decode despace loop by skipping filtered-out bytes directly - tidy roundtrip tests: parameterized constants and 132-char-safe line split Signed-off-by: RatanKokal --- src/core/stdlib_ascii.fypp | 3 +-- src/strings/stdlib_base64.fypp | 4 ++-- src/strings/stdlib_base64_decode.fypp | 5 +++-- src/strings/stdlib_base64_encode.fypp | 4 +++- test/string/test_base64.f90 | 9 +++++---- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/core/stdlib_ascii.fypp b/src/core/stdlib_ascii.fypp index b6341703a..1555fc492 100644 --- a/src/core/stdlib_ascii.fypp +++ b/src/core/stdlib_ascii.fypp @@ -65,8 +65,7 @@ module stdlib_ascii character(len=*), public, parameter :: letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" !! A .. Za .. z character(len=*), public, parameter :: uppercase = letters(1:26) !! A .. Z character(len=*), public, parameter :: lowercase = letters(27:) !! a .. z - character(len=*), public, parameter :: base64_alphabet = & - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" !! RFC 4648 Base64 alphabet + character(len=*), public, parameter :: base64_alphabet = letters//digits//"+/" !! RFC 4648 Base64 alphabet character(len=*), public, parameter :: whitespace = " "//TAB//VT//CR//LF//FF !! ASCII _whitespace diff --git a/src/strings/stdlib_base64.fypp b/src/strings/stdlib_base64.fypp index 38297ef99..275f22e17 100644 --- a/src/strings/stdlib_base64.fypp +++ b/src/strings/stdlib_base64.fypp @@ -11,8 +11,8 @@ !> may be subject to change in future releases of `stdlib`. module stdlib_base64 - use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk - use, intrinsic :: iso_c_binding, only: c_size_t, c_bool + use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64, lk, c_bool + use, intrinsic :: iso_c_binding, only: c_size_t use stdlib_ascii, only: base64_alphabet use stdlib_error, only: state_type diff --git a/src/strings/stdlib_base64_decode.fypp b/src/strings/stdlib_base64_decode.fypp index fb6377390..e3c821176 100644 --- a/src/strings/stdlib_base64_decode.fypp +++ b/src/strings/stdlib_base64_decode.fypp @@ -56,8 +56,9 @@ contains raw_c = int(iachar(str(i:i)), int32) unicode_accum = ior(unicode_accum, raw_c) c = iand(raw_c, 255_int32) - filtered(j + 1) = int(c, int8) - j = j + IS_VAL(c) + if (IS_VAL(c) == 0) cycle + j = j + 1 + filtered(j) = int(c, int8) end do slen = j diff --git a/src/strings/stdlib_base64_encode.fypp b/src/strings/stdlib_base64_encode.fypp index 471bb6e2b..a2d87fca9 100644 --- a/src/strings/stdlib_base64_encode.fypp +++ b/src/strings/stdlib_base64_encode.fypp @@ -6,7 +6,7 @@ submodule(stdlib_base64) stdlib_base64_encode contains -#:set RANKS = range(16) +#:set RANKS = range(0, MAXRANK + 1) #:for k1, t1, _, _ in REAL_KINDS_TYPES module function base64_encode_real_${k1}$(data) result(str) @@ -88,6 +88,8 @@ contains module function base64_encode_bytes(bytes) result(str) integer(int8), intent(in), target, contiguous :: bytes(:) character(len=:), allocatable, target :: str + ! Use c_size_t to safely track the size of massive arrays + ! without risking 32-bit integer overflow, and to ensure C-interop integer(c_size_t) :: nbytes integer :: str_len integer :: i, j diff --git a/test/string/test_base64.f90 b/test/string/test_base64.f90 index e35836702..b8852d093 100644 --- a/test/string/test_base64.f90 +++ b/test/string/test_base64.f90 @@ -80,10 +80,10 @@ end subroutine test_roundtrip_int32 subroutine test_roundtrip_real(error) type(error_type), allocatable, intent(out) :: error - real(dp) :: vals(4), got(4) + real(dp) :: got(4) character(len=:), allocatable :: enc, dec - vals = [1.5_dp, -2.25_dp, 0.125_dp, 9.0_dp] + real(dp), parameter :: vals(*) = [1.5_dp, -2.25_dp, 0.125_dp, 9.0_dp] enc = base64_encode(vals) dec = base64_decode(enc) @@ -95,10 +95,11 @@ end subroutine test_roundtrip_real subroutine test_roundtrip_complex(error) type(error_type), allocatable, intent(out) :: error - complex(dp) :: vals(3), got(3) + complex(dp) :: got(3) character(len=:), allocatable :: enc, dec - vals = [cmplx(1.0_dp, 2.0_dp, dp), cmplx(-3.0_dp, 0.5_dp, dp), cmplx(0.0_dp, -4.0_dp, dp)] + complex(dp), parameter :: vals(*) = [cmplx(1.0_dp, 2.0_dp, dp), & + cmplx(-3.0_dp, 0.5_dp, dp), cmplx(0.0_dp, -4.0_dp, dp)] enc = base64_encode(vals) dec = base64_decode(enc) From 4b9ac9ba733e3a22ed06be7d925855ccaff151c9 Mon Sep 17 00:00:00 2001 From: RatanKokal Date: Mon, 13 Apr 2026 14:37:23 +0530 Subject: [PATCH 7/7] docs(base64): add stdlib_base64 spec Signed-off-by: RatanKokal --- doc/specs/index.md | 1 + doc/specs/stdlib_base64.md | 172 +++++++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 doc/specs/stdlib_base64.md diff --git a/doc/specs/index.md b/doc/specs/index.md index f709fb2ca..d104c0397 100644 --- a/doc/specs/index.md +++ b/doc/specs/index.md @@ -14,6 +14,7 @@ This is an index/directory of the specifications (specs) for each new module/fea - [ansi](./stdlib_ansi.html) - Terminal color and style escape sequences - [array](./stdlib_array.html) - Procedures for index manipulation and array handling - [ascii](./stdlib_ascii.html) - Procedures for handling ASCII characters + - [base64](./stdlib_base64.html) - Base64 encoding and decoding routines - [constants](./stdlib_constants.html) - Constants - [bitsets](./stdlib_bitsets.html) - Bitset data types and procedures - [error](./stdlib_error.html) - Catching and handling errors diff --git a/doc/specs/stdlib_base64.md b/doc/specs/stdlib_base64.md new file mode 100644 index 000000000..67ea000d4 --- /dev/null +++ b/doc/specs/stdlib_base64.md @@ -0,0 +1,172 @@ +--- +title: base64 +--- + +# The `stdlib_base64` module + +[TOC] + +## Introduction + +The `stdlib_base64` module provides procedures to encode and decode intrinsic +Fortran data using the Base64 encoding scheme defined by RFC 4648. + +## Specification of the `stdlib_base64` procedures + +### `base64_encode` + +#### Status + +Experimental + +#### Description + +Encodes intrinsic arrays into Base64 text. + +This is the ergonomic API for encoding. It allocates and returns the output +string automatically. + +#### Syntax + +`res =` [[stdlib_base64(module):base64_encode(interface)]] `(data)` + +#### Class + +Function. + +#### Argument + +`data`: shall be a contiguous array of an intrinsic type (`real`, `integer`, +`complex`, `logical`, or `integer(int8)`). It is an `intent(in)` argument. + +#### Result value + +The result `res` is an allocatable character string (`character(len=:)`) +containing the Base64 representation of the input `data`. + +#### Example + +```fortran +character(len=:), allocatable :: encoded +integer(int8) :: bytes(3) = [77_int8, 97_int8, 110_int8] + +encoded = base64_encode(bytes) ! "TWFu" +``` + +### `base64_encode_into` + +#### Status + +Experimental + +#### Description + +Encodes bytes into a caller-provided output buffer. + +This is the preallocated API for throughput-sensitive workflows. It does not +allocate and reports status through `err_state`. On success, +`err_state%ok()` is `.true.` and `encoded_len` is the number of meaningful +characters written to `str`. + +#### Syntax + +`call` [[stdlib_base64(module):base64_encode_into(subroutine)]] & +`(bytes, str, encoded_len, err_state)` + +#### Class + +Pure module subroutine. + +#### Arguments + +`bytes`: shall be a contiguous array of type `integer(int8)`. It is an +`intent(in)` argument. + +`str`: shall be an intrinsic character type. It is an `intent(out)` argument. + +`encoded_len`: shall be an `integer`. It is an `intent(out)` argument +representing the number of encoded characters written. + +`err_state`: shall be a `type(state_type)` from `stdlib_error`. It is an +`intent(out)` argument used to report success or errors. + +### `base64_decode` + +#### Status + +Experimental + +#### Description + +Decodes Base64 text and returns an allocated byte string. + +This is the ergonomic API for decoding. It allocates and returns the result +automatically. On error, an empty result is returned. If `err_state` is +present, error details are reported there. + +#### Syntax + +`res =` [[stdlib_base64(module):base64_decode(function)]] `(str [, err_state])` + +#### Class + +Function. + +#### Arguments + +`str`: shall be an intrinsic character type. It is an `intent(in)` argument. + +`err_state` (optional): shall be a `type(state_type)` from `stdlib_error`. +It is an `intent(out)` argument used to report invalid Base64 sequences. + +#### Result value + +The result `res` is an allocatable character string (`character(len=:)`) +containing the decoded bytes. + +#### Example + +```fortran +character(len=:), allocatable :: decoded + +decoded = base64_decode("TWFu") ! "Man" +``` + +### `base64_decode_into` + +#### Status + +Experimental + +#### Description + +Decodes Base64 text into a caller-provided output buffer. + +This is the preallocated API for throughput-sensitive workflows. It does not +allocate and reports status through `err_state`. The optional `skip_despace` +argument can be used when the input is already whitespace-free to bypass the +despacing step. + +#### Syntax + +`call` [[stdlib_base64(module):base64_decode_into(subroutine)]] & +`(str, res, decoded_len, err_state [, skip_despace])` + +#### Class + +Pure module subroutine. + +#### Arguments + +`str`: shall be an intrinsic character type. It is an `intent(in)` argument. + +`res`: shall be an intrinsic character type. It is an `intent(out)` argument. + +`decoded_len`: shall be an `integer`. It is an `intent(out)` argument +representing the number of decoded bytes written. + +`err_state`: shall be a `type(state_type)` from `stdlib_error`. It is an +`intent(out)` argument used to report success or errors. + +`skip_despace` (optional): shall be a `logical`. It is an `intent(in)` +argument. If `.true.`, the routine assumes the input contains no whitespace.