Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,7 @@ For my use case (>1GB of messy data and objects), cl-binary-store is almost 10x
See [benchmarking.md](benchmarking.md).

## TODO ideas
- [ ] A codespace for conspack or messagepack (for example; for a limited subset of types). This is mainly as an exercise as cl-messagepack and cl-conspack are fine implementations.
- [ ] Faster UTF-8 encoding / decoding (currently doing extra copy using sb-ext string-to-octets / octets-to-string)... probably get trivial-utf8 to export the byte-wise ops.
- [ ] Speed up cl-binary-store on ABCL and ECL so it is less than 10x slower than on SBCL
- [ ] Specialize store / restore functions on global settings to avoid so many parameters being passed around.
- [ ] When using implicit reference tracking, use a vector on restore like we do for the explicit case instead of overloading the reference hash table
- [ ] Turn versioned output on by default when reading/writing to a file
- [ ] Short specialized arrays use three available bits for length encoding
- [ ] Handle specialized multi-dimensional array data on non-SBCL faster. See babel for all the variants on with-array-data
- [ ] Faster standard-object serialization / deserialization using direct slot location accessors
- [ ] Handle the fact that ECL does not like array element type nil (which is what you get with a zero-size array) by signalling an invalid-data condition
67 changes: 48 additions & 19 deletions src/simple-array.lisp
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
(in-package :cl-binary-store)

;; TODO CHECK INTEROP, DID I REVERSE BITS ON BIT VECTORS, ETC.
;;; A quick note on serialized data format and malicious data
;; Most of the specialized array types are generally safe, in the
;; sense that they are densely packed --- the full double-float and
;; single-float space is available, you could pack it with random bits
;; and it would be fine. Same for simple-bit-vectors, ub8, ub16,
;; ub32, and ub64 and the equivalent signed versions. The main place
;; where things can go wrong is in (simple-array fixnum (*)) where
;; the backing array is actually a 64-bit storage slot with the values
;; fixnum encoded (shifted left one) in memory, so if you stuff a
;; non-fixnum in, bad things happen as the data is improperly tagged!
;; Less so for types like (unsigned-byte 7) and (unsigned-byte 15)
;; arrays, where most implementations don't care if you do bad things
;; to the remaining bits because the data is not stored fixnum encoded
;; / tagged, so when getting data out of the raw array, the bits are
;; immediately shifted turning them into a fixnum before anyone can
;; care and their identity is lost. Our serialized storage format for
;; fixnums matches that of SBCL where we shift numbers up by one
;; (mainly so that we can blit data directly during writing from an
;; sbcl host without having to touch the data). On all hosts,
;; including sbcl, we have to read the data back slowly and unshift it
;; and then let the reader lisp re-encode it in its internal fixnum
;; representation. Otherwise a malicious actor could stuff
;; non-fixnums in there, which results in memory faults on sbcl.

(declaim (ftype (function (t &optional (unsigned-byte 58))
(values (unsigned-byte 50) (unsigned-byte 8) &optional))
Expand Down Expand Up @@ -295,6 +317,30 @@
(cerror "USE-EMPTY-STRING" 'invalid-input-data :format-control "While restoring a string expected one of ~A, found ~A" :format-arguments (list #.(format nil "(~A ~A)" +simple-string-code+ +simple-base-string-code+) code))
""))))

(defmacro sap-ref-fixnum (sap offset)
  "Expand into code that reads one element of a serialized fixnum array
from SAP at OFFSET.

Serialized fixnum arrays hold each element in a signed 64-bit slot,
shifted up by one bit (the SBCL in-memory fixnum encoding).  The
expansion reads the slot with SIGNED-SAP-REF-64 and shifts it back down
by one to recover the plain integer.  Rather than blitting the raw
words back into the array, every element is decoded this way so that
potentially malicious input cannot plant a bogus, mis-tagged word.

On SBCL the shift is the whole expansion.  On other implementations the
shifted value is additionally checked to be a FIXNUM, and
UNEXPECTED-DATA is called when it is not."
  (let ((decoded `(ash (signed-sap-ref-64 ,sap ,offset) -1)))
    #+sbcl
    decoded
    #-sbcl
    (let ((value (gensym "DECODED")))
      ;; Bind the decoded value once so the read is not repeated by
      ;; the type check and the result form.
      `(let ((,value ,decoded))
         (if (typep ,value 'fixnum)
             ,value
             (unexpected-data "non fixnum in fixnum array"))))))

(defmacro set-sap-ref-fixnum (sap offset value)
  "Expand into code that stores the fixnum VALUE at OFFSET in SAP using
the serialized fixnum-array layout: a signed 64-bit slot holding VALUE
shifted up by one bit, as though every implementation tagged fixnums
with a zero lowest bit."
  ;; SBCL does not go through this macro; it blits its arrays directly.
  (list 'set-signed-sap-ref-64 sap offset (list 'ash value 1)))

(defmacro make-writer/reader (size-bits signed &key name-override reader array-type)
(let* ((writer (not reader))
(set/get (if (>= size-bits 8)
Expand Down Expand Up @@ -383,7 +429,7 @@
(bit (writer 1 nil))
(base-char (error "Should be handled by string store functions"))
(character (error "Should be handled by string store functions"))
(fixnum (writer 64 t set-signed-sap-ref-64 (simple-array fixnum (*))))
(fixnum (writer 64 t set-sap-ref-fixnum (simple-array fixnum (*))))
(single-float (writer 32 nil set-sap-ref-single (simple-array single-float (*))))
(double-float (writer 64 nil set-sap-ref-double (simple-array double-float (*)))))))))

Expand Down Expand Up @@ -420,23 +466,6 @@
,@body
(setf (read-storage-offset ,storage) (+ ,original-offset ,reserve-bytes)))))))

(defmacro sap-ref-fixnum (sap offset)
  "Expand into code that reads one element of a serialized fixnum array
from SAP at OFFSET.

The serialized data is fixnum encoded, which means shifted up by one
bit in a signed 64-bit slot, so it is shifted back down to produce an
'unencoded' integer.  We would normally just have blitted things back
into the array on SBCL, but in the presence of potentially malicious
data we have to make sure that our numbers are fixnums; shifting them
back down makes it impossible for a malicious actor to put bogus data
in the array.

On non-SBCL implementations the shift must be undone as well, because
SET-SAP-REF-FIXNUM writes the shifted form on every implementation.
There the shifted value is also checked to be a FIXNUM, and
UNEXPECTED-DATA is called when it is not."
  #+sbcl
  `(ash (signed-sap-ref-64 ,sap ,offset) -1)
  #-sbcl
  (let ((a (gensym)))
    ;; Undo the one-bit shift before the range check; reading the raw
    ;; slot would return twice the stored value.
    `(let ((,a (ash (signed-sap-ref-64 ,sap ,offset) -1)))
       (if (typep ,a 'fixnum)
           ,a
           (unexpected-data "non fixnum in fixnum array")))))

(defun restore-simple-specialized-vector (storage)
(declare (optimize (speed 3) (safety 1)))
(let ((num-elts (restore-tagged-unsigned-fixnum/interior storage)))
Expand Down
18 changes: 13 additions & 5 deletions test/cl-binary-store-tests.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -629,10 +629,11 @@
(true (zerop (length (restore (store nil nil))))))

(let ((stuff (list -123 -1234 -123456 -34247823946234923864 #*0101
-1f0 -2d0 -1.234d0 (expt 2 64) (/ (expt 2 128) (expt 2 12))
(complex 1d0 1d0)
(make-array 123 :element-type 'double-float :initial-element 1.23d0)
(make-array 123 :element-type '(signed-byte 32) :initial-element -123984))))
-1f0 -2d0 -1.234d0 (expt 2 64) (/ (expt 2 128) (expt 2 12))
(complex 1d0 1d0)
(make-array 123 :element-type 'double-float :initial-element 1.23d0)
(make-array 123 :element-type '(signed-byte 32) :initial-element -123984)
(make-array 1 :element-type 'fixnum :initial-element (expt 2 58)))))
(define-test test-interop-write
;; this writes a file with a bunch of stuff
(store "blarg.bin" stuff)
Expand Down Expand Up @@ -699,7 +700,14 @@
(loop repeat n
collect
(loop repeat m collect (random most-positive-fixnum))))))
(is 'equalp (restore (store nil arr)) arr)))
(is 'equalp (restore (store nil arr)) arr))
#+sbcl (is 'equalp (restore #(21 5 3 0 0 0 0 0 0 0 64))
(make-array 1 :element-type 'fixnum :initial-element (expt 2 61)))
#-sbcl
(finish (handler-case (restore #(21 5 3 0 0 0 0 0 0 0 64))
(invalid-input-data ())))
(is 'equalp (restore #(21 5 3 0 0 0 0 0 0 0 8)) ;; everyone agrees on this, (expt 2 58)
(make-array 1 :element-type 'fixnum :initial-element (expt 2 58))))

(define-test simple-array-fixnum-malicious
;; The below is a non-fixnum claiming to be in a fixnum array
Expand Down