Skip to content

Commit 1d537bf

Browse files
authored
Fix long encoding/decoding (#25)
1 parent fcf815e commit 1d537bf

File tree

2 files changed

+108
-23
lines changed

2 files changed

+108
-23
lines changed

lib/avro/datum.php

Lines changed: 65 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,68 @@ public function __construct($expected_schema, $datum)
4343
}
4444
}
4545

46+
/**
 * ZigZag + variable-length (7 bits per byte) codec used to encode longs.
 * https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding
 *
 * ZigZag folds the sign into the lowest bit (0, -1, 1, -2, ... become
 * 0, 1, 2, 3, ...) so that values of small magnitude need few bytes once
 * the result is cut into 7-bit groups, least significant group first.
 *
 * @package Avro
 */
class Zigzag
{
    const BYTE_SIZE = 8;
    const PLATFORM_BITS = PHP_INT_SIZE * self::BYTE_SIZE;

    /**
     * Implementation of unsigned shift right as PHP does not have the `>>>` operator.
     *
     * Vacated high bits are filled with zeros instead of copies of the sign bit.
     * A shift count of PLATFORM_BITS or more yields 0.
     *
     * @param int $n value to shift
     * @param int $x number of bit positions, must not be negative
     *
     * @return int
     * @throws \ArithmeticError if $x is negative (raised by PHP's own `>>`)
     */
    public static function unsigned_right_shift(int $n, int $x): int
    {
        if ($x === 0) {
            return $n;
        }
        if ($x >= self::PLATFORM_BITS) {
            // Every bit has been shifted out.
            return 0;
        }

        // `>>` is arithmetic and copies the sign into the top $x bits;
        // the mask keeps only the PLATFORM_BITS - $x bits that carry data.
        return ($n >> $x) & (PHP_INT_MAX >> ($x - 1));
    }

    /**
     * @param int|string $n
     * @return string long $n encoded as bytes
     * @internal This relies on 64-bit PHP.
     */
    public static function encode_long($n): string
    {
        $n = (int) $n;
        // ZigZag step: shift the magnitude up by one bit, then flip all bits
        // when the input was negative ($n >> (bits - 1) is 0 or -1).
        $n = ($n << 1) ^ ($n >> (self::PLATFORM_BITS - 1));

        $str = '';
        // Emit 7 bits at a time with the continuation flag (0x80) set while
        // higher bits remain. The shift must be unsigned because the ZigZag
        // value may have its top bit set.
        while (($n & ~0x7F) !== 0) {
            $str .= chr(($n & 0x7F) | 0x80);
            $n = self::unsigned_right_shift($n, 7);
        }

        return $str . chr($n);
    }

    /**
     * Decodes a ZigZag variable-length long.
     *
     * Reading stops at the first byte whose continuation flag (0x80) is
     * clear; any elements after it are ignored.
     *
     * @param array $bytes byte values (integers 0-255), least significant group first
     * @return int the decoded long
     * @throws \InvalidArgumentException if $bytes is empty, ends while the
     *         continuation flag is still set, or has more groups than fit
     *         in a PLATFORM_BITS-wide integer
     */
    public static function decode_long(array $bytes): int
    {
        $n = 0;
        $shift = 0;
        $terminated = false;

        foreach ($bytes as $b) {
            if ($shift >= self::PLATFORM_BITS) {
                throw new \InvalidArgumentException('Variable-length long has too many bytes.');
            }
            $n |= (($b & 0x7F) << $shift);
            $shift += 7;
            if (($b & 0x80) === 0) {
                $terminated = true;
                break;
            }
        }

        if (!$terminated) {
            throw new \InvalidArgumentException('Variable-length long is empty or truncated.');
        }

        // Undo ZigZag: drop the sign bit with an unsigned shift, then flip
        // all bits when that sign bit was set.
        return self::unsigned_right_shift($n, 1) ^ -($n & 1);
    }
}
107+
46108
/**
47109
* Exceptions arising from incompatibility between
48110
* reader and writer schemas.
@@ -304,18 +366,9 @@ static function double_to_long_bits($double)
304366
* @return string long $n encoded as bytes
305367
* @internal This relies on 64-bit PHP.
306368
*/
307-
static public function encode_long($n)
369+
public static function encode_long($n): string
308370
{
309-
$n = (int) $n;
310-
$n = ($n << 1) ^ ($n >> 63);
311-
$str = '';
312-
while (0 != ($n & ~0x7F))
313-
{
314-
$str .= chr(($n & 0x7F) | 0x80);
315-
$n >>= 7;
316-
}
317-
$str .= chr($n);
318-
return $str;
371+
return Zigzag::encode_long($n);
319372
}
320373

321374
/**
@@ -931,16 +984,7 @@ class AvroIOBinaryDecoder
931984
*/
932985
public static function decode_long_from_array($bytes)
933986
{
934-
$b = array_shift($bytes);
935-
$n = $b & 0x7f;
936-
$shift = 7;
937-
while (0 != ($b & 0x80))
938-
{
939-
$b = array_shift($bytes);
940-
$n |= (($b & 0x7f) << $shift);
941-
$shift += 7;
942-
}
943-
return (($n >> 1) ^ -($n & 1));
987+
return Zigzag::decode_long($bytes);
944988
}
945989

946990
/**

test/DatumIOTest.php

Lines changed: 43 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,37 @@ function test_datum_round_trip($schema_json, $datum, $binary)
5151
$this->assertEquals($datum, $read_datum);
5252
}
5353

54+
/**
 * Checks Zigzag::unsigned_right_shift() against precomputed results.
 *
 * @dataProvider zigzag_unsigned_right_shift_provider
 *
 * @param int $expected value the shift must produce
 * @param int $n        value being shifted
 * @param int $x        number of bit positions to shift by
 */
public function test_zigzag_unsigned_right_shift(int $expected, int $n, int $x) {
    // assertSame rather than assertEquals: the loose comparison would also
    // accept a float or numeric string of equal value.
    $this->assertSame(
        $expected,
        Zigzag::unsigned_right_shift($n, $x),
        "unsigned_right_shift({$n}, {$x})"
    );
}
60+
61+
/**
 * Data sets for test_zigzag_unsigned_right_shift(), each shaped
 * [expected, n, x].
 *
 * The two 7-bit chains feed every result back in as the next row's n.
 * Each data set is keyed by the expression it checks so a failure names
 * the offending case.
 *
 * @return array
 */
public static function zigzag_unsigned_right_shift_provider(): array {
    return [
        // Negative vs. positive input for the same shift count.
        '-8 >>> 2' => [4611686018427387902, -8, 2],
        '8 >>> 2' => [2, 8, 2],

        // Chain starting at -2, shifted by 7 until a single bit is left.
        '-2 >>> 7' => [144115188075855871, -2, 7],
        '144115188075855871 >>> 7' => [1125899906842623, 144115188075855871, 7],
        '1125899906842623 >>> 7' => [8796093022207, 1125899906842623, 7],
        '8796093022207 >>> 7' => [68719476735, 8796093022207, 7],
        '68719476735 >>> 7' => [536870911, 68719476735, 7],
        '536870911 >>> 7' => [4194303, 536870911, 7],
        '4194303 >>> 7' => [32767, 4194303, 7],
        '32767 >>> 7' => [255, 32767, 7],
        '255 >>> 7' => [1, 255, 7],

        // Chain starting at -2147483648; it reaches 536870911 and from
        // there repeats rows already listed in the chain above.
        '-2147483648 >>> 7' => [144115188059078656, -2147483648, 7],
        '144115188059078656 >>> 7' => [1125899906711552, 144115188059078656, 7],
        '1125899906711552 >>> 7' => [8796093021184, 1125899906711552, 7],
        '8796093021184 >>> 7' => [68719476728, 8796093021184, 7],
        '68719476728 >>> 7' => [536870911, 68719476728, 7],
    ];
}
84+
5485
/**
5586
* @return array
5687
*/
@@ -67,11 +98,21 @@ function data_provider()
6798
array('"int"', 1, "\002"),
6899
array('"int"', 2147483647, "\xFE\xFF\xFF\xFF\x0F"),
69100

70-
// array('"long"', (int) -9223372036854775808, "\001"),
101+
array('"long"', (int) -9223372036854775808, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01"),
102+
array('"long"', -(1<<62), "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F"),
103+
array('"long"', -4294967295, "\xFD\xFF\xFF\xFF\x1F"),
104+
array('"long"', -10, "\x13"),
105+
array('"long"', -3, "\005"),
106+
array('"long"', -2, "\003"),
71107
array('"long"', -1, "\001"),
72108
array('"long"', 0, "\000"),
73109
array('"long"', 1, "\002"),
74-
// array('"long"', 9223372036854775807, "\002")
110+
array('"long"', 2, "\004"),
111+
array('"long"', 3, "\006"),
112+
array('"long"', 10, "\x14"),
113+
array('"long"', 4294967295, "\xFE\xFF\xFF\xFF\x1F"),
114+
array('"long"', 1<<62, "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01"),
115+
array('"long"', 9223372036854775807, "\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01"),
75116

76117
array('"float"', (float) -10.0, "\000\000 \301"),
77118
array('"float"', (float) -1.0, "\000\000\200\277"),

0 commit comments

Comments
 (0)