Skip to content

Commit 820c14b

Browse files
authored
Implement serialization of decimal logical type (#22)
Thanks to @thiagorb for the contribution!
1 parent 340f8e2 commit 820c14b

File tree

3 files changed

+94
-6
lines changed

3 files changed

+94
-6
lines changed

lib/avro/datum.php

Lines changed: 62 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ function write_data($writers_schema, $datum, $encoder)
117117
case AvroSchema::STRING_TYPE:
118118
return $encoder->write_string($datum);
119119
case AvroSchema::BYTES_TYPE:
120-
return $encoder->write_bytes($datum);
120+
return $encoder->write_bytes($writers_schema, $datum);
121121
case AvroSchema::ARRAY_SCHEMA:
122122
return $this->write_array($writers_schema, $datum, $encoder);
123123
case AvroSchema::MAP_SCHEMA:
@@ -386,13 +386,25 @@ public function write_double($datum)
386386
* @param string $str
387387
* @uses self::write_bytes()
388388
*/
389-
function write_string($str) { $this->write_bytes($str); }
389+
function write_string($str)
{
    // A plain string has no schema to consult, so null is passed in the
    // schema slot and write_bytes() emits the value as-is.
    $this->write_bytes(null, $str);
}
390390

391391
/**
 * Writes a length-prefixed byte string.
 *
 * When a schema is supplied and its logical type is 'decimal', the datum is
 * first converted to its binary form via self::decimal_to_bytes(), using the
 * 'scale' (default 0) and 'precision' attributes of the schema.
 *
 * @param AvroSchema|null $writers_schema schema of the datum, or null when
 *                                        the value must be written verbatim
 * @param string $bytes raw bytes, or a numeric value for decimal schemas
 * @throws AvroException if a decimal schema carries no precision
 */
function write_bytes($writers_schema, $bytes)
{
    $is_decimal = $writers_schema !== null
        && $writers_schema->logical_type() === 'decimal';

    if ($is_decimal) {
        $attributes = $writers_schema->extra_attributes();
        if (!isset($attributes['precision'])) {
            throw new AvroException('Decimal precision is required');
        }

        $bytes = self::decimal_to_bytes(
            $bytes,
            $attributes['scale'] ?? 0,
            $attributes['precision']
        );
    }

    // Length prefix first, then the payload itself.
    $length = strlen($bytes);
    $this->write_long($length);
    $this->write($bytes);
}
@@ -401,6 +413,49 @@ function write_bytes($bytes)
401413
* @param string $datum
402414
*/
403415
function write($datum)
{
    // Hand the datum straight to the underlying io object.
    $this->io->write($datum);
}
416+
417+
/**
 * Converts a numeric value into the shortest big-endian two's-complement
 * byte string holding its unscaled integer (value * 10^scale).
 *
 * Only unscaled values that fit in a signed 64-bit integer can be encoded,
 * because the value is packed with pack('J').
 *
 * @param int|float|string $decimal numeric value to encode
 * @param int $scale number of fractional digits kept in the unscaled value
 * @param int $precision maximum number of decimal digits of the unscaled value
 * @return string between 1 and 8 bytes
 * @throws AvroException if the value is not numeric, is NAN/INF, does not
 *                       fit in 64 bits, or exceeds the given precision
 */
private static function decimal_to_bytes($decimal, int $scale, int $precision): string
{
    if (!is_numeric($decimal)) {
        throw new AvroException('Decimal must be a numeric value');
    }

    $value = $decimal * (10 ** $scale);
    if (!is_int($value)) {
        $value = round($value);

        // Validate BEFORE casting: (int) NAN and (int) INF yield 0, and
        // casting a float outside the int64 range is undefined, so a late
        // range check would silently accept garbage.
        // (float) PHP_INT_MAX rounds up to 2^63, which itself is not
        // representable as an int, hence the ">=" on the upper bound.
        if (
            !is_finite($value)
            || $value < (float) PHP_INT_MIN
            || $value >= (float) PHP_INT_MAX
        ) {
            throw new AvroException('Decimal value is out of range');
        }

        $value = (int) $value;
    }
    if (abs($value) > (10 ** $precision - 1)) {
        throw new AvroException('Decimal value is out of range');
    }

    // 'J' = unsigned 64-bit big-endian; for negative ints this yields the
    // two's-complement bit pattern.
    $packed = pack('J', $value);
    $signBit = self::getMostSignificantBitAt($packed, 0);
    $fillByte = $signBit ? 0xff : 0x00;

    // Drop leading bytes that are pure sign extension, but only while the
    // following byte still carries the same sign bit, so the sign survives.
    $offset = 0;
    $lastIndex = strlen($packed) - 1;
    while (
        $offset < $lastIndex
        && ord($packed[$offset]) === $fillByte
        && self::getMostSignificantBitAt($packed, $offset + 1) === $signBit
    ) {
        $offset++;
    }

    return substr($packed, $offset);
}
454+
455+
/**
 * Isolates the top bit of one byte of a byte string.
 *
 * @param string $bytes byte string to inspect
 * @param int $offset index of the byte to test
 * @return int 0x80 when the top bit of that byte is set, 0 otherwise
 */
private static function getMostSignificantBitAt($bytes, $offset): int
{
    $byte = ord($bytes[$offset]);

    return $byte & 0b10000000;
}
404459
}
405460

406461
/**
@@ -925,8 +980,10 @@ static public function long_bits_to_double($bits)
925980
*/
926981
static public function bytes_to_decimal($bytes, $scale = 0)
{
    // Decodes a big-endian two's-complement byte string into an integer and,
    // when $scale > 0, divides it by 10^$scale. Throws AvroException when the
    // encoded integer needs more than 64 bits.
    $length = strlen($bytes);

    if ($length === 0) {
        // No bytes: decode as zero rather than reading $bytes[0] out of range.
        $int = 0;
    } else {
        $negative = (ord($bytes[0]) & 0x80) !== 0;
        $fill = $negative ? "\xff" : "\x00";

        // unpack('J') reads exactly 8 bytes, so longer input would be
        // silently truncated. Strip leading bytes that are only sign
        // extension; anything still longer than 8 bytes cannot be held
        // in a PHP integer.
        $offset = 0;
        while (
            $length - $offset > 8
            && $bytes[$offset] === $fill
            && ((ord($bytes[$offset + 1]) & 0x80) !== 0) === $negative
        ) {
            $offset++;
        }
        if ($length - $offset > 8) {
            throw new AvroException('Decimal value does not fit in a 64-bit integer');
        }

        // Sign-extend to 8 bytes so negative values decode correctly.
        $padded = str_pad(substr($bytes, $offset), 8, $fill, STR_PAD_LEFT);
        $int = unpack('J', $padded)[1];
    }

    return $scale > 0 ? ($int / (10 ** $scale)) : $int;
}
931988

932989
/**

lib/avro/schema.php

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -416,7 +416,11 @@ public static function is_valid_datum($expected_schema, $datum)
416416
case self::BOOLEAN_TYPE:
417417
return is_bool($datum);
418418
case self::STRING_TYPE:
419+
return is_string($datum);
419420
case self::BYTES_TYPE:
421+
if ($expected_schema->logical_type() === 'decimal') {
422+
return is_numeric($datum);
423+
}
420424
return is_string($datum);
421425
case self::INT_TYPE:
422426
return (is_int($datum)

test/DatumIOTest.php

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,34 @@ function data_provider()
101101
'B', "\x02"),
102102
array('{"name":"rec","type":"record","fields":[{"name":"a","type":"int"},{"name":"b","type":"boolean"}]}',
103103
array('a' => 1, 'b' => false),
104-
"\x02\x00")
104+
"\x02\x00"),
105+
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 1}',
106+
'1',
107+
"\x02\x0a"),
108+
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 1}',
109+
'-0.1',
110+
"\x02\xff"),
111+
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 1}',
112+
-0.1,
113+
"\x02\xff"),
114+
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 1}',
115+
3.1,
116+
"\x02\x1f"),
117+
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 2}',
118+
2.55,
119+
"\x04\x00\xff"),
120+
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 0}',
121+
-256,
122+
"\x04\xff\x00"),
123+
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 3}',
124+
0.127,
125+
"\x02\x7f"),
126+
array('{"type":"bytes","logicalType": "decimal","precision": 19,"scale": 0}',
127+
PHP_INT_MAX,
128+
"\x10\x7f\xff\xff\xff\xff\xff\xff\xff"),
129+
array('{"type":"bytes","logicalType": "decimal","precision": 19,"scale": 0}',
130+
PHP_INT_MIN,
131+
"\x10\x80\x00\x00\x00\x00\x00\x00\x00")
105132
);
106133
}
107134

0 commit comments

Comments
 (0)