Skip to content

Commit 5bc3e4e

Browse files
committed
add integer parsing based off nodejs/llparse#32
1 parent bc8c932 commit 5bc3e4e

File tree

9 files changed

+298
-18
lines changed

9 files changed

+298
-18
lines changed

llparse/compilator.py

Lines changed: 163 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def tailTo(
337337
out: list[str],
338338
node: IWrap[_frontend.node.Node],
339339
noAdvance: bool,
340-
value: Optional[int],
340+
value: Optional[int] = None,
341341
):
342342
ctx = self.compilation
343343
target = ctx.unwrapNode(node).build(ctx)
@@ -512,7 +512,8 @@ def __init__(self, ref: _frontend.node.Sequence) -> None:
512512

513513
def doBuild(self, out: list[str]):
514514
ctx = self.compilation
515-
515+
# TODO: llparse_match_t could be easily changed around to
516+
# Something that can't be overlapped with when compiled with other parsers...
516517
out.append("llparse_match_t match_seq;")
517518
out.append("")
518519

@@ -639,7 +640,7 @@ def doBuild(self, out: list[str]):
639640
# Invoke callback
640641
callback = ctx.buildCode(ctx.unwrapCode(self.ref.callback, True))
641642

642-
out.append(f"err = {callback}({ctx.stateArg()}, start,{ctx.posArg()});")
643+
out.append(f"err = {callback}({ctx.stateArg()}, start, {ctx.posArg()});")
643644

644645
out.append("if (err != 0) {")
645646
tmp = []
@@ -676,6 +677,163 @@ def buildError(self, out: list[str], code: str):
676677
out.append(f"return {STATE_ERROR};")
677678

678679

680+
# Based on arthurschreiber's work (nodejs/llparse#32), with indutny's tips and requests mixed in.
681+
682+
# 0x80 I8
683+
# 0x8000 I16
684+
# 0x800000 I24
685+
# 0x1000000 U24
686+
687+
class Int(Node):
    """Emit the C statement(s) that fold one input byte into an integer field.

    Every instance is responsible for exactly one byte of the integer: the
    byte at ``ref.byteOffset``.  The first byte (offset 0) overwrites the
    state field, later bytes are merged into it, and for signed reads the
    value is sign-extended once its last byte has been merged.

    Note that ``ref.bits`` is used as a width in *bytes* (1 -> 8-bit,
    2 -> 16-bit, 3 -> 24-bit, anything else -> 32-bit); 64-bit reads are
    not implemented yet.

    NOTE(review): the generated C assumes the dereferenced position is an
    ``unsigned char`` and that the state field is an unsigned integer type at
    least as wide as the value being read -- confirm against the emitted
    state struct.
    """

    def __init__(self, ref: _frontend.node.Int):
        super().__init__(ref)
        self.ref = ref
        # Index of the byte (0-based, in stream order) handled by this node.
        self.offset = ref.byteOffset

    @property
    def pair(self):
        """Return ``(compilation, C expression naming the target state field)``."""
        return self.compilation, self.compilation.stateField(self.ref.field)

    def _fold(
        self, out: list[str], width: int, signed: bool, little_endian: bool
    ) -> None:
        """Append the C code merging the current byte into the state field.

        :param out: list of generated C lines, appended to in place
        :param width: total width of the integer in bytes
        :param signed: sign-extend after the last byte when true
        :param little_endian: byte order of the integer in the input
        """
        ctx, index = self.pair
        byte = f"(*{ctx.posArg()})"

        if self.offset == 0:
            # First byte: plain assignment, which also clears stale data.
            out.append(f"{index} = {byte};")
        elif little_endian:
            # Later little-endian bytes are more significant, so they are
            # shifted up to their position.  The cast keeps the shift out of
            # signed ``int`` (``0x80 << 24`` would overflow it).
            shift = 8 * self.offset
            out.append(f"{index} = {index} | ((unsigned long) {byte} << {shift});")
        else:
            # Later big-endian bytes are less significant: make room, append.
            out.append(f"{index} = ({index} << 8) | {byte};")

        if signed and self.offset == width - 1:
            # All bytes are in place: sign-extend with the (x ^ m) - m idiom,
            # where m is the sign bit of a ``width``-byte integer.
            sign_bit = 1 << (8 * width - 1)
            out.append(f"{index} = ({index} ^ {sign_bit:#x}) - {sign_bit:#x};")

    def readInt8(self, out: list[str]) -> None:
        """Emit a signed 8-bit read."""
        self._fold(out, 1, True, False)

    def readUInt8(self, out: list[str]) -> None:
        """Emit an unsigned 8-bit read."""
        self._fold(out, 1, False, False)

    # LITTLE ENDIAN

    def readInt16LE(self, out: list[str]) -> None:
        """Emit this node's byte of a signed 16-bit little-endian read."""
        self._fold(out, 2, True, True)

    def readUInt16LE(self, out: list[str]) -> None:
        """Emit this node's byte of an unsigned 16-bit little-endian read."""
        self._fold(out, 2, False, True)

    def readInt24LE(self, out: list[str]) -> None:
        """Emit this node's byte of a signed 24-bit little-endian read."""
        self._fold(out, 3, True, True)

    def readUInt24LE(self, out: list[str]) -> None:
        """Emit this node's byte of an unsigned 24-bit little-endian read."""
        self._fold(out, 3, False, True)

    def readInt32LE(self, out: list[str]) -> None:
        """Emit this node's byte of a signed 32-bit little-endian read."""
        self._fold(out, 4, True, True)

    def readUInt32LE(self, out: list[str]) -> None:
        """Emit this node's byte of an unsigned 32-bit little-endian read."""
        self._fold(out, 4, False, True)

    # BIG ENDIAN

    def readInt16BE(self, out: list[str]) -> None:
        """Emit this node's byte of a signed 16-bit big-endian read."""
        self._fold(out, 2, True, False)

    def readUInt16BE(self, out: list[str]) -> None:
        """Emit this node's byte of an unsigned 16-bit big-endian read."""
        self._fold(out, 2, False, False)

    def readInt24BE(self, out: list[str]) -> None:
        """Emit this node's byte of a signed 24-bit big-endian read."""
        self._fold(out, 3, True, False)

    def readUInt24BE(self, out: list[str]) -> None:
        """Emit this node's byte of an unsigned 24-bit big-endian read."""
        self._fold(out, 3, False, False)

    def readInt32BE(self, out: list[str]) -> None:
        """Emit this node's byte of a signed 32-bit big-endian read."""
        self._fold(out, 4, True, False)

    def readUInt32BE(self, out: list[str]) -> None:
        """Emit this node's byte of an unsigned 32-bit big-endian read."""
        self._fold(out, 4, False, False)

    def doBuild(self, out: list[str]):
        """Emit the prologue, this node's byte read, and the jump to ``otherwise``.

        :param out: list of generated C lines, appended to in place
        :raises ValueError: if the target property has the ``ptr`` type
        """
        self.prologue(out)

        if self.compilation.getFieldType(self.ref.field) == "ptr":
            raise ValueError(
                f'property {self.ref.field} should not use pointers but it was given "ptr"'
            )

        # ``bits`` is a byte count; widths other than 1-3 use the 32-bit
        # path, as before.
        # TODO: uint64 & int64
        bits = self.ref.bits
        width = bits if bits in (1, 2, 3) else 4
        self._fold(out, width, bool(self.ref.signed), bool(self.ref.littleEndian))

        self.tailTo(out, self.ref.otherwise.node, self.ref.otherwise.noAdvance, None)
833+
834+
835+
836+
679837
MAX_CHAR = 0xFF
680838
TABLE_GROUP = 16
681839

@@ -1096,6 +1254,8 @@ def unwrapNode(self, node: IWrap[_frontend.node.Node]):
10961254
r = Sequence(ref)
10971255
elif isinstance(ref, _frontend.node.TableLookup):
10981256
r = TableLookup(ref)
1257+
elif isinstance(ref, _frontend.node.Int):
1258+
r = Int(ref)
10991259
else:
11001260
raise TypeError(
11011261
f'refrence "{ref}" is an Invalid Code Type , TypeName:"{ref.__class__.__name__}"'

llparse/frontend.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -244,31 +244,37 @@ def ID():
244244

245245
elif isinstance(node, source.code.Match):
246246
result = self.translateMatch(node)
247+
248+
elif isinstance(node, source.node.Int):
249+
result = self.translateInt(node)
250+
247251
else:
248252
raise Exception(f'Unknown Node Type for :"{node.name}" {type(node)}')
249253

250254
otherwise = node.getOtherwiseEdge()
251255

252256
if isinstance(result, list):
253257
# result:list[WrappedNode]
254-
assert isinstance(node, source.code.Match)
255-
_match = node
256258

257-
if not otherwise:
258-
raise Exception(f'Node "{node.name}" has no ".otherwise()"')
259+
assert isinstance(node, (source.code.Match, source.node.Int))
260+
_match = node
261+
262+
assert otherwise, (f'Node "{node.name}" has no ".otherwise()"')
259263

260-
else:
264+
if isinstance(node, source.node.Match):
261265
for child in result:
262266
if not child.ref.otherwise:
263267
child.ref.setOtherwise(
264268
self.translate(otherwise.node), otherwise.noAdvance
265269
)
270+
transform = self.translateTransform(_match.getTransform())
271+
for child in result:
272+
# TODO Vizonex : This might break , be sure to make a workaround function here...
273+
child.ref.setTransform(transform)
266274

267-
transform = self.translateTransform(_match.getTransform())
268-
for child in result:
269-
# TODO Vizonex : This might break , be sure to make a workaround function here...
270-
child.ref.setTransform(transform)
271-
275+
276+
else:
277+
result[-1].ref.setOtherwise(self.translate(otherwise.node), otherwise.noAdvance)
272278
assert len(result) >= 1
273279
return result[0]
274280

@@ -299,6 +305,23 @@ def ID():
299305
assert len(list(node)) == 0
300306

301307
return single
308+
309+
def translateInt(self, node: source.node.Int) -> list[IWrap[_frontend.node.Int]]:
    """Expand a builder ``Int`` node into a chain of per-byte frontend nodes.

    One wrapped frontend node is created for every byte offset in
    ``range(node.bits)``.  The node for offset 0 is registered in
    ``self.Map`` under ``node``, and every node's ``otherwise`` edge points
    at the node for the next offset.  The last node's ``otherwise`` is left
    for the caller to set.

    :param node: builder-level integer node to translate
    :return: the wrapped nodes in byte order (never empty)
    """

    def wrap(name: str, byte_offset: int):
        # Allocate a unique id, build the frontend node and wrap it.
        inner = _frontend.node.Int(
            self.Id.id(name),
            node.field,
            node.bits,
            node.signed,
            node.little_endian,
            byte_offset,
        )
        return self.implementation.node.Int(inner)

    head = wrap(node.name, 0)
    self.Map[node] = head
    chain = [head]

    for offset in range(1, node.bits):
        link = wrap(f"{node.name}_byte{offset + 1}", offset)
        # noAdvance is False: the edge to the next byte consumes input.
        chain[-1].ref.setOtherwise(link, False)
        chain.append(link)

    return chain
324+
302325

303326
def maybeTableLookup(
304327
self, node: source.code.Match, trie: TrieSingle, children: MatchChildren

llparse/pybuilder/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,8 @@
11
from ..pybuilder.builder import *
22
from ..pybuilder.loopchecker import *
3+
from ..pybuilder.main_code import (
4+
# I'll add more soon I feel a little lazy at the moment.
5+
Node,
6+
Match,
7+
Int
8+
)

llparse/pybuilder/builder.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from typing import Literal, Optional, Union
2-
32
from ..pybuilder import main_code as code
43

4+
# Type-hinting alias for node and code. TODO(Vizonex): let's separate the modules soon...
5+
node = code
56
# from pydot import graph_from_dot_data
67

78

@@ -316,3 +317,38 @@ def property(self, ty: Literal["i8", "i16", "i32", "i64", "ptr"], name: str):
316317
def properties(self) -> list[Property]:
317318
"""Return list of all allocated properties in parser's state."""
318319
return list(self.privProperties.values())
320+
321+
def intBE(self, field: str, bits: int):
    """
    Return a node that unpacks input bytes into a signed big-endian integer.

    :param field: State's property name
    :param bits: Integer width, forwarded unchanged to ``code.Int``.
        NOTE(review): ``code.Int`` names its node with ``bits * 8``, i.e.
        this appears to be a byte count rather than a bit count.
    """
    return code.Int(field, bits, signed=True, little_endian=False)
327+
328+
def intLE(self, field: str, bits: int):
    """
    Return a node that unpacks input bytes into a signed little-endian integer.

    :param field: State's property name
    :param bits: Integer width, forwarded unchanged to ``code.Int``.
        NOTE(review): ``code.Int`` names its node with ``bits * 8``, i.e.
        this appears to be a byte count rather than a bit count.
    """
    return code.Int(field, bits, signed=True, little_endian=True)
336+
337+
def uintBE(self, field: str, bits: int):
    """
    Return a node that unpacks input bytes into an unsigned big-endian integer.

    :param field: State's property name
    :param bits: Integer width, forwarded unchanged to ``code.Int``.
        NOTE(review): ``code.Int`` names its node with ``bits * 8``, i.e.
        this appears to be a byte count rather than a bit count.
    """
    return code.Int(field, bits, signed=False, little_endian=False)
345+
346+
def uintLE(self, field: str, bits: int):
    """
    Return a node that unpacks input bytes into an unsigned little-endian integer.

    :param field: State's property name
    :param bits: Integer width, forwarded unchanged to ``code.Int``.
        NOTE(review): ``code.Int`` names its node with ``bits * 8``, i.e.
        this appears to be a byte count rather than a bit count.
    """
    return code.Int(field, bits, signed=False, little_endian=True)
354+

llparse/pybuilder/loopchecker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def visit(self, node: Node, path: list[Node]):
252252
return
253253

254254
for edge in node.getAllEdges():
255-
if not edge.noAdvance:
255+
if edge.noAdvance:
256256
continue
257257
edgeValue = value
258258
if edge.key is None or isinstance(edge.key, int):

llparse/pybuilder/main_code.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ def __init__(self, name: str) -> None:
9292
super().__init__("match", name)
9393

9494

95+
96+
9597
@dataclass
9698
class IMulAddOptions:
9799
base: int
@@ -244,6 +246,39 @@ def __init__(self, code: Code, IInvokeMap: dict[int, Node]) -> None:
244246
self.addEdge(Edge(targetNode, True, numKey, None))
245247

246248

249+
250+
# Not in llparse node-js (yet) But I wanted to implement
251+
# this into my version since I am making a very important
252+
# http2 frame parser
253+
254+
# SEE: https://github.com/nodejs/llparse-frontend/pull/1
255+
256+
def build_name(field: str, bits: int, signed: bool, little_endian: bool) -> str:
    """Build the default node name for an integer read.

    The name has the form ``<field>_<int|uint>_<width>`` where the width is
    ``bits * 8`` (``bits`` is therefore a byte count).  Multi-byte reads get
    an ``_le`` or ``_be`` byte-order suffix; single-byte reads have no byte
    order and get none.

    :param field: State's property name
    :param bits: Width of the integer in bytes
    :param signed: Number is signed
    :param little_endian: true if le, false if be
    :return: the generated node name
    """
    result = f"{field}_{'int' if signed else 'uint'}_{bits * 8}"
    if bits > 1:
        # Both suffixes carry the separating underscore ("_be", not "be").
        return result + ("_le" if little_endian else "_be")
    return result
262+
263+
264+
class Int(Node):
    """Used for parsing bytes via unpacking them into an integer property."""

    def __init__(self, field: str, bits: int, signed: bool, little_endian: bool) -> None:
        """
        :param field: State's property name
        :param bits: Width of the integer in bytes (the node name is built
            from ``bits * 8``); the parameter name is kept for compatibility
        :param signed: Number is signed
        :param little_endian: true if le, false if be
        :raises ValueError: if ``bits`` is smaller than 1
        """
        # A zero-width integer is as meaningless as a negative one, so both
        # are rejected (the previous check let 0 through).
        if bits < 1:
            raise ValueError("bits should be a positive integer")
        self.field = field
        self.bits = bits
        self.signed = signed
        self.little_endian = little_endian
        super().__init__(build_name(field, bits, signed, little_endian))
280+
281+
247282
# -- Transforms --
248283

249284
TransformName = ["to_lower_unsafe", "to_lower"]

llparse/pyfront/front.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
class IWrap(Generic[T]):
1515
ref: T
1616

17-
# def __hash__(self) -> int:
18-
# return hash(self.ref)
19-
2017

2118
def toCacheKey(value: Union[int, bool]) -> str:
2219
if isinstance(value, int):

0 commit comments

Comments
 (0)