diff --git a/llparse/compilator.py b/llparse/compilator.py
index f179ed8..3f111d3 100644
--- a/llparse/compilator.py
+++ b/llparse/compilator.py
@@ -337,7 +337,7 @@ def tailTo(
         out: list[str],
         node: IWrap[_frontend.node.Node],
         noAdvance: bool,
-        value: Optional[int],
+        value: Optional[int] = None,
     ):
         ctx = self.compilation
         target = ctx.unwrapNode(node).build(ctx)
@@ -501,7 +501,7 @@ def doBuild(self, out: list[str]):
 
         assert self.ref.otherwise
         otherwise = ctx.unwrapNode(self.ref.otherwise.node)
-        out.append(f"{ctx.currentField()} = (void*) (intptr_t) {otherwise};")
+        out.append(f"{ctx.currentField()} = (void*) (intptr_t) {otherwise.cachedDecel};")
         out.append(f"return {STATE_ERROR};")
 
 
@@ -512,7 +512,8 @@ def __init__(self, ref: _frontend.node.Sequence) -> None:
 
     def doBuild(self, out: list[str]):
         ctx = self.compilation
-
+        # TODO: rename llparse_match_t to something parser-specific so that it
+        # cannot clash when several generated parsers are compiled together.
         out.append("llparse_match_t match_seq;")
         out.append("")
 
@@ -639,7 +640,7 @@ def doBuild(self, out: list[str]):
 
         # Invoke callback
         callback = ctx.buildCode(ctx.unwrapCode(self.ref.callback, True))
-        out.append(f"err = {callback}({ctx.stateArg()}, start,{ctx.posArg()});")
+        out.append(f"err = {callback}({ctx.stateArg()}, start, {ctx.posArg()});")
         out.append("if (err != 0) {")
         tmp = []
 
@@ -676,6 +677,163 @@ def buildError(self, out: list[str], code: str):
         out.append(f"return {STATE_ERROR};")
 
 
+# Based off arthurschreiber's work, with indutny's tips and requests mixed in.
+# Each Int node folds ONE input byte into the state field; the frontend chains
+# one node per byte (byteOffset 0..N-1) to assemble an N-byte integer.
+# NOTE(review): the emitted shifts assume the field is an unsigned fixed-width
+# C integer (as in upstream llparse) -- TODO confirm against the property types.
+
+
+class Int(Node):
+    """Emit the C statement that folds one input byte into an integer field."""
+    def __init__(self, ref: _frontend.node.Int):
+        super().__init__(ref)
+        self.ref = ref
+        self.offset = ref.byteOffset
+
+    @property
+    def pair(self):
+        return self.compilation, self.compilation.stateField(self.ref.field)
+
+    def readInt8(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        out.append(f"{index} = (int8_t) (*{ctx.posArg()});")
+
+    def readUInt8(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        out.append(f"{index} = (*{ctx.posArg()});")
+
+    # LITTLE ENDIAN: the byte at offset k carries bits 8k..8k+7 of the value.
+
+    def readInt16LE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        else:
+            # Last (most significant) byte: sign-extend it before shifting it into place.
+            out.append(f"{index} = {index} | ((uint64_t) (int8_t) (*{ctx.posArg()}) << {8 * self.offset});")
+
+    def readUInt16LE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        else:
+            out.append(f"{index} = {index} | ((uint32_t) (*{ctx.posArg()}) << {8 * self.offset});")
+
+    def readInt24LE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        elif self.offset == 1:
+            out.append(f"{index} = {index} | ((uint32_t) (*{ctx.posArg()}) << {8 * self.offset});")
+        else:
+            out.append(f"{index} = {index} | ((uint64_t) (int8_t) (*{ctx.posArg()}) << {8 * self.offset});")
+
+    def readUInt24LE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        else:
+            out.append(f"{index} = {index} | ((uint32_t) (*{ctx.posArg()}) << {8 * self.offset});")
+
+    def readInt32LE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        elif self.offset in (1, 2):
+            out.append(f"{index} = {index} | ((uint32_t) (*{ctx.posArg()}) << {8 * self.offset});")
+        else:
+            out.append(f"{index} = {index} | ((uint64_t) (int8_t) (*{ctx.posArg()}) << {8 * self.offset});")
+
+    def readUInt32LE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        else:
+            out.append(f"{index} = {index} | ((uint32_t) (*{ctx.posArg()}) << {8 * self.offset});")
+
+    # BIG ENDIAN: bytes arrive most significant first, so shift left and append.
+
+    def readInt16BE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            # First (most significant) byte: the cast sign-extends it into the field.
+            out.append(f"{index} = (int8_t) (*{ctx.posArg()});")
+        else:
+            out.append(f"{index} = ({index} << 8) | (*{ctx.posArg()});")
+
+    def readUInt16BE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        else:
+            out.append(f"{index} = ({index} << 8) | (*{ctx.posArg()});")
+
+    def readInt24BE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            # First (most significant) byte: the cast sign-extends it into the field.
+            out.append(f"{index} = (int8_t) (*{ctx.posArg()});")
+        else:
+            # Later bytes are shifted in; the sign bits move up with the value.
+            out.append(f"{index} = ({index} << 8) | (*{ctx.posArg()});")
+
+    def readUInt24BE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        else:
+            out.append(f"{index} = ({index} << 8) | (*{ctx.posArg()});")
+
+    def readInt32BE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            # First (most significant) byte: the cast sign-extends it into the field.
+            out.append(f"{index} = (int8_t) (*{ctx.posArg()});")
+        else:
+            # Later bytes are shifted in; the sign bits move up with the value.
+            out.append(f"{index} = ({index} << 8) | (*{ctx.posArg()});")
+
+    def readUInt32BE(self, out: list[str]) -> None:
+        ctx, index = self.pair
+        if self.offset == 0:
+            out.append(f"{index} = (*{ctx.posArg()});")
+        else:
+            out.append(f"{index} = ({index} << 8) | (*{ctx.posArg()});")
+
+
+    def doBuild(self, out:list[str]):
+        """Emit the read for this node's byte, then jump to the `otherwise` target."""
+        self.prologue(out)
+        bits = self.ref.bits
+
+        if self.compilation.getFieldType(self.ref.field) == 'ptr':
+            raise ValueError(f'property {self.ref.field} should not use pointers but it was given \"ptr\"')
+
+        if bits == 1:
+            self.readInt8(out) if self.ref.signed else self.readUInt8(out)
+        elif bits == 2:
+            if self.ref.littleEndian:
+                self.readInt16LE(out) if self.ref.signed else self.readUInt16LE(out)
+            else:
+                self.readInt16BE(out) if self.ref.signed else self.readUInt16BE(out)
+        elif bits == 3:
+            if self.ref.littleEndian:
+                self.readInt24LE(out) if self.ref.signed else self.readUInt24LE(out)
+            else:
+                self.readInt24BE(out) if self.ref.signed else self.readUInt24BE(out)
+        elif bits == 4:
+            if self.ref.littleEndian:
+                self.readInt32LE(out) if self.ref.signed else self.readUInt32LE(out)
+            else:
+                self.readInt32BE(out) if self.ref.signed else self.readUInt32BE(out)
+        else:
+            # TODO: uint64 & int64
+            raise ValueError(f"unsupported integer width: {bits} byte(s); only 1-4 are implemented")
+
+        self.tailTo(out, self.ref.otherwise.node, self.ref.otherwise.noAdvance, None)
+
+
 MAX_CHAR = 0xFF
 TABLE_GROUP = 16
 
@@ -1077,13 +1235,14 @@ def unwrapNode(self, node: IWrap[_frontend.node.Node]):
             r = Consume(ref)
         elif isinstance(ref, _frontend.node.Empty):
             r = Empty(ref)
+        elif isinstance(ref, _frontend.node.Pause):
+            r = Pause(ref)
+
         elif isinstance(ref, _frontend.node.Error):
             r = Error(ref)
         elif isinstance(ref, _frontend.node.Invoke):
             r = Invoke(ref)
-        elif isinstance(ref, _frontend.node.Pause):
-            r = Pause(ref)
-
+
         elif isinstance(ref, _frontend.node.SpanStart):
             r = SpanStart(ref)
 
@@ -1096,6 +1255,8 @@ def unwrapNode(self, node: IWrap[_frontend.node.Node]):
             r = Sequence(ref)
         elif isinstance(ref, _frontend.node.TableLookup):
             r = TableLookup(ref)
+        elif isinstance(ref, _frontend.node.Int):
+            r = Int(ref)
         else:
             raise TypeError(
                 f'refrence "{ref}" is an Invalid Code Type , TypeName:"{ref.__class__.__name__}"'
diff --git a/llparse/frontend.py b/llparse/frontend.py
index 3cb8a96..223ac3c 100644
--- a/llparse/frontend.py
+++ b/llparse/frontend.py
@@ -211,7 +211,7 @@ def ID():
             result = nodeImpl.Error(_frontend.node.Error(ID(), node.code, node.reason))
 
         elif isinstance(node, source.code.Pause):
-            result = nodeImpl.Pause(_frontend.node.Error(ID(), node.code, node.reason))
+            result = nodeImpl.Pause(_frontend.node.Pause(ID(), node.code, node.reason))
 
         elif isinstance(node, source.code.Comsume):
             result = nodeImpl.Consume(_frontend.node.Consume(ID(), node.field))
@@ -244,6 +244,10 @@ def ID():
 
         elif isinstance(node, source.code.Match):
             result = self.translateMatch(node)
+
+        elif isinstance(node, source.node.Int):
+            result = self.translateInt(node)
+
         else:
             raise Exception(f'Unknown Node Type for :"{node.name}" {type(node)}')
 
@@ -251,24 +255,26 @@ def ID():
 
         if isinstance(result, list):
             # result:list[WrappedNode]
-            assert isinstance(node, source.code.Match)
-            _match = node
-            if not otherwise:
-                raise Exception(f'Node "{node.name}" has no ".otherwise()"')
+            assert isinstance(node, (source.code.Match, source.node.Int))
+            _match = node
+            if not otherwise:
+                raise Exception(f'Node "{node.name}" has no ".otherwise()"')
 
-            else:
+            if isinstance(node, source.code.Match):
                 for child in result:
                     if not child.ref.otherwise:
                         child.ref.setOtherwise(
                             self.translate(otherwise.node), otherwise.noAdvance
                         )
 
+                transform = self.translateTransform(_match.getTransform())
+                for child in result:
+                    # TODO Vizonex : This might break , be sure to make a workaround function here...
+                    child.ref.setTransform(transform)
 
-            transform = self.translateTransform(_match.getTransform())
-            for child in result:
-                # TODO Vizonex : This might break , be sure to make a workaround function here...
-                child.ref.setTransform(transform)
-
+
+            else:
+                result[-1].ref.setOtherwise(self.translate(otherwise.node), otherwise.noAdvance)
 
             assert len(result) >= 1
             return result[0]
@@ -299,6 +305,23 @@ def ID():
 
         assert len(list(node)) == 0
         return single
+
+    def translateInt(self, node: source.node.Int) -> list[IWrap[_frontend.node.Int]]:
+        """Expand an Int source node into a chain of per-byte nodes; return the chain in order."""
+        inner = _frontend.node.Int(self.Id.id(node.name), node.field, node.bits, node.signed, node.little_endian, 0)
+        result = [self.implementation.node.Int(inner)]
+        front = self.Map[node] = result[0]
+
+        for offset in range(1, node.bits):
+            unique_name = self.Id.id(f"{node.name}_byte{offset + 1}")
+            inner = _frontend.node.Int(unique_name, node.field, node.bits, node.signed, node.little_endian, offset)
+            outer = self.implementation.node.Int(inner)
+            result.append(outer)
+            # Integers will advance since they are unpacking values...
+            front.ref.setOtherwise(outer, False)
+            front = result[-1]
+        return result
+
 
     def maybeTableLookup(
         self, node: source.code.Match, trie: TrieSingle, children: MatchChildren
diff --git a/llparse/pybuilder/__init__.py b/llparse/pybuilder/__init__.py
index 7c981f3..7c15505 100644
--- a/llparse/pybuilder/__init__.py
+++ b/llparse/pybuilder/__init__.py
@@ -1,2 +1,8 @@
 from ..pybuilder.builder import *
 from ..pybuilder.loopchecker import *
+from ..pybuilder.main_code import (
+# I'll add more soon I feel a little lazy at the moment.
+    Node,
+    Match,
+    Int
+)
diff --git a/llparse/pybuilder/builder.py b/llparse/pybuilder/builder.py
index 366efe1..9cc6ed8 100644
--- a/llparse/pybuilder/builder.py
+++ b/llparse/pybuilder/builder.py
@@ -1,7 +1,8 @@
 from typing import Literal, Optional, Union
-
 from ..pybuilder import main_code as code
 
+# Alias so `node.Int` / `node.Match` resolve to main_code (TODO: Vizonex) Let's separate the modules soon...
+node = code
 
 # from pydot import graph_from_dot_data
 
@@ -316,3 +317,38 @@ def property(self, ty: Literal["i8", "i16", "i32", "i64", "ptr"], name: str):
     def properties(self) -> list[Property]:
         """Return list of all allocated properties in parser's state."""
         return list(self.privProperties.values())
+
+    def intBE(self, field: str, bits:int):
+        """Return a node that unpacks a signed big-endian integer.
+        :param field: State's property name
+        :param bits: Width of the integer in bytes (1-4), despite the name
+        """
+        return code.Int(field, bits, True, False)
+
+    def intLE(self, field: str, bits: int):
+        """
+        Return a node that unpacks a signed little-endian integer.
+
+        :param field: State's property name
+        :param bits: Width of the integer in bytes (1-4), despite the name
+        """
+        return code.Int(field, bits, True, True)
+
+    def uintBE(self, field: str, bits: int):
+        """
+        Return a node that unpacks an unsigned big-endian integer.
+
+        :param field: State's property name
+        :param bits: Width of the integer in bytes (1-4), despite the name
+        """
+        return code.Int(field, bits, False, False)
+
+    def uintLE(self, field: str, bits: int):
+        """
+        Return a node that unpacks an unsigned little-endian integer.
+
+        :param field: State's property name
+        :param bits: Width of the integer in bytes (1-4), despite the name
+        """
+        return code.Int(field, bits, False, True)
+
\ No newline at end of file
diff --git a/llparse/pybuilder/main_code.py b/llparse/pybuilder/main_code.py
index d0c455a..f6ff293 100644
--- a/llparse/pybuilder/main_code.py
+++ b/llparse/pybuilder/main_code.py
@@ -92,6 +92,8 @@ def __init__(self, name: str) -> None:
         super().__init__("match", name)
 
 
+
+
 @dataclass
 class IMulAddOptions:
     base: int
@@ -244,6 +246,39 @@ def __init__(self, code: Code, IInvokeMap: dict[int, Node]) -> None:
             self.addEdge(Edge(targetNode, True, numKey, None))
 
 
+
+# Not in llparse node-js (yet) But I wanted to implement
+# this into my version since I am making a very important
+# http2 frame parser
+
+# SEE: https://github.com/nodejs/llparse-frontend/pull/1
+
+def build_name(field:str, bits: int, signed:bool, little_endian:bool) -> str:
+    """Return the node name, e.g. ``"len_uint_16_be"`` (``bits`` counts bytes)."""
+    result = f"{field}_{'int' if signed else 'uint'}_{bits * 8}"
+    if bits > 1:
+        return result + ('_le' if little_endian else '_be')
+    return result
+
+
+class Int(Node):
+    """Node that unpacks a fixed-width integer from the input, one byte at a time."""
+    def __init__(self, field: str, bits: int, signed: bool, little_endian: bool) -> None:
+        """
+        :param field: State's property name
+        :param bits: Width of the integer in bytes (1-4), despite the name
+        :param signed: Number is signed
+        :param little_endian: true if le, false if be
+        """
+        if not 1 <= bits <= 4:
+            raise ValueError(f"bits must be a byte count between 1 and 4, got {bits!r}")
+        self.field = field
+        self.bits = bits
+        self.signed = signed
+        self.little_endian = little_endian
+        super().__init__(build_name(field, bits, signed, little_endian))
+
+
 # -- Transfroms --
 
 TransformName = ["to_lower_unsafe", "to_lower"]
diff --git a/llparse/pyfront/front.py b/llparse/pyfront/front.py
index a76c8de..24a5f42 100644
--- a/llparse/pyfront/front.py
+++ b/llparse/pyfront/front.py
@@ -14,9 +14,6 @@ class IWrap(Generic[T]):
 
     ref: T
 
-    # def __hash__(self) -> int:
-    #     return hash(self.ref)
-
 
 def toCacheKey(value: Union[int, bool]) -> str:
     if isinstance(value, int):
diff --git a/llparse/pyfront/implementation.py b/llparse/pyfront/implementation.py
index a8d29ac..86cc4cc 100644
--- a/llparse/pyfront/implementation.py
+++ b/llparse/pyfront/implementation.py
@@ -37,6 +37,9 @@ def SpanStart(self, n: node.SpanStart):
 
     def TableLookup(self, n: node.TableLookup):
         return IWrap(n)
+
+    def Int(self, n: node.Int):
+        return IWrap(n)
 
 
 class ITransformImplementation:
diff --git a/llparse/pyfront/nodes.py b/llparse/pyfront/nodes.py
index 55c8400..b259865 100644
--- a/llparse/pyfront/nodes.py
+++ b/llparse/pyfront/nodes.py
@@ -153,6 +153,26 @@ def __init__(self, id: IUniqueName, code: int, reason: str) -> None:
         super().__init__(id, code, reason)
 
 
+# Not in llparse node-js (yet) But I wanted to implement
+# this into my version since I am making a very important
+# http2 frame parser
+
+# SEE: https://github.com/nodejs/llparse-frontend/pull/1
+
+@dataclass
+class Int(Node):
+    field: str
+    bits: int
+    signed: bool
+    littleEndian: bool
+    byteOffset: int
+
+    def __hash__(self):
+        return hash(self.id)
+
+
+
+
 @dataclass
 class ISeqEdge:
     node: IWrap[Node]
diff --git a/pyproject.toml b/pyproject.toml
index 39b3c9e..18bea3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "llparse"
-version = "0.1.3"
+version = "0.1.4"
 description = "A Parody of llparse written for writing C Parsers with Python"
 readme = "README.md"
 authors = [
diff --git a/tests/test_frontend.py b/tests/test_frontend.py
index c9cfd46..1629b71 100644
--- a/tests/test_frontend.py
+++ b/tests/test_frontend.py
@@ -22,4 +22,14 @@ def test_build_tables():
 
     assert "lookup_table" in p.build(start).c
 
-
+def test_pausing():
+    # Ensure frontend LoopChecker does not mark off against Pausing
+    p = LLParse("lltest")
+    s = p.node('start')
+    s2 = p.node('start2')
+    s.match('p', p.pause(1, 'parser was asked to pause').otherwise(s2)).skipTo(s)
+    s2.match('p', p.pause(2, 'parser was asked to pause again').otherwise(s)).skipTo(s2)
+
+    p.build(s)
+
+