From f98bea220cbd8098eb9a7663ef2a7437f8e50927 Mon Sep 17 00:00:00 2001
From: ornariece <37-ornariece@users.noreply.git.malined.com>
Date: Tue, 22 Apr 2025 16:06:28 +0200
Subject: [PATCH 1/8] use token.value where required

---
 examples/advanced/tree_forest_transformer.py | 2 +-
 lark/indenter.py                             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/advanced/tree_forest_transformer.py b/examples/advanced/tree_forest_transformer.py
index 7582b5776..809d62e95 100644
--- a/examples/advanced/tree_forest_transformer.py
+++ b/examples/advanced/tree_forest_transformer.py
@@ -23,7 +23,7 @@ def adj(self, children):
         return Discard
 
     def __default_token__(self, token):
-        return token.capitalize()
+        return token.value.capitalize()
 
 grammar = """
 sentence: noun verb noun -> simple
diff --git a/lark/indenter.py b/lark/indenter.py
index 037513bdf..2acdc2312 100644
--- a/lark/indenter.py
+++ b/lark/indenter.py
@@ -40,7 +40,7 @@ def handle_NL(self, token: Token) -> Iterator[Token]:
 
         yield token
 
-        indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces
+        indent_str = token.value.rsplit('\n', 1)[1] # Tabs and spaces
         indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
 
         if indent > self.indent_level[-1]:

From 9b3f1a0383613a6df3dd3adc70b6c536ed61bd56 Mon Sep 17 00:00:00 2001
From: ornariece <37-ornariece@users.noreply.git.malined.com>
Date: Tue, 22 Apr 2025 16:43:21 +0200
Subject: [PATCH 2/8] handle postlex gracefully

---
 lark/lexer.py            | 20 ++++++++++++++++++++
 lark/parser_frontends.py | 26 +++++++++++++-------------
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/lark/lexer.py b/lark/lexer.py
index 59d9acfd1..4572f5049 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -16,6 +16,7 @@
 if TYPE_CHECKING:
     from .common import LexerConf
     from .parsers.lalr_parser_state import ParserState
+    from .lark import PostLex
 
 from .utils import classify, get_regexp_width, Serialize, logger, TextSlice, TextOrSlice
 from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
@@ -468,6 +469,25 @@ def __copy__(self):
 
 _Token = Token
 
+class PostLexThread(LexerThread):
+    def __init__(self, lexer: 'Lexer', lexer_state: LexerState, postlex: 'PostLex'):
+        super().__init__(lexer, lexer_state)
+        self.postlex = postlex
+
+    @classmethod
+    def from_text(cls, lexer: 'Lexer', text_or_slice: TextOrSlice, postlex: 'PostLex') -> 'PostLexThread':
+        text = TextSlice.cast_from(text_or_slice)
+        return cls(lexer, LexerState(text), postlex)
+
+    def lex(self, parser_state):
+        # Get tokens from the underlying lexer and process with postlex
+        tokens = super().lex(parser_state)
+        return self.postlex.process(tokens)
+
+    def __copy__(self):
+        return type(self)(self.lexer, copy(self.state), self.postlex)
+
+
 _Callback = Callable[[Token], Token]
 
 class Lexer(ABC):
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index bfe4eba98..ca20a317f 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -2,7 +2,7 @@
 
 from .exceptions import ConfigurationError, GrammarError, assert_config
 from .utils import get_regexp_width, Serialize, TextOrSlice, TextSlice
-from .lexer import LexerThread, BasicLexer, ContextualLexer, Lexer
+from .lexer import LexerThread, BasicLexer, ContextualLexer, Lexer, PostLexThread
 from .parsers import earley, xearley, cyk
 from .parsers.lalr_parser import LALR_Parser
 from .tree import Tree
@@ -95,8 +95,7 @@ def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, pars
         else:
             raise TypeError("Bad value for lexer_type: {lexer_type}")
 
-        if lexer_conf.postlex:
-            self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
+        self.postlex: PostLex | None = lexer_conf.postlex  # Store the postlex separately
 
     def _verify_start(self, start=None):
         if start is None:
@@ -109,8 +108,18 @@ def _verify_start(self, start=None):
         return start
 
     def _make_lexer_thread(self, text: Optional[TextOrSlice]) -> Union[TextOrSlice, LexerThread, None]:
+        if self.skip_lexer:
+            return text
+
         cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
-        return text if self.skip_lexer else cls(self.lexer, None) if text is None else cls.from_text(self.lexer, text)
+
+        thread = cls(self.lexer, text) if text is None else cls.from_text(self.lexer, text)
+
+        # If we have a postlex, wrap the thread
+        if self.postlex is not None:
+            return PostLexThread(self.lexer, thread.state, self.postlex)
+
+        return thread
 
     def parse(self, text: Optional[TextOrSlice], start=None, on_error=None):
         if self.lexer_conf.lexer_type in ("dynamic", "dynamic_complete"):
@@ -151,15 +160,6 @@ def _get_lexer_callbacks(transformer, terminals):
         result[terminal.name] = callback
     return result
 
-class PostLexConnector:
-    def __init__(self, lexer, postlexer):
-        self.lexer = lexer
-        self.postlexer = postlexer
-
-    def lex(self, lexer_state, parser_state):
-        i = self.lexer.lex(lexer_state, parser_state)
-        return self.postlexer.process(i)
-
 
 def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
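Note on PATCH 1: the change is behavior-preserving, because Token subclasses str, so calling str methods on the token already worked; going through token.value spells out that the raw matched string is meant. A minimal sketch of the distinction, assuming only Lark's public Token API:

    # Token inherits from str: str methods work on it directly, but they
    # return a plain str; .value is the explicit raw string of the match.
    from lark import Token

    tok = Token('WORD', 'hello')

    assert isinstance(tok, str)                # Token is a str subclass
    assert tok.capitalize() == 'Hello'         # implicit str method on the token
    assert tok.value.capitalize() == 'Hello'   # explicit access via .value
    assert tok.type == 'WORD'                  # the terminal name rides along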
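Note on PATCH 2: it swaps the old PostLexConnector (which wrapped self.lexer) for a LexerThread subclass, so the post-lexer is attached when the lexer thread is created. The user-facing contract is untouched: an object passed as postlex= still sees the whole token stream before the parser does. A runnable sketch of that flow, adapted from Lark's indented-tree example (the grammar here is illustrative):

    from lark import Lark
    from lark.indenter import Indenter

    class TreeIndenter(Indenter):
        NL_type = '_NL'          # terminal carrying newline + indentation
        OPEN_PAREN_types = []    # no bracket terminals suppress indentation here
        CLOSE_PAREN_types = []
        INDENT_type = '_INDENT'  # synthetic tokens the post-lexer emits
        DEDENT_type = '_DEDENT'
        tab_len = 8

    grammar = r'''
        start: "a" _NL [_INDENT start+ _DEDENT]

        _NL: /(\r?\n[\t ]*)+/
        %declare _INDENT _DEDENT
    '''

    parser = Lark(grammar, parser='lalr', postlex=TreeIndenter())
    print(parser.parse('a\n  a\n  a\n').pretty())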
From c59bedddba3c8a5abaab3f44c9ab29187655e83f Mon Sep 17 00:00:00 2001
From: ornariece <37-ornariece@users.noreply.git.malined.com>
Date: Tue, 22 Apr 2025 17:25:47 +0200
Subject: [PATCH 3/8] fix typing

---
 lark/lexer.py            | 16 ++++++++++++++--
 lark/parser_frontends.py | 15 ++++++++-------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/lark/lexer.py b/lark/lexer.py
index 4572f5049..89eca136a 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -5,7 +5,7 @@
 from contextlib import suppress
 from typing import (
     TypeVar, Type, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
-    ClassVar, TYPE_CHECKING, overload
+    ClassVar, TYPE_CHECKING, overload, Union
 )
 from types import ModuleType
 import warnings
@@ -470,12 +470,24 @@ def __copy__(self):
 
 class PostLexThread(LexerThread):
-    def __init__(self, lexer: 'Lexer', lexer_state: LexerState, postlex: 'PostLex'):
+    def __init__(self, lexer: 'Lexer', lexer_state: Optional[LexerState], postlex: 'PostLex'):
         super().__init__(lexer, lexer_state)
         self.postlex = postlex
 
+    @overload
+    @classmethod
+    def from_text(cls, lexer: 'Lexer', text_or_slice: TextOrSlice, postlex: None = None) -> 'LexerThread':
+        pass
+
+    @overload
     @classmethod
     def from_text(cls, lexer: 'Lexer', text_or_slice: TextOrSlice, postlex: 'PostLex') -> 'PostLexThread':
+        pass
+
+    @classmethod
+    def from_text(cls, lexer: 'Lexer', text_or_slice: TextOrSlice, postlex: Union['PostLex', None] = None) -> Union['LexerThread', 'PostLexThread']:
+        if postlex is None:
+            return super().from_text(lexer, text_or_slice)
         text = TextSlice.cast_from(text_or_slice)
         return cls(lexer, LexerState(text), postlex)
 
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index ca20a317f..1c9e353d8 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Optional, Collection, Union, TYPE_CHECKING
+from typing import Any, Callable, Dict, Optional, Collection, Union, TYPE_CHECKING, Type
 
 from .exceptions import ConfigurationError, GrammarError, assert_config
 from .utils import get_regexp_width, Serialize, TextOrSlice, TextSlice
@@ -10,6 +10,7 @@
 
 if TYPE_CHECKING:
     from .parsers.lalr_analysis import ParseTableBase
+    from .lark import PostLex
 
 ###{standalone
 
@@ -95,7 +96,7 @@ def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, pars
         else:
             raise TypeError("Bad value for lexer_type: {lexer_type}")
 
-        self.postlex: PostLex | None = lexer_conf.postlex  # Store the postlex separately
+        self.postlex: Union['PostLex', None] = lexer_conf.postlex  # Store the postlex separately
 
     def _verify_start(self, start=None):
         if start is None:
@@ -111,15 +112,15 @@ def _make_lexer_thread(self, text: Optional[TextOrSlice]) -> Union[TextOrSlice,
         if self.skip_lexer:
             return text
 
-        cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
-
-        thread = cls(self.lexer, text) if text is None else cls.from_text(self.lexer, text)
+        cls: Type[LexerThread]
 
         # If we have a postlex, wrap the thread
        if self.postlex is not None:
-            return PostLexThread(self.lexer, thread.state, self.postlex)
+            cls = PostLexThread
+            return cls(self.lexer, text, self.postlex) if text is None else cls.from_text(self.lexer, text, self.postlex)
 
-        return thread
+        cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
+        return cls(self.lexer, text) if text is None else cls.from_text(self.lexer, text)
 
     def parse(self, text: Optional[TextOrSlice], start=None, on_error=None):
         if self.lexer_conf.lexer_type in ("dynamic", "dynamic_complete"):
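Note on PATCH 3: the @overload stack it adds to from_text lets a type checker tie the return type to the postlex argument, so a call without a post-lexer keeps the plain LexerThread type and a call with one gets PostLexThread. The same pattern in a self-contained form (all names below are illustrative, not part of Lark):

    from typing import Optional, Union, overload

    class Plain:
        """Stand-in for LexerThread."""

    class Wrapped(Plain):
        """Stand-in for PostLexThread."""
        def __init__(self, postlex: object) -> None:
            self.postlex = postlex

    @overload
    def make(postlex: None = None) -> Plain: ...
    @overload
    def make(postlex: object) -> Wrapped: ...

    def make(postlex: Optional[object] = None) -> Union[Plain, Wrapped]:
        # One runtime implementation; the overloads above exist only for
        # the type checker, which picks the precise return type per call.
        return Wrapped(postlex) if postlex is not None else Plain()

    plain = make()            # a checker infers Plain here
    wrapped = make(object())  # and Wrapped here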
From 4280441907c57cde7b9a2a9a44f9c147264a4a74 Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Wed, 23 Apr 2025 08:32:02 +0300
Subject: [PATCH 4/8] Docs: Updated link of DSL article to a new version, with
 better formatting, and support for a dark theme

---
 README.md      | 2 +-
 docs/index.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 74710f713..88ebf1b93 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ Most importantly, Lark will save you time and prevent you from getting parsing h
 - [Cheatsheet (PDF)](/docs/_static/lark_cheatsheet.pdf)
 - [Online IDE](https://lark-parser.org/ide)
 - [Tutorial](/docs/json_tutorial.md) for writing a JSON parser.
-- Blog post: [How to write a DSL with Lark](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/)
+- Blog post: [How to write a DSL with Lark](https://eshsoft.com/blog/write-dsl-in-python-with-lark)
 - [Gitter chat](https://gitter.im/lark-parser/Lobby)
 
 ### Install Lark
diff --git a/docs/index.rst b/docs/index.rst
index e69e2b9d0..1f7347e91 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -115,7 +115,7 @@ Resources
 .. _Examples: https://github.com/lark-parser/lark/tree/master/examples
 .. _Third-party examples: https://github.com/ligurio/lark-grammars
 .. _Online IDE: https://lark-parser.org/ide
-.. _How to write a DSL: http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/
+.. _How to write a DSL: https://eshsoft.com/blog/write-dsl-in-python-with-lark
 .. _Program Synthesis is Possible: https://www.cs.cornell.edu/~asampson/blog/minisynth.html
 .. _Cheatsheet (PDF): _static/lark_cheatsheet.pdf
 .. _Gitter: https://gitter.im/lark-parser/Lobby
From f9ba191d2d3ca4e9d46f79b2fe50c078edebe9ae Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Wed, 23 Apr 2025 08:34:43 +0300
Subject: [PATCH 5/8] Upgrade pre-commit version

---
 .github/workflows/mypy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
index 9624cd9cd..108c19c16 100644
--- a/.github/workflows/mypy.yml
+++ b/.github/workflows/mypy.yml
@@ -16,4 +16,4 @@ jobs:
     steps:
     - uses: actions/checkout@v3
     - uses: actions/setup-python@v3
-    - uses: pre-commit/action@v2.0.3
+    - uses: pre-commit/action@v3.0.1
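Across the series, the contract a post-lexer must satisfy is unchanged: it implements process(), receives the lexer's token iterator, and yields the (possibly rewritten) stream the parser consumes. A minimal custom post-lexer as a sketch, using the PostLex base class that these patches themselves import from lark.lark; the grammar and the uppercasing are illustrative only:

    from typing import Iterator
    from lark import Lark, Token
    from lark.lark import PostLex

    class UppercaseWords(PostLex):
        always_accept = ()  # no extra terminals need to bypass the parser

        def process(self, stream: Iterator[Token]) -> Iterator[Token]:
            for tok in stream:
                if tok.type == 'WORD':
                    # Rebuild the token; a bare Token(type, value) drops
                    # position info, which is acceptable for a sketch.
                    yield Token(tok.type, tok.value.upper())
                else:
                    yield tok

    parser = Lark(r'''
        start: WORD+
        WORD: /[a-z]+/
        %import common.WS
        %ignore WS
    ''', parser='lalr', postlex=UppercaseWords())

    print(parser.parse('hello world'))  # WORD tokens arrive uppercased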