Add lexer for HercScript syntax highlighting

guilherme-gm · guilherme-gm · commit c2d51790d4ea · 2024-04-24T22:00:45.000-03:00
This is a basic highlighter based on the Pygments C lexer. In my tests it
gives a satisfactory result and should be good enough for our current
docs.

It may be improved later as we find places where it doesn't work very
well.

Docs can format Hercules scripts using "```HercScript" to get proper
highlighting.
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -13,6 +13,7 @@ jobs:
       - uses: actions/checkout@v3
       - run: python -m pip install --upgrade pip
       - run: pip install mkdocs mkdocs-material
+      - run: pip install -e ./hercscript-lexer
       - run: cd docs
       - run: mkdocs build -f mkdocs.yml
       - name: Upload GitHub Pages artifact
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 /.venv
 /site/
+/hercscript-lexer/hercscript_lexers.egg-info
+/hercscript-lexer/hercscript_lexers/__pycache__
diff --git a/docs/contributing/editing-the-docs.md b/docs/contributing/editing-the-docs.md
@@ -15,9 +15,15 @@ submit a [Pull request](./creating-pull-requests.md) to the [Hercules-docs repos
 ## Setup
 Hercules docs uses mkdocs-material.
 
-You will need to have Python3 installed and install `mkdocs-material` package.
+You will need to have Python 3 installed, and to install the `mkdocs-material` package and our lexer package.
 
-You can install it with `pip install mkdocs-material` or `pip3 install mkdocs-material`.
+You can install them with:
+```SH
+pip install mkdocs-material
+pip install -e ./hercscript-lexer # Optional; only needed for HercScript highlighting
+```
+
+or perform the same commands with `pip3`.
 
 For more information about installing mkdocs-material, and other alternatives,
 see [Mkdocs Material's getting started](https://squidfunk.github.io/mkdocs-material/getting-started/#installation)
diff --git a/hercscript-lexer/hercscript_lexers/HercScriptLexer.py b/hercscript-lexer/hercscript_lexers/HercScriptLexer.py
@@ -0,0 +1,118 @@
+from pygments.lexer import RegexLexer, include, bygroups, using, \
+    this, default, words
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+    Number, Punctuation, Whitespace
+
+__all__ = ['HercScriptLexer']
+
+
+class HercScriptLexer(RegexLexer):
+    """
+    Pygments lexer for Hercules Script (a.k.a. Athena Script).
+
+    The grammar is adapted from the official Pygments C lexer.
+    """
+
+    name = 'hercscript'
+    aliases = ['hercscript', 'athenascript']
+    url = ''
+    version_added = ''
+    priority = 0.1
+
+    # Digits of a hexadecimal integer literal (the part after the 0x prefix).
+    # A single underscore separator is allowed between digits.
+    _hexpart = r'[0-9a-fA-F](_?[0-9a-fA-F])*'
+    # Digits of a decimal integer literal.
+    # A single underscore separator is allowed between digits.
+    _decpart = r'\d(\_?\d)*'
+    
+    # Identifier regex (no leading digit) with C/C++ Universal Character Name (UCN) support; `$` is accepted too.
+    _ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})+'
+
+    # Single-line and multiline comment regexes.
+    # Beware not to use *? for the inner content! When these regexes
+    # are embedded in larger regexes, that can cause the stuff*? to
+    # match more than it would have if the regex had been used in
+    # a standalone way ...
+    _comment_single = r'//(?:.|(?<=\\)\n)*\n'
+    _comment_multiline = r'/(?:\\\n)?[*](?:[^*]|[*](?!(?:\\\n)?/))*[*](?:\\\n)?/'
+
+    tokens = {
+        'whitespace': [
+            # Labels:
+            # Line start and possible indentation.
+            (r'(^[ \t]*)'
+                # Not followed by keywords which can be mistaken as labels.
+                r'(?!(?:default)\b)'
+                # Actual label, followed by a single colon.
+                r'(' + _ident + r')(\s*)(:)(?!:)',
+                bygroups(Whitespace, Name.Label, Whitespace, Punctuation)
+            ),
+            (r'\n', Whitespace),
+            (r'[^\S\n]+', Whitespace),
+            (_comment_single, Comment.Single),
+            (_comment_multiline, Comment.Multiline),
+            # Unterminated multiline comment: runs until EOF, so there is no closing delimiter.
+            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
+        ],
+        'statements': [
+            include('keywords'),
+            (r'(-)?0[xX]' + _hexpart , Number.Hex),  # (-)? = optional leading minus sign
+            (r'(-)?0[bB][01](_?[01])*', Number.Bin),
+            (r'(-)?0(_?[0-7])+', Number.Oct),
+            (r'(-)?' + _decpart, Number.Integer),
+            (r'[~!%^&*+=|?:<>/-]', Operator),
+            (r'[()\[\],.]', Punctuation),
+            (r'(true|false)\b', Name.Builtin),
+            ('"', String, 'string'),  # opening quote enters the 'string' state
+            (r'(\w+)(\s*\()', bygroups(Name.Function, using(this))),  # function call
+            (r'[\.#\$]?#?@?\w+\$?', Name.Variable),  # optional ./#/$, #, @ prefixes and a trailing $
+            (_ident, Name)
+        ],
+        'keywords': [
+            (r'case\b', Keyword, 'case-value'),  # `case` enters the 'case-value' state
+            (
+                words(
+                    ('break', 'continue', 'default',
+                        'do', 'else', 'for', 'goto', 'if',
+                        'return', 'switch', 'while',
+                        'end', 'function', 'script', 'trader'
+                    ),
+                    suffix=r'\b'), Keyword
+            )
+        ],
+        'root': [
+            include('whitespace'),
+            include('keywords'),
+            default('statement'),  # nothing above matched: enter the 'statement' state
+        ],
+        'statement': [
+            include('whitespace'),
+            include('statements'),
+            (r'\}', Punctuation),
+            (r'[{;]', Punctuation, '#pop'),  # `{` or `;` leaves the 'statement' state
+        ],
+        'string': [
+            (r'"', String, '#pop'),  # closing quote
+            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
+             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
+            (r'[^\\"\n]+', String),  # all other characters
+            (r'\\\n', String),  # line continuation
+            (r'\\', String),  # stray backslash
+        ],
+        # Mark identifiers preceded by `case` keyword as constants.
+        'case-value': [
+            (r'(?<!:)(:)(?!:)', Punctuation, '#pop'),  # a lone `:` ends the case value
+            (_ident, Name.Constant),
+            include('whitespace'),
+            include('statements'),
+        ]
+    }
+
+    def __init__(self, **options):  # plain pass-through to RegexLexer.__init__
+        RegexLexer.__init__(self, **options)
+
+    def get_tokens_unprocessed(self, text, stack=('root',)):  # re-yields RegexLexer's tokens unchanged
+        for index, token, value in \
+                RegexLexer.get_tokens_unprocessed(self, text, stack):
+            yield index, token, value
diff --git a/hercscript-lexer/hercscript_lexers/__init__.py b/hercscript-lexer/hercscript_lexers/__init__.py
@@ -0,0 +1,3 @@
+from .HercScriptLexer import HercScriptLexer
+
+__all__ = ("HercScriptLexer",)  # trailing comma: must be a tuple of names, not a bare string
diff --git a/hercscript-lexer/setup.py b/hercscript-lexer/setup.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+"""Packaging script for hercscript-lexers, a Pygments lexer plugin for HercScript."""
+from setuptools import setup, find_packages
+
+entry_points = '''
+[pygments.lexers]
+hercscript=hercscript_lexers:HercScriptLexer
+'''  # INI-style entry points: exposes HercScriptLexer in the `pygments.lexers` group
+
+setup(
+    name='hercscript-lexers',
+    version='1.0.0',
+    description='Pygments lexer package for hercscript.',
+    author='Hercules Team',
+    author_email='',
+    url='',
+    packages=find_packages(),
+    entry_points=entry_points,
+    install_requires=[
+        'Pygments>=2.0.1'
+    ],
+    zip_safe=True,
+    license='MIT License',
+    classifiers=[]
+)

-Original file line number
+Diff line change
@@ @@ -1,2 +1,4 @@ @@
 /.venv
 /site/
 +/hercscript-lexer/hercscript_lexers.egg-info
 +/hercscript-lexer/hercscript_lexers/__pycache__
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .HercScriptLexer import HercScriptLexer`
	`2`	`+`
	`3`	`+__all__ = ("HercScriptLexer")`