diff --git a/External/IronPython.Modules.dll b/External/IronPython.Modules.dll index ad793cc..1c12bd4 100644 Binary files a/External/IronPython.Modules.dll and b/External/IronPython.Modules.dll differ diff --git a/External/IronPython.dll b/External/IronPython.dll index 53c241a..6ac803d 100644 Binary files a/External/IronPython.dll and b/External/IronPython.dll differ diff --git a/External/Microsoft.Dynamic.dll b/External/Microsoft.Dynamic.dll index 212b3af..0cd7f76 100644 Binary files a/External/Microsoft.Dynamic.dll and b/External/Microsoft.Dynamic.dll differ diff --git a/External/Microsoft.Scripting.Core.dll b/External/Microsoft.Scripting.Core.dll index cd3be18..eb3eb72 100644 Binary files a/External/Microsoft.Scripting.Core.dll and b/External/Microsoft.Scripting.Core.dll differ diff --git a/External/Microsoft.Scripting.Debugging.dll b/External/Microsoft.Scripting.Debugging.dll index 53a47d2..763c210 100644 Binary files a/External/Microsoft.Scripting.Debugging.dll and b/External/Microsoft.Scripting.Debugging.dll differ diff --git a/External/Microsoft.Scripting.ExtensionAttribute.dll b/External/Microsoft.Scripting.ExtensionAttribute.dll index 34fe55e..5eb33b1 100644 Binary files a/External/Microsoft.Scripting.ExtensionAttribute.dll and b/External/Microsoft.Scripting.ExtensionAttribute.dll differ diff --git a/External/Microsoft.Scripting.dll b/External/Microsoft.Scripting.dll index 5bba61d..91c759c 100644 Binary files a/External/Microsoft.Scripting.dll and b/External/Microsoft.Scripting.dll differ diff --git a/External/ipy.exe b/External/ipy.exe index 1e6b833..21a12fe 100644 Binary files a/External/ipy.exe and b/External/ipy.exe differ diff --git a/pygments_package/pygments/__init__.py b/pygments_package/pygments/__init__.py index 2d223b3..9762308 100644 --- a/pygments_package/pygments/__init__.py +++ b/pygments_package/pygments/__init__.py @@ -12,30 +12,29 @@ * a wide range of common languages and markup formats is supported * special attention is paid to details, 
increasing quality by a fair amount * support for new languages and formats are added easily - * a number of output formats, presently HTML, LaTeX, RTF, SVG and ANSI sequences + * a number of output formats, presently HTML, LaTeX, RTF, SVG, all image + formats that PIL supports, and ANSI sequences * it is usable as a command-line tool and as a library * ... and it highlights even Brainfuck! The `Pygments tip`_ is installable with ``easy_install Pygments==dev``. - .. _Pygments tip: http://dev.pocoo.org/hg/pygments-main/archive/tip.tar.gz#egg=Pygments-dev + .. _Pygments tip: + http://dev.pocoo.org/hg/pygments-main/archive/tip.tar.gz#egg=Pygments-dev - :copyright: 2006-2008 by Georg Brandl, Armin Ronacher and others. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ -__version__ = '1.0' -__author__ = 'Georg Brandl ' -__url__ = 'http://pygments.org/' -__license__ = 'BSD License' +__version__ = '1.3.1' __docformat__ = 'restructuredtext' __all__ = ['lex', 'format', 'highlight'] -import sys, os -from StringIO import StringIO -from cStringIO import StringIO as CStringIO +import sys + +from pygments.util import StringIO, BytesIO def lex(code, lexer): @@ -62,8 +61,8 @@ def format(tokens, formatter, outfile=None): """ try: if not outfile: - # if we want Unicode output, we have to use Python StringIO - realoutfile = formatter.encoding and CStringIO() or StringIO() + #print formatter, 'using', formatter.encoding + realoutfile = formatter.encoding and BytesIO() or StringIO() formatter.format(tokens, realoutfile) return realoutfile.getvalue() else: diff --git a/pygments_package/pygments/cmdline.py b/pygments_package/pygments/cmdline.py index d1d2141..cfce216 100644 --- a/pygments_package/pygments/cmdline.py +++ b/pygments_package/pygments/cmdline.py @@ -5,14 +5,14 @@ Command line interface. - :copyright: 2006-2008 by Georg Brandl. 
- :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ import sys import getopt from textwrap import dedent -from pygments import __version__, __author__, highlight +from pygments import __version__, highlight from pygments.util import ClassNotFound, OptionError, docstring_headline from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \ find_lexer_class, guess_lexer, TextLexer @@ -219,7 +219,7 @@ def main(args=sys.argv): return 0 if opts.pop('-V', None) is not None: - print 'Pygments version %s, (c) 2006-2008 by %s.' % (__version__, __author__) + print 'Pygments version %s, (c) 2006-2008 by Georg Brandl.' % __version__ return 0 # handle ``pygmentize -L`` @@ -359,14 +359,14 @@ def main(args=sys.argv): infn = args[0] try: - code = open(infn).read() + code = open(infn, 'rb').read() except Exception, err: print >>sys.stderr, 'Error: cannot read infile:', err return 1 if not lexer: try: - lexer = get_lexer_for_filename(infn, **parsed_opts) + lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound, err: if '-g' in opts: try: @@ -402,9 +402,12 @@ def main(args=sys.argv): # encoding pass-through fmter.encoding = 'latin1' else: - # use terminal encoding - lexer.encoding = getattr(sys.stdin, 'encoding', None) or 'ascii' - fmter.encoding = getattr(sys.stdout, 'encoding', None) or 'ascii' + if sys.version_info < (3,): + # use terminal encoding; Python 3's terminals already do that + lexer.encoding = getattr(sys.stdin, 'encoding', + None) or 'ascii' + fmter.encoding = getattr(sys.stdout, 'encoding', + None) or 'ascii' # ... and do it! try: diff --git a/pygments_package/pygments/console.py b/pygments_package/pygments/console.py index 90ea57b..92a8c6d 100644 --- a/pygments_package/pygments/console.py +++ b/pygments_package/pygments/console.py @@ -5,8 +5,8 @@ Format colored console output. 
- :copyright: 2006-2007 by Georg Brandl, Armin Ronacher. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ esc = "\x1b[" diff --git a/pygments_package/pygments/filter.py b/pygments_package/pygments/filter.py index 2da319d..acb0d0a 100644 --- a/pygments_package/pygments/filter.py +++ b/pygments_package/pygments/filter.py @@ -5,8 +5,8 @@ Module that implements the default filter. - :copyright: 2006-2007 by Armin Ronacher. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ diff --git a/pygments_package/pygments/filters/__init__.py b/pygments_package/pygments/filters/__init__.py index 971e263..504c3e1 100644 --- a/pygments_package/pygments/filters/__init__.py +++ b/pygments_package/pygments/filters/__init__.py @@ -6,20 +6,17 @@ Module containing filter lookup functions and default filters. - :copyright: 2006-2007 by Armin Ronacher, Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ -try: - set -except NameError: - from sets import Set as set import re + from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \ string_to_tokentype from pygments.filter import Filter -from pygments.util import get_list_opt, get_int_opt, get_bool_opt, get_choice_opt, \ - ClassNotFound, OptionError +from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \ + get_choice_opt, ClassNotFound, OptionError from pygments.plugin import find_plugin_filters @@ -283,10 +280,78 @@ def replacefunc(wschar): yield ttype, value +class GobbleFilter(Filter): + """ + Gobbles source code lines (eats initial characters). + + This filter drops the first ``n`` characters off every line of code. 
This + may be useful when the source code fed to the lexer is indented by a fixed + amount of space that isn't desired in the output. + + Options accepted: + + `n` : int + The number of characters to gobble. + + *New in Pygments 1.2.* + """ + def __init__(self, **options): + Filter.__init__(self, **options) + self.n = get_int_opt(options, 'n', 0) + + def gobble(self, value, left): + if left < len(value): + return value[left:], 0 + else: + return '', left - len(value) + + def filter(self, lexer, stream): + n = self.n + left = n # How many characters left to gobble. + for ttype, value in stream: + # Remove ``left`` tokens from first line, ``n`` from all others. + parts = value.split('\n') + (parts[0], left) = self.gobble(parts[0], left) + for i in range(1, len(parts)): + (parts[i], left) = self.gobble(parts[i], n) + value = '\n'.join(parts) + + if value != '': + yield ttype, value + + +class TokenMergeFilter(Filter): + """ + Merges consecutive tokens with the same token type in the output stream of a + lexer. + + *New in Pygments 1.2.* + """ + def __init__(self, **options): + Filter.__init__(self, **options) + + def filter(self, lexer, stream): + output = [] + current_type = None + current_value = None + for ttype, value in stream: + if ttype is current_type: + current_value += value + else: + if current_type is not None: + yield current_type, current_value + current_type = ttype + current_value = value + if current_type is not None: + yield current_type, current_value + + FILTERS = { 'codetagify': CodeTagFilter, 'keywordcase': KeywordCaseFilter, 'highlight': NameHighlightFilter, 'raiseonerror': RaiseOnErrorTokenFilter, 'whitespace': VisibleWhitespaceFilter, + 'gobble': GobbleFilter, + 'tokenmerge': TokenMergeFilter, } diff --git a/pygments_package/pygments/formatter.py b/pygments_package/pygments/formatter.py index 60848b5..6eea3d7 100644 --- a/pygments_package/pygments/formatter.py +++ b/pygments_package/pygments/formatter.py @@ -5,10 +5,12 @@ Base formatter class. 
- :copyright: 2006-2007 by Georg Brandl, Armin Ronacher. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ +import codecs + from pygments.util import get_bool_opt from pygments.styles import get_style_by_name @@ -84,4 +86,7 @@ def format(self, tokensource, outfile): Format ``tokensource``, an iterable of ``(tokentype, tokenstring)`` tuples and write it into ``outfile``. """ - raise NotImplementedError() + if self.encoding: + # wrap the outfile in a StreamWriter + outfile = codecs.lookup(self.encoding)[3](outfile) + return self.format_unencoded(tokensource, outfile) diff --git a/pygments_package/pygments/formatters/__init__.py b/pygments_package/pygments/formatters/__init__.py index fb200e1..0e02a52 100644 --- a/pygments_package/pygments/formatters/__init__.py +++ b/pygments_package/pygments/formatters/__init__.py @@ -5,15 +5,15 @@ Pygments formatters. - :copyright: 2006-2007 by Georg Brandl, Armin Ronacher. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ import os.path import fnmatch from pygments.formatters._mapping import FORMATTERS from pygments.plugin import find_plugin_formatters -from pygments.util import docstring_headline, ClassNotFound +from pygments.util import ClassNotFound ns = globals() for fcls in FORMATTERS: diff --git a/pygments_package/pygments/formatters/_mapping.py b/pygments_package/pygments/formatters/_mapping.py index 3060f95..0c344a7 100644 --- a/pygments_package/pygments/formatters/_mapping.py +++ b/pygments_package/pygments/formatters/_mapping.py @@ -9,8 +9,8 @@ Do not alter the FORMATTERS dictionary by hand. - :copyright: 2006-2007 by Armin Ronacher, Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. 
+ :license: BSD, see LICENSE for details. """ from pygments.util import docstring_headline diff --git a/pygments_package/pygments/formatters/bbcode.py b/pygments_package/pygments/formatters/bbcode.py index 8714294..03852b3 100644 --- a/pygments_package/pygments/formatters/bbcode.py +++ b/pygments_package/pygments/formatters/bbcode.py @@ -5,8 +5,8 @@ BBcode formatter. - :copyright: 2006-2007 by Lukas Meuser. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ @@ -76,19 +76,16 @@ def _make_styles(self): self.styles[ttype] = start, end - def format(self, tokensource, outfile): + def format_unencoded(self, tokensource, outfile): if self._code: outfile.write('[code]') if self._mono: outfile.write('[font=monospace]') - enc = self.encoding lastval = '' lasttype = None for ttype, value in tokensource: - if enc: - value = value.encode(enc) while ttype not in self.styles: ttype = ttype.parent if ttype == lasttype: diff --git a/pygments_package/pygments/formatters/html.py b/pygments_package/pygments/formatters/html.py index be4c91b..5c0972e 100644 --- a/pygments_package/pygments/formatters/html.py +++ b/pygments_package/pygments/formatters/html.py @@ -5,20 +5,17 @@ Formatter for HTML output. - :copyright: 2006-2008 by Georg Brandl, Armin Ronacher. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" -import sys, os -import StringIO -try: - set -except NameError: - from sets import Set as set +import os +import sys +import StringIO from pygments.formatter import Formatter from pygments.token import Token, Text, STANDARD_TYPES -from pygments.util import get_bool_opt, get_int_opt, get_list_opt +from pygments.util import get_bool_opt, get_int_opt, get_list_opt, bytes __all__ = ['HtmlFormatter'] @@ -186,7 +183,9 @@ class HtmlFormatter(Formatter): `style` The style to use, can be a string or a Style subclass (default: - ``'default'``). + ``'default'``). This option has no effect if the `cssfile` + and `noclobber_cssfile` option are given and the file specified in + `cssfile` exists. `noclasses` If set to true, token ```` tags will not use CSS classes, but @@ -223,6 +222,12 @@ class HtmlFormatter(Formatter): file's path, if the latter can be found. The stylesheet is then written to this file instead of the HTML file. *New in Pygments 0.6.* + `noclobber_cssfile` + If `cssfile` is given and the specified file exists, the css file will + not be overwritten. This allows the use of the `full` option in + combination with a user specified css file. Default is ``False``. + *New in Pygments 1.1.* + `linenos` If set to ``'table'``, output line numbers as a table with two cells, one containing the line numbers, the other the whole code. This is @@ -274,6 +279,10 @@ class ``"special"`` (default: ``0``). output line in an anchor tag with a ``name`` of ``foo-linenumber``. This allows easy linking to certain lines. *New in Pygments 0.9.* + `anchorlinenos` + If set to `True`, will wrap line numbers in tags. Used in + combination with `linenos` and `lineanchors`. 
+ **Subclassing the HTML formatter** @@ -330,14 +339,16 @@ def _wrap_code(self, source): def __init__(self, **options): Formatter.__init__(self, **options) - self.title = self._encodeifneeded(self.title) + self.title = self._decodeifneeded(self.title) self.nowrap = get_bool_opt(options, 'nowrap', False) self.noclasses = get_bool_opt(options, 'noclasses', False) self.classprefix = options.get('classprefix', '') - self.cssclass = self._encodeifneeded(options.get('cssclass', 'highlight')) - self.cssstyles = self._encodeifneeded(options.get('cssstyles', '')) - self.prestyles = self._encodeifneeded(options.get('prestyles', '')) - self.cssfile = self._encodeifneeded(options.get('cssfile', '')) + self.cssclass = self._decodeifneeded(options.get('cssclass', 'highlight')) + self.cssstyles = self._decodeifneeded(options.get('cssstyles', '')) + self.prestyles = self._decodeifneeded(options.get('prestyles', '')) + self.cssfile = self._decodeifneeded(options.get('cssfile', '')) + self.noclobber_cssfile = get_bool_opt(options, 'noclobber_cssfile', False) + linenos = options.get('linenos', False) if linenos == 'inline': self.linenos = 2 @@ -352,6 +363,7 @@ def __init__(self, **options): self.nobackground = get_bool_opt(options, 'nobackground', False) self.lineseparator = options.get('lineseparator', '\n') self.lineanchors = options.get('lineanchors', '') + self.anchorlinenos = options.get('anchorlinenos', False) self.hl_lines = set() for lineno in get_list_opt(options, 'hl_lines', []): try: @@ -433,10 +445,12 @@ def prefix(cls): (prefix(''), self.style.highlight_color)) return '\n'.join(lines) - def _encodeifneeded(self, value): - if not self.encoding or isinstance(value, str): - return value - return value.encode(self.encoding) + def _decodeifneeded(self, value): + if isinstance(value, bytes): + if self.encoding: + return value.decode(self.encoding) + return value.decode() + return value def _wrap_full(self, inner, outfile): if self.cssfile: @@ -449,17 +463,19 @@ def 
_wrap_full(self, inner, outfile): if not filename or filename[0] == '<': # pseudo files, e.g. name == '' raise AttributeError - cssfilename = os.path.join(os.path.dirname(filename), self.cssfile) + cssfilename = os.path.join(os.path.dirname(filename), + self.cssfile) except AttributeError: print >>sys.stderr, 'Note: Cannot determine output file name, ' \ 'using current directory as base for the CSS file name' cssfilename = self.cssfile - # write CSS file + # write CSS file only if noclobber_cssfile isn't given as an option. try: - cf = open(cssfilename, "w") - cf.write(CSSFILE_TEMPLATE % - {'styledefs': self.get_style_defs('body')}) - cf.close() + if not os.path.exists(cssfilename) or not self.noclobber_cssfile: + cf = open(cssfilename, "w") + cf.write(CSSFILE_TEMPLATE % + {'styledefs': self.get_style_defs('body')}) + cf.close() except IOError, err: err.strerror = 'Error writing CSS file: ' + err.strerror raise @@ -490,19 +506,45 @@ def _wrap_tablelinenos(self, inner): mw = len(str(lncount + fl - 1)) sp = self.linenospecial st = self.linenostep + la = self.lineanchors + aln = self.anchorlinenos if sp: - ls = '\n'.join([(i%st == 0 and - (i%sp == 0 and '%*d' - or '%*d') % (mw, i) - or '') - for i in range(fl, fl + lncount)]) + lines = [] + + for i in range(fl, fl+lncount): + if i % st == 0: + if i % sp == 0: + if aln: + lines.append('%*d' % + (la, i, mw, i)) + else: + lines.append('%*d' % (mw, i)) + else: + if aln: + lines.append('%*d' % (la, i, mw, i)) + else: + lines.append('%*d' % (mw, i)) + else: + lines.append('') + ls = '\n'.join(lines) else: - ls = '\n'.join([(i%st == 0 and ('%*d' % (mw, i)) or '') - for i in range(fl, fl + lncount)]) + lines = [] + for i in range(fl, fl+lncount): + if i % st == 0: + if aln: + lines.append('%*d' % (la, i, mw, i)) + else: + lines.append('%*d' % (mw, i)) + else: + lines.append('') + ls = '\n'.join(lines) + # in case you wonder about the seemingly redundant
here: since the + # content in the other cell also is wrapped in a div, some browsers in + # some configurations seem to mess up the formatting... yield 0, ('' % self.cssclass + - '
' +
-                  ls + '
') + '
' +
+                  ls + '
') yield 0, dummyoutfile.getvalue() yield 0, '
' @@ -517,7 +559,8 @@ def _wrap_inlinelinenos(self, inner): if sp: for t, line in lines: yield 1, '%*s ' % ( - num%sp == 0 and ' special' or '', mw, (num%st and ' ' or num)) + line + num%sp == 0 and ' special' or '', mw, + (num%st and ' ' or num)) + line num += 1 else: for t, line in lines: @@ -536,15 +579,29 @@ def _wrap_lineanchors(self, inner): yield 0, line def _wrap_div(self, inner): + style = [] + if (self.noclasses and not self.nobackground and + self.style.background_color is not None): + style.append('background: %s' % (self.style.background_color,)) + if self.cssstyles: + style.append(self.cssstyles) + style = '; '.join(style) + yield 0, ('') + + (style and (' style="%s"' % style)) + '>') for tup in inner: yield tup yield 0, '
\n' def _wrap_pre(self, inner): - yield 0, ('') + style = [] + if self.prestyles: + style.append(self.prestyles) + if self.noclasses: + style.append('line-height: 125%') + style = '; '.join(style) + + yield 0, ('') for tup in inner: yield tup yield 0, '' @@ -555,7 +612,6 @@ def _format_lines(self, tokensource): Yield individual lines. """ nocls = self.noclasses - enc = self.encoding lsep = self.lineseparator # for lookup only getcls = self.ttype2class.get @@ -574,9 +630,6 @@ def _format_lines(self, tokensource): cls = self._get_css_class(ttype) cspan = cls and '' % cls or '' - if enc: - value = value.encode(enc) - parts = escape_html(value).split('\n') # for all but the last line @@ -619,7 +672,14 @@ def _highlight_lines(self, tokensource): if t != 1: yield t, value if i + 1 in hls: # i + 1 because Python indexes start at 0 - yield 1, '%s' % value + if self.noclasses: + style = '' + if self.style.highlight_color is not None: + style = (' style="background-color: %s"' % + (self.style.highlight_color,)) + yield 1, '%s' % (style, value) + else: + yield 1, '%s' % value else: yield 1, value @@ -631,7 +691,7 @@ def wrap(self, source, outfile): """ return self._wrap_div(self._wrap_pre(source)) - def format(self, tokensource, outfile): + def format_unencoded(self, tokensource, outfile): """ The formatting process uses several nested generators; which of them are used is determined by the user's options. diff --git a/pygments_package/pygments/formatters/img.py b/pygments_package/pygments/formatters/img.py index ef3579d..69ac484 100644 --- a/pygments_package/pygments/formatters/img.py +++ b/pygments_package/pygments/formatters/img.py @@ -5,15 +5,16 @@ Formatter for Pixmap output. - :copyright: 2007 by Ali Afshar. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" import sys from commands import getstatusoutput from pygments.formatter import Formatter -from pygments.util import get_bool_opt, get_int_opt, get_choice_opt +from pygments.util import get_bool_opt, get_int_opt, \ + get_list_opt, get_choice_opt # Import this carefully try: @@ -61,6 +62,7 @@ def __init__(self, font_name, font_size=14): self.font_name = font_name self.font_size = font_size self.fonts = {} + self.encoding = None if sys.platform.startswith('win'): if not font_name: self.font_name = DEFAULT_FONT_NAME_WIN @@ -206,6 +208,11 @@ class ImageFormatter(Formatter): Default: True + `line_number_start` + The line number of the first line. + + Default: 1 + `line_number_step` The step used when printing line numbers. @@ -249,6 +256,16 @@ class ImageFormatter(Formatter): the source code area. Default: 6 + + `hl_lines` + Specify a list of lines to be highlighted. *New in Pygments 1.2.* + + Default: empty list + + `hl_color` + Specify the color for highlighting lines. *New in Pygments 1.2.* + + Default: highlight color of the selected style """ # Required by the pygments mapper @@ -298,11 +315,21 @@ def __init__(self, **options): self.line_number_separator = get_bool_opt(options, 'line_number_separator', True) self.line_number_step = get_int_opt(options, 'line_number_step', 1) + self.line_number_start = get_int_opt(options, 'line_number_start', 1) if self.line_numbers: self.line_number_width = (self.fontw * self.line_number_chars + self.line_number_pad * 2) else: self.line_number_width = 0 + self.hl_lines = [] + hl_lines_str = get_list_opt(options, 'hl_lines', []) + for line in hl_lines_str: + try: + self.hl_lines.append(int(line)) + except ValueError: + pass + self.hl_color = options.get('hl_color', + self.style.highlight_color) or '#f90' self.drawables = [] def get_style_defs(self, arg=''): @@ -368,13 +395,13 @@ def _get_image_size(self, maxcharno, maxlineno): return (self._get_char_x(maxcharno) + self.image_pad, self._get_line_y(maxlineno + 0) + self.image_pad) 
- def _draw_linenumber(self, lineno): + def _draw_linenumber(self, posno, lineno): """ Remember a line number drawable to paint later. """ self._draw_text( - self._get_linenumber_pos(lineno), - str(lineno + 1).rjust(self.line_number_chars), + self._get_linenumber_pos(posno), + str(lineno).rjust(self.line_number_chars), font=self.fonts.get_font(self.line_number_bold, self.line_number_italic), fill=self.line_number_fg, @@ -395,26 +422,27 @@ def _create_drawables(self, tokensource): while ttype not in self.styles: ttype = ttype.parent style = self.styles[ttype] + # TODO: make sure tab expansion happens earlier in the chain. It + # really ought to be done on the input, as to do it right here is + # quite complex. value = value.expandtabs(4) - lines = value.splitlines() + lines = value.splitlines(True) #print lines for i, line in enumerate(lines): - if not line: - lineno += 1 - charno = 0 - else: - # add a line for each extra line in the value - if i: - lineno += 1 - charno = 0 + temp = line.rstrip('\n') + if temp: self._draw_text( self._get_text_pos(charno, lineno), - line, + temp, font = self._get_style_font(style), fill = self._get_text_color(style) ) - charno += len(value) + charno += len(temp) maxcharno = max(maxcharno, charno) + if line.endswith('\n'): + # add a line for each extra line in the value + charno = 0 + lineno += 1 self.maxcharno = maxcharno self.maxlineno = lineno @@ -424,9 +452,10 @@ def _draw_line_numbers(self): """ if not self.line_numbers: return - for i in xrange(self.maxlineno): - if ((i + 1) % self.line_number_step) == 0: - self._draw_linenumber(i) + for p in xrange(self.maxlineno): + n = p + self.line_number_start + if (n % self.line_number_step) == 0: + self._draw_linenumber(p, n) def _paint_line_number_bg(self, im): """ @@ -462,6 +491,15 @@ def format(self, tokensource, outfile): ) self._paint_line_number_bg(im) draw = ImageDraw.Draw(im) + # Highlight + if self.hl_lines: + x = self.image_pad + self.line_number_width - self.line_number_pad + 1 
+ recth = self._get_line_height() + rectw = im.size[0] - x + for linenumber in self.hl_lines: + y = self._get_line_y(linenumber - 1) + draw.rectangle([(x, y), (x + rectw, y + recth)], + fill=self.hl_color) for pos, value, font, kw in self.drawables: draw.text(pos, value, font=font, **kw) im.save(outfile, self.image_format.upper()) diff --git a/pygments_package/pygments/formatters/latex.py b/pygments_package/pygments/formatters/latex.py index af8da1b..4715b04 100644 --- a/pygments_package/pygments/formatters/latex.py +++ b/pygments_package/pygments/formatters/latex.py @@ -5,26 +5,29 @@ Formatter for LaTeX fancyvrb output. - :copyright: 2006-2008 by Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ -import StringIO from pygments.formatter import Formatter -from pygments.token import Token -from pygments.util import get_bool_opt, get_int_opt +from pygments.token import Token, STANDARD_TYPES +from pygments.util import get_bool_opt, get_int_opt, StringIO __all__ = ['LatexFormatter'] def escape_tex(text, commandprefix): - return text.replace('@', '\x00'). \ - replace('[', '\x01'). \ - replace(']', '\x02'). \ - replace('\x00', '@%sZat[]' % commandprefix).\ - replace('\x01', '@%sZlb[]' % commandprefix).\ - replace('\x02', '@%sZrb[]' % commandprefix) + return text.replace('\\', '\x00'). \ + replace('{', '\x01'). \ + replace('}', '\x02'). \ + replace('^', '\x03'). \ + replace('_', '\x04'). \ + replace('\x00', r'\%sZbs{}' % commandprefix). \ + replace('\x01', r'\%sZob{}' % commandprefix). \ + replace('\x02', r'\%sZcb{}' % commandprefix). \ + replace('\x03', r'\%sZca{}' % commandprefix). 
\ + replace('\x04', r'\%sZus{}' % commandprefix) DOC_TEMPLATE = r''' @@ -44,6 +47,79 @@ def escape_tex(text, commandprefix): \end{document} ''' +## Small explanation of the mess below :) +# +# The previous version of the LaTeX formatter just assigned a command to +# each token type defined in the current style. That obviously is +# problematic if the highlighted code is produced for a different style +# than the style commands themselves. +# +# This version works much like the HTML formatter which assigns multiple +# CSS classes to each tag, from the most specific to the least +# specific token type, thus falling back to the parent token type if one +# is not defined. Here, the classes are there too and use the same short +# forms given in token.STANDARD_TYPES. +# +# Highlighted code now only uses one custom command, which by default is +# \PY and selectable by the commandprefix option (and in addition the +# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for +# backwards compatibility purposes). +# +# \PY has two arguments: the classes, separated by +, and the text to +# render in that style. The classes are resolved into the respective +# style commands by magic, which serves to ignore unknown classes. +# +# The magic macros are: +# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text +# to render in \PY@do. Their definition determines the style. +# * \PY@reset resets \PY@it etc. to do nothing. +# * \PY@toks parses the list of classes, using magic inspired by the +# keyval package (but modified to use plusses instead of commas +# because fancyvrb redefines commas inside its environments). +# * \PY@tok processes one class, calling the \PY@tok@classname command +# if it exists. +# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style +# for its class. +# * \PY resets the style, parses the classnames and then calls \PY@do. 
+ +STYLE_TEMPLATE = r''' +\makeatletter +\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%% + \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%% + \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax} +\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname} +\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%% + \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi} +\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%% + \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}} +\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}} + +%(styles)s + +\def\%(cp)sZbs{\char`\\} +\def\%(cp)sZus{\char`\_} +\def\%(cp)sZob{\char`\{} +\def\%(cp)sZcb{\char`\}} +\def\%(cp)sZca{\char`\^} +%% for compatibility with earlier versions +\def\%(cp)sZat{@} +\def\%(cp)sZlb{[} +\def\%(cp)sZrb{]} +\makeatother +''' + + +def _get_ttype_name(ttype): + fname = STANDARD_TYPES.get(ttype) + if fname: + return fname + aname = '' + while fname is None: + aname = ttype[-1] + aname + ttype = ttype.parent + fname = STANDARD_TYPES.get(ttype) + return fname + aname + class LatexFormatter(Formatter): r""" @@ -56,18 +132,18 @@ class LatexFormatter(Formatter): .. sourcecode:: latex \begin{Verbatim}[commandchars=@\[\]] - @PYan[def ]@PYax[foo](bar): - @PYan[pass] + @PY[k][def ]@PY[n+nf][foo](@PY[n][bar]): + @PY[k][pass] \end{Verbatim} - The command sequences used here (``@PYan`` etc.) are generated from the given - `style` and can be retrieved using the `get_style_defs` method. + The special command used here (``@PY``) and all the other macros it needs + are output by the `get_style_defs` method. With the `full` option, a complete LaTeX document is output, including the command definitions in the preamble. The `get_style_defs()` method of a `LatexFormatter` returns a string - containing ``\newcommand`` commands defining the commands used inside the + containing ``\def`` commands defining the macros needed inside the ``Verbatim`` environments. 
Additional options accepted: @@ -111,6 +187,16 @@ class LatexFormatter(Formatter): *New in Pygments 0.7.* *New in Pygments 0.10:* the default is now ``'PY'`` instead of ``'C'``. + + `texcomments` + If set to ``True``, enables LaTeX comment lines. That is, LaTex markup + in comment tokens is not escaped so that LaTeX can render it (default: + ``False``). *New in Pygments 1.2.* + + `mathescape` + If set to ``True``, enables LaTeX math mode escape in comments. That + is, ``'$...$'`` inside a comment will trigger math mode (default: + ``False``). *New in Pygments 1.2.* """ name = 'LaTeX' aliases = ['latex', 'tex'] @@ -126,20 +212,17 @@ def __init__(self, **options): self.verboptions = options.get('verboptions', '') self.nobackground = get_bool_opt(options, 'nobackground', False) self.commandprefix = options.get('commandprefix', 'PY') + self.texcomments = get_bool_opt(options, 'texcomments', False) + self.mathescape = get_bool_opt(options, 'mathescape', False) - self._create_stylecmds() + self._create_stylesheet() - def _create_stylecmds(self): - t2c = self.ttype2cmd = {Token: ''} + def _create_stylesheet(self): + t2n = self.ttype2name = {Token: ''} c2d = self.cmd2def = {} cp = self.commandprefix - letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' - first = iter(letters) - second = iter(letters) - firstl = first.next() - def rgbcolor(col): if col: return ','.join(['%.2f' %(int(col[i] + col[i + 1], 16) / 255.0) @@ -148,93 +231,114 @@ def rgbcolor(col): return '1,1,1' for ttype, ndef in self.style: - cmndef = '#1' + name = _get_ttype_name(ttype) + cmndef = '' if ndef['bold']: - cmndef = r'\textbf{' + cmndef + '}' + cmndef += r'\let\$$@bf=\textbf' if ndef['italic']: - cmndef = r'\textit{' + cmndef + '}' + cmndef += r'\let\$$@it=\textit' if ndef['underline']: - cmndef = r'\underline{' + cmndef + '}' + cmndef += r'\let\$$@ul=\underline' if ndef['roman']: - cmndef = r'\textrm{' + cmndef + '}' + cmndef += r'\let\$$@ff=\textrm' if ndef['sans']: - cmndef = 
r'\textsf{' + cmndef + '}' + cmndef += r'\let\$$@ff=\textsf' if ndef['mono']: - cmndef = r'\texttt{' + cmndef + '}' + cmndef += r'\let\$$@ff=\textsf' if ndef['color']: - cmndef = r'\textcolor[rgb]{%s}{%s}' % ( - rgbcolor(ndef['color']), - cmndef - ) + cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' % + rgbcolor(ndef['color'])) if ndef['border']: - cmndef = r'\fcolorbox[rgb]{%s}{%s}{%s}' % ( - rgbcolor(ndef['border']), - rgbcolor(ndef['bgcolor']), - cmndef - ) + cmndef += (r'\def\$$@bc##1{\fcolorbox[rgb]{%s}{%s}{##1}}' % + (rgbcolor(ndef['border']), + rgbcolor(ndef['bgcolor']))) elif ndef['bgcolor']: - cmndef = r'\colorbox[rgb]{%s}{%s}' % ( - rgbcolor(ndef['bgcolor']), - cmndef - ) - if cmndef == '#1': + cmndef += (r'\def\$$@bc##1{\colorbox[rgb]{%s}{##1}}' % + rgbcolor(ndef['bgcolor'])) + if cmndef == '': continue - try: - alias = cp + firstl + second.next() - except StopIteration: - firstl = first.next() - second = iter(letters) - alias = cp + firstl + second.next() - t2c[ttype] = alias - c2d[alias] = cmndef + cmndef = cmndef.replace('$$', cp) + t2n[ttype] = name + c2d[name] = cmndef def get_style_defs(self, arg=''): """ - Return the \\newcommand sequences needed to define the commands + Return the command sequences needed to define the commands used to format text in the verbatim environment. ``arg`` is ignored. 
""" - nc = '\\newcommand' cp = self.commandprefix - return ( - '%s\\%sZat{@}\n%s\\%sZlb{[}\n%s\\%sZrb{]}\n' % (nc, cp, nc, cp, nc, cp) + - '\n'.join(['\\newcommand\\%s[1]{%s}' % (alias, cmndef) - for alias, cmndef in self.cmd2def.iteritems() - if cmndef != '#1'])) + styles = [] + for name, definition in self.cmd2def.iteritems(): + styles.append(r'\def\%s@tok@%s{%s}' % (cp, name, definition)) + return STYLE_TEMPLATE % {'cp': self.commandprefix, + 'styles': '\n'.join(styles)} - def format(self, tokensource, outfile): + def format_unencoded(self, tokensource, outfile): # TODO: add support for background colors - enc = self.encoding + t2n = self.ttype2name + cp = self.commandprefix if self.full: realoutfile = outfile - outfile = StringIO.StringIO() + outfile = StringIO() - outfile.write(r'\begin{Verbatim}[commandchars=@\[\]') + outfile.write(r'\begin{Verbatim}[commandchars=\\\{\}') if self.linenos: start, step = self.linenostart, self.linenostep outfile.write(',numbers=left' + (start and ',firstnumber=%d' % start or '') + (step and ',stepnumber=%d' % step or '')) + if self.mathescape or self.texcomments: + outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}') if self.verboptions: outfile.write(',' + self.verboptions) outfile.write(']\n') for ttype, value in tokensource: - if enc: - value = value.encode(enc) - value = escape_tex(value, self.commandprefix) - cmd = self.ttype2cmd.get(ttype) - while cmd is None: + if ttype in Token.Comment: + if self.texcomments: + # Try to guess comment starting lexeme and escape it ... + start = value[0:1] + for i in xrange(1, len(value)): + if start[0] != value[i]: + break + start += value[i] + + value = value[len(start):] + start = escape_tex(start, self.commandprefix) + + # ... but do not escape inside comment. + value = start + value + elif self.mathescape: + # Only escape parts not inside a math environment. 
+ parts = value.split('$') + in_math = False + for i, part in enumerate(parts): + if not in_math: + parts[i] = escape_tex(part, self.commandprefix) + in_math = not in_math + value = '$'.join(parts) + else: + value = escape_tex(value, self.commandprefix) + else: + value = escape_tex(value, self.commandprefix) + styles = [] + while ttype is not Token: + try: + styles.append(t2n[ttype]) + except KeyError: + # not in current style + styles.append(_get_ttype_name(ttype)) ttype = ttype.parent - cmd = self.ttype2cmd.get(ttype) - if cmd: + styleval = '+'.join(reversed(styles)) + if styleval: spl = value.split('\n') for line in spl[:-1]: if line: - outfile.write("@%s[%s]" % (cmd, line)) + outfile.write("\\%s{%s}{%s}" % (cp, styleval, line)) outfile.write('\n') if spl[-1]: - outfile.write("@%s[%s]" % (cmd, spl[-1])) + outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1])) else: outfile.write(value) diff --git a/pygments_package/pygments/formatters/other.py b/pygments_package/pygments/formatters/other.py index fb12368..249291a 100644 --- a/pygments_package/pygments/formatters/other.py +++ b/pygments_package/pygments/formatters/other.py @@ -5,12 +5,12 @@ Other formatters: NullFormatter, RawTokenFormatter. - :copyright: 2006-2007 by Georg Brandl, Armin Ronacher. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ from pygments.formatter import Formatter -from pygments.util import get_choice_opt +from pygments.util import OptionError, get_choice_opt, b from pygments.token import Token from pygments.console import colorize @@ -42,7 +42,7 @@ class RawTokenFormatter(Formatter): be converted to a token stream with the `RawTokenLexer`, described in the `lexer list `_. 
- Only one option is accepted: + Only two options are accepted: `compress` If set to ``'gz'`` or ``'bz2'``, compress the output with the given @@ -61,6 +61,10 @@ class RawTokenFormatter(Formatter): def __init__(self, **options): Formatter.__init__(self, **options) + if self.encoding: + raise OptionError('the raw formatter does not support the ' + 'encoding option') + self.encoding = 'ascii' # let pygments.format() do the right thing self.compress = get_choice_opt(options, 'compress', ['', 'none', 'gz', 'bz2'], '') self.error_color = options.get('error_color', None) @@ -74,21 +78,28 @@ def __init__(self, **options): self.error_color) def format(self, tokensource, outfile): + try: + outfile.write(b('')) + except TypeError: + raise TypeError('The raw tokens formatter needs a binary ' + 'output file') if self.compress == 'gz': import gzip outfile = gzip.GzipFile('', 'wb', 9, outfile) - write = outfile.write + def write(text): + outfile.write(text.encode()) flush = outfile.flush elif self.compress == 'bz2': import bz2 compressor = bz2.BZ2Compressor(9) def write(text): - outfile.write(compressor.compress(text)) + outfile.write(compressor.compress(text.encode())) def flush(): outfile.write(compressor.flush()) outfile.flush() else: - write = outfile.write + def write(text): + outfile.write(text.encode()) flush = outfile.flush lasttype = None diff --git a/pygments_package/pygments/formatters/rtf.py b/pygments_package/pygments/formatters/rtf.py index 3d430fe..83414c5 100644 --- a/pygments_package/pygments/formatters/rtf.py +++ b/pygments_package/pygments/formatters/rtf.py @@ -5,8 +5,8 @@ A formatter that generates RTF files. - :copyright: 2006-2007 by Armin Ronacher. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" from pygments.formatter import Formatter @@ -52,8 +52,6 @@ def __init__(self, **options): """ Formatter.__init__(self, **options) self.fontface = options.get('fontface') or '' - if self.encoding in ('utf-8', 'utf-16', 'utf-32'): - self.encoding = None def _escape(self, text): return text.replace('\\', '\\\\') \ @@ -67,7 +65,10 @@ def _escape_text(self, text): # escape text text = self._escape(text) - encoding = self.encoding or 'iso-8859-15' + if self.encoding in ('utf-8', 'utf-16', 'utf-32'): + encoding = 'iso-8859-15' + else: + encoding = self.encoding or 'iso-8859-15' buf = [] for c in text: @@ -75,13 +76,15 @@ def _escape_text(self, text): ansic = c.encode(encoding, 'ignore') or '?' if ord(ansic) > 128: ansic = '\\\'%x' % ord(ansic) + else: + ansic = c buf.append(r'\ud{\u%d%s}' % (ord(c), ansic)) else: buf.append(str(c)) return ''.join(buf).replace('\n', '\\par\n') - def format(self, tokensource, outfile): + def format_unencoded(self, tokensource, outfile): # rtf 1.8 header outfile.write(r'{\rtf1\ansi\deff0' r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}' diff --git a/pygments_package/pygments/formatters/svg.py b/pygments_package/pygments/formatters/svg.py index 0b882aa..9928345 100644 --- a/pygments_package/pygments/formatters/svg.py +++ b/pygments_package/pygments/formatters/svg.py @@ -5,10 +5,9 @@ Formatter for SVG output. - :copyright: 2007 by Matthew Harrison, Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ -import StringIO from pygments.formatter import Formatter from pygments.util import get_bool_opt, get_int_opt @@ -67,7 +66,7 @@ class SvgFormatter(Formatter): size is given in pixels, or ``25`` else. `spacehack` - Convert spaces in the source to ``&160;``, which are non-breaking + Convert spaces in the source to `` ``, which are non-breaking spaces. 
SVG provides the ``xml:space`` attribute to control how whitespace inside tags is handled, in theory, the ``preserve`` value could be used to keep all whitespace as-is. However, many current SVG @@ -96,7 +95,7 @@ def __init__(self, **options): self.spacehack = get_bool_opt(options, 'spacehack', True) self._stylecache = {} - def format(self, tokensource, outfile): + def format_unencoded(self, tokensource, outfile): """ Format ``tokensource``, an iterable of ``(tokentype, tokenstring)`` tuples and write it into ``outfile``. @@ -105,22 +104,20 @@ def format(self, tokensource, outfile): """ x = self.xoffset y = self.yoffset - enc = self.encoding if not self.nowrap: - if enc: - outfile.write('\n' % self.encoding) + if self.encoding: + outfile.write('\n' % + self.encoding) else: outfile.write('\n') outfile.write('\n') outfile.write('\n') - outfile.write('\n' % (self.fontfamily, - self.fontsize)) + outfile.write('\n' % + (self.fontfamily, self.fontsize)) outfile.write('' % (x, y)) for ttype, value in tokensource: - if enc: - value = value.encode(enc) style = self._get_style(ttype) tspan = style and '' or '' tspanend = tspan and '' or '' diff --git a/pygments_package/pygments/formatters/terminal.py b/pygments_package/pygments/formatters/terminal.py index ce1a8e9..fc05cad 100644 --- a/pygments_package/pygments/formatters/terminal.py +++ b/pygments_package/pygments/formatters/terminal.py @@ -5,8 +5,8 @@ Formatter for terminal output with ANSI sequences. - :copyright: 2006-2007 by Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" from pygments.formatter import Formatter @@ -78,19 +78,20 @@ class TerminalFormatter(Formatter): def __init__(self, **options): Formatter.__init__(self, **options) - self.darkbg = get_choice_opt(options, 'bg', ['light', 'dark'], 'light') == 'dark' + self.darkbg = get_choice_opt(options, 'bg', + ['light', 'dark'], 'light') == 'dark' self.colorscheme = options.get('colorscheme', None) or TERMINAL_COLORS def format(self, tokensource, outfile): - enc = self.encoding # hack: if the output is a terminal and has an encoding set, # use that to avoid unicode encode problems - if not enc and hasattr(outfile, "encoding") and \ + if not self.encoding and hasattr(outfile, "encoding") and \ hasattr(outfile, "isatty") and outfile.isatty(): - enc = outfile.encoding + self.encoding = outfile.encoding + return Formatter.format(self, tokensource, outfile) + + def format_unencoded(self, tokensource, outfile): for ttype, value in tokensource: - if enc: - value = value.encode(enc) color = self.colorscheme.get(ttype) while color is None: ttype = ttype[:-1] diff --git a/pygments_package/pygments/formatters/terminal256.py b/pygments_package/pygments/formatters/terminal256.py index 0516aa4..d72a939 100644 --- a/pygments_package/pygments/formatters/terminal256.py +++ b/pygments_package/pygments/formatters/terminal256.py @@ -11,8 +11,8 @@ Formatter version 1. - :copyright: 2007 by Artem Egorkine. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" # TODO: @@ -124,8 +124,8 @@ def _build_color_table(self): valuerange = (0x00, 0x5f, 0x87, 0xaf, 0xd7, 0xff) for i in range(217): - r = valuerange[(i / 36) % 6] - g = valuerange[(i / 6) % 6] + r = valuerange[(i // 36) % 6] + g = valuerange[(i // 6) % 6] b = valuerange[i % 6] self.xterm_colors.append((r, g, b)) @@ -182,17 +182,15 @@ def _setup_styles(self): escape.reset_string()) def format(self, tokensource, outfile): - enc = self.encoding # hack: if the output is a terminal and has an encoding set, # use that to avoid unicode encode problems - if not enc and hasattr(outfile, "encoding") and \ + if not self.encoding and hasattr(outfile, "encoding") and \ hasattr(outfile, "isatty") and outfile.isatty(): - enc = outfile.encoding + self.encoding = outfile.encoding + return Formatter.format(self, tokensource, outfile) + def format_unencoded(self, tokensource, outfile): for ttype, value in tokensource: - if enc: - value = value.encode(enc) - not_found = True while ttype and not_found: try: diff --git a/pygments_package/pygments/lexer.py b/pygments_package/pygments/lexer.py index cba93e4..fbcc39a 100644 --- a/pygments_package/pygments/lexer.py +++ b/pygments_package/pygments/lexer.py @@ -5,16 +5,11 @@ Base lexer classes. - :copyright: 2006-2007 by Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" import re -try: - set -except NameError: - from sets import Set as set - from pygments.filter import apply_filters, Filter from pygments.filters import get_filter_by_name from pygments.token import Error, Text, Other, _TokenType @@ -23,7 +18,7 @@ __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer', - 'LexerContext', 'include', 'flags', 'bygroups', 'using', 'this'] + 'LexerContext', 'include', 'bygroups', 'using', 'this'] _default_analyse = staticmethod(lambda x: 0.0) @@ -51,6 +46,10 @@ class Lexer(object): ``stripall`` Strip all leading and trailing whitespace from the input (default: False). + ``ensurenl`` + Make sure that the input ends with a newline (default: True). This + is required for some lexers that consume input linewise. + *New in Pygments 1.3.* ``tabsize`` If given and greater than 0, expand tabs in the input (default: 0). ``encoding`` @@ -82,6 +81,7 @@ def __init__(self, **options): self.options = options self.stripnl = get_bool_opt(options, 'stripnl', True) self.stripall = get_bool_opt(options, 'stripall', False) + self.ensurenl = get_bool_opt(options, 'ensurenl', True) self.tabsize = get_int_opt(options, 'tabsize', 0) self.encoding = options.get('encoding', 'latin1') # self.encoding = options.get('inencoding', None) or self.encoding @@ -127,8 +127,6 @@ def get_tokens(self, text, unfiltered=False): Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. 
""" - text = text.replace('\r\n', '\n') - text = text.replace('\r', '\n') if not isinstance(text, unicode): if self.encoding == 'guess': try: @@ -148,13 +146,16 @@ def get_tokens(self, text, unfiltered=False): text = text.decode(enc['encoding']) else: text = text.decode(self.encoding) + # text now *is* a unicode string + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') if self.stripall: text = text.strip() elif self.stripnl: text = text.strip('\n') if self.tabsize > 0: text = text.expandtabs(self.tabsize) - if not text.endswith('\n'): + if self.ensurenl and not text.endswith('\n'): text += '\n' def streamer(): @@ -477,7 +478,6 @@ def get_tokens_unprocessed(self, text, stack=('root',)): for rexmatch, action, new_state in statetokens: m = rexmatch(text, pos) if m: - # print rex.pattern if type(action) is _TokenType: yield pos, action, m.group() else: @@ -646,9 +646,15 @@ def do_insertions(insertions, tokens): realpos += len(v) - oldi # leftover tokens - if insleft: + while insleft: # no normal tokens, set realpos to zero realpos = realpos or 0 for p, t, v in itokens: yield realpos, t, v realpos += len(v) + try: + index, itokens = insertions.next() + except StopIteration: + insleft = False + break # not strictly necessary + diff --git a/pygments_package/pygments/lexers/__init__.py b/pygments_package/pygments/lexers/__init__.py index 65b8c68..cce7e9b 100644 --- a/pygments_package/pygments/lexers/__init__.py +++ b/pygments_package/pygments/lexers/__init__.py @@ -5,21 +5,18 @@ Pygments lexers. - :copyright: 2006-2007 by Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" -import fnmatch + +import sys import types +import fnmatch from os.path import basename -try: - set -except NameError: - from sets import Set as set - from pygments.lexers._mapping import LEXERS from pygments.plugin import find_plugin_lexers -from pygments.util import ClassNotFound +from pygments.util import ClassNotFound, bytes __all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class', @@ -83,21 +80,43 @@ def get_lexer_by_name(_alias, **options): raise ClassNotFound('no lexer for alias %r found' % _alias) -def get_lexer_for_filename(_fn, **options): +def get_lexer_for_filename(_fn, code=None, **options): """ - Get a lexer for a filename. + Get a lexer for a filename. If multiple lexers match the filename + pattern, use ``analyze_text()`` to figure out which one is more + appropriate. """ + matches = [] fn = basename(_fn) for modname, name, _, filenames, _ in LEXERS.itervalues(): for filename in filenames: if fnmatch.fnmatch(fn, filename): if name not in _lexer_cache: _load_lexers(modname) - return _lexer_cache[name](**options) + matches.append(_lexer_cache[name]) for cls in find_plugin_lexers(): for filename in cls.filenames: if fnmatch.fnmatch(fn, filename): - return cls(**options) + matches.append(cls) + + if sys.version_info > (3,) and isinstance(code, bytes): + # decode it, since all analyse_text functions expect unicode + code = code.decode('latin1') + + def get_rating(cls): + # The class _always_ defines analyse_text because it's included in + # the Lexer class. The default implementation returns None which + # gets turned into 0.0. Run scripts/detect_missing_analyse_text.py + # to find lexers which need it overridden. 
+ d = cls.analyse_text(code) + #print "Got %r from %r" % (d, cls) + return d + + if code: + matches.sort(key=get_rating) + if matches: + #print "Possible lexers, after sort:", matches + return matches[-1](**options) raise ClassNotFound('no lexer for filename %r found' % _fn) @@ -199,11 +218,9 @@ def __getattr__(self, name): return cls raise AttributeError(name) -''' -import sys + oldmod = sys.modules['pygments.lexers'] newmod = _automodule('pygments.lexers') newmod.__dict__.update(oldmod.__dict__) sys.modules['pygments.lexers'] = newmod del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types -''' \ No newline at end of file diff --git a/pygments_package/pygments/lexers/_clbuiltins.py b/pygments_package/pygments/lexers/_clbuiltins.py index 01feeed..03e5cad 100644 --- a/pygments_package/pygments/lexers/_clbuiltins.py +++ b/pygments_package/pygments/lexers/_clbuiltins.py @@ -5,8 +5,8 @@ ANSI Common Lisp builtins. - :copyright: 2006-2007 by Matteo Sasso. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ BUILTIN_FUNCTIONS = [ # 638 functions diff --git a/pygments_package/pygments/lexers/_luabuiltins.py b/pygments_package/pygments/lexers/_luabuiltins.py index 3a99365..c475c9c 100644 --- a/pygments_package/pygments/lexers/_luabuiltins.py +++ b/pygments_package/pygments/lexers/_luabuiltins.py @@ -9,8 +9,8 @@ Do not edit the MODULES dict by hand. - :copyright: 2006-2007 by Lukas Meuser. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" MODULES = {'basic': ['_G', diff --git a/pygments_package/pygments/lexers/_mapping.py b/pygments_package/pygments/lexers/_mapping.py index 5e743c0..cdaf56a 100644 --- a/pygments_package/pygments/lexers/_mapping.py +++ b/pygments_package/pygments/lexers/_mapping.py @@ -9,30 +9,48 @@ Do not alter the LEXERS dictionary by hand. - :copyright: 2006-2007 by Armin Ronacher, Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ LEXERS = { + 'ABAPLexer': ('pygments.lexers.other', 'ABAP', ('abap',), ('*.abap',), ('text/x-abap',)), 'ActionScript3Lexer': ('pygments.lexers.web', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')), 'ActionScriptLexer': ('pygments.lexers.web', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')), + 'AdaLexer': ('pygments.lexers.compiled', 'Ada', ('ada', 'ada95ada2005'), ('*.adb', '*.ads', '*.ada'), ('text/x-ada',)), + 'AntlrActionScriptLexer': ('pygments.lexers.parsers', 'ANTLR With ActionScript Target', ('antlr-as', 'antlr-actionscript'), ('*.G', '*.g'), ()), + 'AntlrCSharpLexer': ('pygments.lexers.parsers', 'ANTLR With C# Target', ('antlr-csharp', 'antlr-c#'), ('*.G', '*.g'), ()), + 'AntlrCppLexer': ('pygments.lexers.parsers', 'ANTLR With CPP Target', ('antlr-cpp',), ('*.G', '*.g'), ()), + 'AntlrJavaLexer': ('pygments.lexers.parsers', 'ANTLR With Java Target', ('antlr-java',), ('*.G', '*.g'), ()), + 'AntlrLexer': ('pygments.lexers.parsers', 'ANTLR', ('antlr',), (), ()), + 'AntlrObjectiveCLexer': ('pygments.lexers.parsers', 'ANTLR With ObjectiveC Target', ('antlr-objc',), ('*.G', '*.g'), ()), + 'AntlrPerlLexer': ('pygments.lexers.parsers', 'ANTLR With Perl Target', ('antlr-perl',), ('*.G', '*.g'), ()), + 'AntlrPythonLexer': ('pygments.lexers.parsers', 'ANTLR With Python 
Target', ('antlr-python',), ('*.G', '*.g'), ()), + 'AntlrRubyLexer': ('pygments.lexers.parsers', 'ANTLR With Ruby Target', ('antlr-ruby', 'antlr-rb'), ('*.G', '*.g'), ()), 'ApacheConfLexer': ('pygments.lexers.text', 'ApacheConf', ('apacheconf', 'aconf', 'apache'), ('.htaccess', 'apache.conf', 'apache2.conf'), ('text/x-apacheconf',)), 'AppleScriptLexer': ('pygments.lexers.other', 'AppleScript', ('applescript',), ('*.applescript',), ()), + 'AsymptoteLexer': ('pygments.lexers.other', 'Asymptote', ('asy', 'asymptote'), ('*.asy',), ('text/x-asymptote',)), 'BBCodeLexer': ('pygments.lexers.text', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)), 'BaseMakefileLexer': ('pygments.lexers.text', 'Makefile', ('basemake',), (), ()), - 'BashLexer': ('pygments.lexers.other', 'Bash', ('bash', 'sh'), ('*.sh',), ('application/x-sh', 'application/x-shellscript')), + 'BashLexer': ('pygments.lexers.other', 'Bash', ('bash', 'sh', 'ksh'), ('*.sh', '*.ksh', '*.bash', '*.ebuild', '*.eclass'), ('application/x-sh', 'application/x-shellscript')), + 'BashSessionLexer': ('pygments.lexers.other', 'Bash Session', ('console',), ('*.sh-session',), ('application/x-shell-session',)), 'BatchLexer': ('pygments.lexers.other', 'Batchfile', ('bat',), ('*.bat', '*.cmd'), ('application/x-dos-batch',)), 'BefungeLexer': ('pygments.lexers.other', 'Befunge', ('befunge',), ('*.befunge',), ('application/x-befunge',)), 'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)), 'BrainfuckLexer': ('pygments.lexers.other', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)), 'CLexer': ('pygments.lexers.compiled', 'C', ('c',), ('*.c', '*.h'), ('text/x-chdr', 'text/x-csrc')), + 'CMakeLexer': ('pygments.lexers.text', 'CMake', ('cmake',), ('*.cmake',), ('text/x-cmake',)), 'CObjdumpLexer': ('pygments.lexers.asm', 'c-objdump', ('c-objdump',), ('*.c-objdump',), ('text/x-c-objdump',)), + 'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', 
'*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()), 'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('csharp', 'c#'), ('*.cs',), ('text/x-csharp',)), 'CheetahHtmlLexer': ('pygments.lexers.templates', 'HTML+Cheetah', ('html+cheetah', 'html+spitfire'), (), ('text/html+cheetah', 'text/html+spitfire')), 'CheetahJavascriptLexer': ('pygments.lexers.templates', 'JavaScript+Cheetah', ('js+cheetah', 'javascript+cheetah', 'js+spitfire', 'javascript+spitfire'), (), ('application/x-javascript+cheetah', 'text/x-javascript+cheetah', 'text/javascript+cheetah', 'application/x-javascript+spitfire', 'text/x-javascript+spitfire', 'text/javascript+spitfire')), 'CheetahLexer': ('pygments.lexers.templates', 'Cheetah', ('cheetah', 'spitfire'), ('*.tmpl', '*.spt'), ('application/x-cheetah', 'application/x-spitfire')), 'CheetahXmlLexer': ('pygments.lexers.templates', 'XML+Cheetah', ('xml+cheetah', 'xml+spitfire'), (), ('application/xml+cheetah', 'application/xml+spitfire')), 'ClojureLexer': ('pygments.lexers.agile', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')), + 'CoffeeScriptLexer': ('pygments.lexers.web', 'CoffeeScript', ('coffee-script', 'coffeescript'), ('*.coffee',), ('text/coffeescript',)), + 'ColdfusionHtmlLexer': ('pygments.lexers.templates', 'Coldufsion HTML', ('cfm',), ('*.cfm', '*.cfml', '*.cfc'), ('application/x-coldfusion',)), + 'ColdfusionLexer': ('pygments.lexers.templates', 'cfstatement', ('cfs',), (), ()), 'CommonLispLexer': ('pygments.lexers.functional', 'Common Lisp', ('common-lisp', 'cl'), ('*.cl', '*.lisp', '*.el'), ('text/x-common-lisp',)), 'CppLexer': ('pygments.lexers.compiled', 'C++', ('cpp', 'c++'), ('*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx'), ('text/x-c++hdr', 'text/x-c++src')), 'CppObjdumpLexer': ('pygments.lexers.asm', 'cpp-objdump', ('cpp-objdump', 'c++-objdumb', 'cxx-objdump'), ('*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump'), ('text/x-cpp-objdump',)), @@ -42,6 +60,7 @@ 'CssLexer': 
('pygments.lexers.web', 'CSS', ('css',), ('*.css',), ('text/css',)), 'CssPhpLexer': ('pygments.lexers.templates', 'CSS+PHP', ('css+php',), (), ('text/css+php',)), 'CssSmartyLexer': ('pygments.lexers.templates', 'CSS+Smarty', ('css+smarty',), (), ('text/css+smarty',)), + 'CythonLexer': ('pygments.lexers.compiled', 'Cython', ('cython', 'pyx'), ('*.pyx', '*.pxd', '*.pxi'), ('text/x-cython', 'application/x-cython')), 'DLexer': ('pygments.lexers.compiled', 'D', ('d',), ('*.d', '*.di'), ('text/x-dsrc',)), 'DObjdumpLexer': ('pygments.lexers.asm', 'd-objdump', ('d-objdump',), ('*.d-objdump',), ('text/x-d-objdump',)), 'DarcsPatchLexer': ('pygments.lexers.text', 'Darcs Patch', ('dpatch',), ('*.dpatch', '*.darcspatch'), ()), @@ -52,14 +71,24 @@ 'DylanLexer': ('pygments.lexers.compiled', 'Dylan', ('dylan',), ('*.dylan',), ('text/x-dylan',)), 'ErbLexer': ('pygments.lexers.templates', 'ERB', ('erb',), (), ('application/x-ruby-templating',)), 'ErlangLexer': ('pygments.lexers.functional', 'Erlang', ('erlang',), ('*.erl', '*.hrl'), ('text/x-erlang',)), + 'ErlangShellLexer': ('pygments.lexers.functional', 'Erlang erl session', ('erl',), ('*.erl-sh',), ('text/x-erl-shellsession',)), + 'EvoqueHtmlLexer': ('pygments.lexers.templates', 'HTML+Evoque', ('html+evoque',), ('*.html',), ('text/html+evoque',)), + 'EvoqueLexer': ('pygments.lexers.templates', 'Evoque', ('evoque',), ('*.evoque',), ('application/x-evoque',)), + 'EvoqueXmlLexer': ('pygments.lexers.templates', 'XML+Evoque', ('xml+evoque',), ('*.xml',), ('application/xml+evoque',)), + 'FelixLexer': ('pygments.lexers.compiled', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)), 'FortranLexer': ('pygments.lexers.compiled', 'Fortran', ('fortran',), ('*.f', '*.f90'), ('text/x-fortran',)), + 'GLShaderLexer': ('pygments.lexers.compiled', 'GLSL', ('glsl',), ('*.vert', '*.frag', '*.geo'), ('text/x-glslsrc',)), 'GasLexer': ('pygments.lexers.asm', 'GAS', ('gas',), ('*.s', '*.S'), ('text/x-gas',)), 'GenshiLexer': 
('pygments.lexers.templates', 'Genshi', ('genshi', 'kid', 'xml+genshi', 'xml+kid'), ('*.kid',), ('application/x-genshi', 'application/x-kid')), 'GenshiTextLexer': ('pygments.lexers.templates', 'Genshi Text', ('genshitext',), (), ('application/x-genshi-text', 'text/x-genshi')), 'GettextLexer': ('pygments.lexers.text', 'Gettext Catalog', ('pot', 'po'), ('*.pot', '*.po'), ('application/x-gettext', 'text/x-gettext', 'text/gettext')), + 'GherkinLexer': ('pygments.lexers.other', 'Gherkin', ('Cucumber', 'cucumber', 'Gherkin', 'gherkin'), ('*.feature',), ('text/x-gherkin',)), 'GnuplotLexer': ('pygments.lexers.other', 'Gnuplot', ('gnuplot',), ('*.plot', '*.plt'), ('text/x-gnuplot',)), + 'GoLexer': ('pygments.lexers.compiled', 'Go', ('go',), ('*.go',), ('text/x-gosrc',)), 'GroffLexer': ('pygments.lexers.text', 'Groff', ('groff', 'nroff', 'man'), ('*.[1234567]', '*.man'), ('application/x-troff', 'text/troff')), + 'HamlLexer': ('pygments.lexers.web', 'Haml', ('haml', 'HAML'), ('*.haml',), ('text/x-haml',)), 'HaskellLexer': ('pygments.lexers.functional', 'Haskell', ('haskell', 'hs'), ('*.hs',), ('text/x-haskell',)), + 'HaxeLexer': ('pygments.lexers.web', 'haXe', ('hx', 'haXe'), ('*.hx',), ('text/haxe',)), 'HtmlDjangoLexer': ('pygments.lexers.templates', 'HTML+Django/Jinja', ('html+django', 'html+jinja'), (), ('text/html+django', 'text/html+jinja')), 'HtmlGenshiLexer': ('pygments.lexers.templates', 'HTML+Genshi', ('html+genshi', 'html+kid'), (), ('text/html+genshi',)), 'HtmlLexer': ('pygments.lexers.web', 'HTML', ('html',), ('*.html', '*.htm', '*.xhtml', '*.xslt'), ('text/html', 'application/xhtml+xml')), @@ -91,8 +120,11 @@ 'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab', 'octave'), ('*.m',), ('text/matlab',)), 'MatlabSessionLexer': ('pygments.lexers.math', 'Matlab session', ('matlabsession',), (), ()), 'MiniDLexer': ('pygments.lexers.agile', 'MiniD', ('minid',), ('*.md',), ('text/x-minidsrc',)), + 'ModelicaLexer': ('pygments.lexers.other', 'Modelica', 
('modelica',), ('*.mo',), ('text/x-modelica',)), + 'Modula2Lexer': ('pygments.lexers.compiled', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), 'MoinWikiLexer': ('pygments.lexers.text', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)), 'MuPADLexer': ('pygments.lexers.math', 'MuPAD', ('mupad',), ('*.mu',), ()), + 'MxmlLexer': ('pygments.lexers.web', 'MXML', ('mxml',), ('*.mxml',), ()), 'MySqlLexer': ('pygments.lexers.other', 'MySQL', ('mysql',), (), ('text/x-mysql',)), 'MyghtyCssLexer': ('pygments.lexers.templates', 'CSS+Myghty', ('css+myghty',), (), ('text/css+myghty',)), 'MyghtyHtmlLexer': ('pygments.lexers.templates', 'HTML+Myghty', ('html+myghty',), (), ('text/html+myghty',)), @@ -100,27 +132,42 @@ 'MyghtyLexer': ('pygments.lexers.templates', 'Myghty', ('myghty',), ('*.myt', 'autodelegate'), ('application/x-myghty',)), 'MyghtyXmlLexer': ('pygments.lexers.templates', 'XML+Myghty', ('xml+myghty',), (), ('application/xml+myghty',)), 'NasmLexer': ('pygments.lexers.asm', 'NASM', ('nasm',), ('*.asm', '*.ASM'), ('text/x-nasm',)), + 'NewspeakLexer': ('pygments.lexers.other', 'Newspeak', ('newspeak',), ('*.ns2',), ('text/x-newspeak',)), 'NginxConfLexer': ('pygments.lexers.text', 'Nginx configuration file', ('nginx',), (), ('text/x-nginx-conf',)), 'NumPyLexer': ('pygments.lexers.math', 'NumPy', ('numpy',), (), ()), 'ObjdumpLexer': ('pygments.lexers.asm', 'objdump', ('objdump',), ('*.objdump',), ('text/x-objdump',)), 'ObjectiveCLexer': ('pygments.lexers.compiled', 'Objective-C', ('objective-c', 'objectivec', 'obj-c', 'objc'), ('*.m',), ('text/x-objective-c',)), + 'ObjectiveJLexer': ('pygments.lexers.web', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)), 'OcamlLexer': ('pygments.lexers.compiled', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)), 'OcamlLexer': ('pygments.lexers.functional', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', 
'*.mly'), ('text/x-ocaml',)), + 'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)), 'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')), 'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]'), ('text/x-php',)), 'PovrayLexer': ('pygments.lexers.other', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)), + 'PrologLexer': ('pygments.lexers.compiled', 'Prolog', ('prolog',), ('*.prolog', '*.pro', '*.pl'), ('text/x-prolog',)), 'Python3Lexer': ('pygments.lexers.agile', 'Python 3', ('python3', 'py3'), (), ('text/x-python3', 'application/x-python3')), 'Python3TracebackLexer': ('pygments.lexers.agile', 'Python 3.0 Traceback', ('py3tb',), ('*.py3tb',), ('text/x-python3-traceback',)), 'PythonConsoleLexer': ('pygments.lexers.agile', 'Python console session', ('pycon',), (), ('text/x-python-doctest',)), - 'PythonLexer': ('pygments.lexers.agile', 'Python', ('python', 'py'), ('*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript'), ('text/x-python', 'application/x-python')), + 'PythonLexer': ('pygments.lexers.agile', 'Python', ('python', 'py'), ('*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac'), ('text/x-python', 'application/x-python')), 'PythonTracebackLexer': ('pygments.lexers.agile', 'Python Traceback', ('pytb',), ('*.pytb',), ('text/x-python-traceback',)), + 'RConsoleLexer': ('pygments.lexers.math', 'RConsole', ('rconsole', 'rout'), ('*.Rout',), ()), + 'RagelCLexer': ('pygments.lexers.parsers', 'Ragel in C Host', ('ragel-c',), ('*.rl',), ()), + 'RagelCppLexer': ('pygments.lexers.parsers', 'Ragel in CPP Host', ('ragel-cpp',), ('*.rl',), ()), + 'RagelDLexer': ('pygments.lexers.parsers', 'Ragel in D Host', ('ragel-d',), ('*.rl',), ()), + 'RagelEmbeddedLexer': ('pygments.lexers.parsers', 'Embedded Ragel', ('ragel-em',), ('*.rl',), ()), + 'RagelJavaLexer': ('pygments.lexers.parsers', 'Ragel in Java Host', 
('ragel-java',), ('*.rl',), ()), + 'RagelLexer': ('pygments.lexers.parsers', 'Ragel', ('ragel',), (), ()), + 'RagelObjectiveCLexer': ('pygments.lexers.parsers', 'Ragel in Objective C Host', ('ragel-objc',), ('*.rl',), ()), + 'RagelRubyLexer': ('pygments.lexers.parsers', 'Ragel in Ruby Host', ('ragel-ruby', 'ragel-rb'), ('*.rl',), ()), 'RawTokenLexer': ('pygments.lexers.special', 'Raw token data', ('raw',), (), ('application/x-pygments-tokens',)), + 'RebolLexer': ('pygments.lexers.other', 'REBOL', ('rebol',), ('*.r', '*.r3'), ('text/x-rebol',)), 'RedcodeLexer': ('pygments.lexers.other', 'Redcode', ('redcode',), ('*.cw',), ()), 'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',), ('text/html+ruby',)), 'RstLexer': ('pygments.lexers.text', 'reStructuredText', ('rst', 'rest', 'restructuredtext'), ('*.rst', '*.rest'), ('text/x-rst', 'text/prs.fallenstein.rst')), 'RubyConsoleLexer': ('pygments.lexers.agile', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)), 'RubyLexer': ('pygments.lexers.agile', 'Ruby', ('rb', 'ruby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx'), ('text/x-ruby', 'application/x-ruby')), 'SLexer': ('pygments.lexers.math', 'S', ('splus', 's', 'r'), ('*.S', '*.R'), ('text/S-plus', 'text/S', 'text/R')), + 'SassLexer': ('pygments.lexers.web', 'Sass', ('sass', 'SASS'), ('*.sass',), ('text/x-sass',)), 'ScalaLexer': ('pygments.lexers.compiled', 'Scala', ('scala',), ('*.scala',), ('text/x-scala',)), 'SchemeLexer': ('pygments.lexers.functional', 'Scheme', ('scheme', 'scm'), ('*.scm',), ('text/x-scheme', 'application/x-scheme')), 'SmalltalkLexer': ('pygments.lexers.other', 'Smalltalk', ('smalltalk', 'squeak'), ('*.st',), ('text/x-smalltalk',)), @@ -133,6 +180,8 @@ 'TcshLexer': ('pygments.lexers.other', 'Tcsh', ('tcsh', 'csh'), ('*.tcsh', '*.csh'), ('application/x-csh',)), 'TexLexer': ('pygments.lexers.text', 'TeX', ('tex', 'latex'), ('*.tex', '*.aux', '*.toc'), ('text/x-tex', 
'text/x-latex')), 'TextLexer': ('pygments.lexers.special', 'Text only', ('text',), ('*.txt',), ('text/plain',)), + 'ValaLexer': ('pygments.lexers.compiled', 'Vala', ('vala', 'vapi'), ('*.vala', '*.vapi'), ('text/x-vala',)), + 'VbNetAspxLexer': ('pygments.lexers.dotnet', 'aspx-vb', ('aspx-vb',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()), 'VbNetLexer': ('pygments.lexers.dotnet', 'VB.net', ('vb.net', 'vbnet'), ('*.vb', '*.bas'), ('text/x-vbnet', 'text/x-vba')), 'VimLexer': ('pygments.lexers.text', 'VimL', ('vim',), ('*.vim', '.vimrc'), ('text/x-vim',)), 'XmlDjangoLexer': ('pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), (), ('application/xml+django', 'application/xml+jinja')), diff --git a/pygments_package/pygments/lexers/_phpbuiltins.py b/pygments_package/pygments/lexers/_phpbuiltins.py index 5f8fa22..0c0accc 100644 --- a/pygments_package/pygments/lexers/_phpbuiltins.py +++ b/pygments_package/pygments/lexers/_phpbuiltins.py @@ -12,8 +12,8 @@ internet connection. don't run that at home, use a server ;-) - :copyright: 2006-2007 by Armin Ronacher. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ diff --git a/pygments_package/pygments/lexers/agile.py b/pygments_package/pygments/lexers/agile.py index f235d8c..bfaf0a6 100644 --- a/pygments_package/pygments/lexers/agile.py +++ b/pygments_package/pygments/lexers/agile.py @@ -5,22 +5,15 @@ Lexers for agile languages. - :copyright: 2006-2008 by Georg Brandl, Armin Ronacher, - Lukas Meuser, Tim Hatch, Jarrett Billingsley, - Tassilo Schweyer, Steven Hazel, Nick Efford, - Davy Wybiral. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" import re -try: - set -except NameError: - from sets import Set as set from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \ LexerContext, include, combined, do_insertions, bygroups, using -from pygments.token import Error, Text, \ +from pygments.token import Error, Text, Other, \ Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation from pygments.util import get_bool_opt, get_list_opt, shebang_matches from pygments import unistring as uni @@ -44,7 +37,7 @@ class PythonLexer(RegexLexer): name = 'Python' aliases = ['python', 'py'] - filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript'] + filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac'] mimetypes = ['text/x-python', 'application/x-python'] tokens = { @@ -60,10 +53,10 @@ class PythonLexer(RegexLexer): (r'(in|is|and|or|not)\b', Operator.Word), (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator), include('keywords'), - (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), - (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), - (r'(from)(\s+)', bygroups(Keyword.Namespace, Text), 'fromimport'), - (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), + (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), + (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), + (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'), + (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'), include('builtins'), include('backtick'), ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), @@ -83,14 +76,15 @@ class PythonLexer(RegexLexer): r'return|try|while|yield|as|with)\b', Keyword), ], 'builtins': [ - (r'(?>> prompt can end an exception block + # otherwise an ellipsis in place of the traceback frames + # will be mishandled insertions.append((len(curcode), [(0, Generic.Prompt, '...')])) curcode += line[3:] @@ -368,7 +366,8 @@ class PythonTracebackLexer(RegexLexer): 'root': [ (r'^Traceback \(most recent call 
last\):\n', Generic.Traceback, 'intb'), # SyntaxError starts with this. - (r'^(?= File "[^"]+", line \d+\n)', Generic.Traceback, 'intb'), + (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), + (r'^.*\n', Other), ], 'intb': [ (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', @@ -509,8 +508,8 @@ def intp_string_callback(self, match, ctx): (r'"', String.Double, 'simple-string'), (r'(?! states['strings'] += [ # %r regex - (r'(%r(.))(.*?)(\2[mixounse]*)', intp_regex_callback), - # regular fancy strings - (r'%[qsw](.).*?\1', String.Other), - (r'(%[QWx](.))(.*?)(\2)', intp_string_callback), + (r'(%r([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)', + intp_regex_callback), + # regular fancy strings with qsw + (r'%[qsw]([^a-zA-Z0-9])((?:\\\1|(?!\1).)*)\1', String.Other), + (r'(%[QWx]([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2)', + intp_string_callback), # special forms of fancy strings after operators or # in method calls with braces - (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ]).*?\3)', + (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), - # and because of fixed with lookbehinds the whole thing a + # and because of fixed width lookbehinds the whole thing a # second time for line startings... 
- (r'^(\s*)(%([\t ]).*?\3)', + (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)', bygroups(Text, String.Other, None)), - # all regular fancy strings - (r'(%([^a-zA-Z0-9\s]))(.*?)(\2)', intp_string_callback), + # all regular fancy strings without qsw + (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)', + intp_string_callback), ] return states @@ -630,7 +631,7 @@ def intp_string_callback(self, match, ctx): # empty string heredocs (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback), (r'__END__', Comment.Preproc, 'end-part'), - # multiline regex (after keywords or assignemnts) + # multiline regex (after keywords or assignments) (r'(?:^|(?<=[=<>~!])|' r'(?<=(?:\s|;)when\s)|' r'(?<=(?:\s|;)or\s)|' @@ -654,7 +655,7 @@ def intp_string_callback(self, match, ctx): r'(?<=^match\s)|' r'(?<=^if\s)|' r'(?<=^elsif\s)' - r')(\s*)(/)(?!=)', bygroups(Text, String.Regex), 'multiline-regex'), + r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'), # multiline regex (in method calls) (r'(?<=\(|,)/', String.Regex, 'multiline-regex'), # multiline regex (this time the funny no whitespace rule) @@ -664,7 +665,7 @@ def intp_string_callback(self, match, ctx): # better ideas?) # since pygments 0.7 we also eat a "?" operator after numbers # so that the char operator does not work. Chars are not allowed - # there so that you can use the terner operator. + # there so that you can use the ternary operator. 
# stupid example: # x>=0?n[x]:"" (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?', @@ -744,6 +745,7 @@ def intp_string_callback(self, match, ctx): ], 'multiline-regex': [ include('string-intp'), + (r'\\\\', String.Regex), (r'\\/', String.Regex), (r'[\\#]', String.Regex), (r'[^\\/#]+', String.Regex), @@ -829,11 +831,10 @@ class PerlLexer(RegexLexer): (r'@(\\\\|\\\@|[^\@])*@[egimosx]*', String.Regex, '#pop'), (r'%(\\\\|\\\%|[^\%])*%[egimosx]*', String.Regex, '#pop'), (r'\$(\\\\|\\\$|[^\$])*\$[egimosx]*', String.Regex, '#pop'), - (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'), ], 'root': [ (r'\#.*?$', Comment.Single), - (r'=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline), + (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline), (r'(case|continue|do|else|elsif|for|foreach|if|last|my|' r'next|our|redo|reset|then|unless|until|while|use|' r'print|new|BEGIN|END|return)\b', Keyword), @@ -853,6 +854,7 @@ class PerlLexer(RegexLexer): (r's\((\\\\|\\\)|[^\)])*\)\s*', String.Regex, 'balanced-regex'), (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex), + (r'm(?=[/!\\{<\[\(@%\$])', String.Regex, 'balanced-regex'), (r'((?<==~)|(?<=\())\s*/(\\\\|\\/|[^/])*/[gcimosx]*', String.Regex), (r'\s+', Text), (r'(abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|' @@ -883,7 +885,7 @@ class PerlLexer(RegexLexer): r'utime|values|vec|wait|waitpid|wantarray|warn|write' r')\b', Name.Builtin), (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo), - (r'<<([a-zA-Z_][a-zA-Z0-9_]*)\n.*?\n\1\n', String), + (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String), (r'__END__', Comment.Preproc, 'end-part'), (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global), (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global), @@ -900,7 +902,7 @@ class PerlLexer(RegexLexer): (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'), (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'), (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'), - (r'(q|qq|qw|qr|qx)(.)[.\n]*?\1', String.Other), + 
(r'(q|qq|qw|qr|qx)([^a-zA-Z0-9])(.|\n)*?\2', String.Other), (r'package\s+', Keyword, 'modulename'), (r'sub\s+', Keyword, 'funcname'), (r'(\[\]|\*\*|::|<<|>>|>=|<=|<=>|={3}|!=|=~|' @@ -964,7 +966,7 @@ class PerlLexer(RegexLexer): (r'\\', String.Other), (r'\<', String.Other, 'lt-string'), (r'\>', String.Other, '#pop'), - (r'[^\<\>]]+', String.Other) + (r'[^\<\>]+', String.Other) ], 'end-part': [ (r'.+', Comment.Preproc, '#pop') @@ -972,7 +974,11 @@ class PerlLexer(RegexLexer): } def analyse_text(text): - return shebang_matches(text, r'perl(\d\.\d\.\d)?') + if shebang_matches(text, r'perl(\d\.\d\.\d)?'): + return True + if 'my $' in text: + return 0.9 + return 0.1 # who knows, might still be perl! class LuaLexer(RegexLexer): @@ -1005,6 +1011,11 @@ class LuaLexer(RegexLexer): tokens = { 'root': [ + # lua allows a file to start with a shebang + (r'#!(.*?)$', Comment.Preproc), + (r'', Text, 'base'), + ], + 'base': [ (r'(?s)--\[(=*)\[.*?\]\1\]', Comment.Multiline), ('--.*$', Comment.Single), @@ -1015,6 +1026,7 @@ class LuaLexer(RegexLexer): (r'\n', Text), (r'[^\S\n]', Text), + (r'(?s)\[(=*)\[.*?\]\1\]', String.Multiline), (r'[\[\]\{\}\(\)\.,:;]', Punctuation), (r'(==|~=|<=|>=|\.\.|\.\.\.|[=+\-*/%^<>#])', Operator), @@ -1111,9 +1123,9 @@ class MiniDLexer(RegexLexer): (r'\n', Text), (r'\s+', Text), # Comments - (r'//(.*?)\n', Comment), - (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment), - (r'/\+', Comment, 'nestedcomment'), + (r'//(.*?)\n', Comment.Single), + (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), + (r'/\+', Comment.Multiline, 'nestedcomment'), # Keywords (r'(as|assert|break|case|catch|class|continue|coroutine|default' r'|do|else|finally|for|foreach|function|global|namespace' @@ -1153,10 +1165,10 @@ class MiniDLexer(RegexLexer): (r'[a-zA-Z_]\w*', Name), ], 'nestedcomment': [ - (r'[^+/]+', Comment), - (r'/\+', Comment, '#push'), - (r'\+/', Comment, '#pop'), - (r'[+/]', Comment), + (r'[^+/]+', Comment.Multiline), + (r'/\+', Comment.Multiline, '#push'), + (r'\+/', 
Comment.Multiline, '#pop'), + (r'[+/]', Comment.Multiline), ], } @@ -1177,10 +1189,10 @@ class IoLexer(RegexLexer): (r'\n', Text), (r'\s+', Text), # Comments - (r'//(.*?)\n', Comment), - (r'#(.*?)\n', Comment), - (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment), - (r'/\+', Comment, 'nestedcomment'), + (r'//(.*?)\n', Comment.Single), + (r'#(.*?)\n', Comment.Single), + (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), + (r'/\+', Comment.Multiline, 'nestedcomment'), # DoubleQuotedString (r'"(\\\\|\\"|[^"])*"', String), # Operators @@ -1200,10 +1212,10 @@ class IoLexer(RegexLexer): (r'\d+', Number.Integer) ], 'nestedcomment': [ - (r'[^+/]+', Comment), - (r'/\+', Comment, '#push'), - (r'\+/', Comment, '#pop'), - (r'[+/]', Comment), + (r'[^+/]+', Comment.Multiline), + (r'/\+', Comment.Multiline, '#push'), + (r'\+/', Comment.Multiline, '#pop'), + (r'[+/]', Comment.Multiline), ] } @@ -1252,6 +1264,7 @@ def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""): include('command'), include('basic'), include('data'), + (r'}', Keyword), # HACK: somehow we miscounted our braces ], 'command': _gen_command_rules(keyword_cmds_re, builtin_cmds_re), 'command-in-brace': _gen_command_rules(keyword_cmds_re, @@ -1301,12 +1314,12 @@ def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""): ], 'string': [ (r'\[', String.Double, 'string-square'), - (r'(\\\\|\\[0-7]+|\\.|[^"])', String.Double), + (r'(?s)(\\\\|\\[0-7]+|\\.|[^"\\])', String.Double), (r'"', String.Double, '#pop') ], 'string-square': [ (r'\[', String.Double, 'string-square'), - (r'(\\\\|\\[0-7]+|\\.|[^\]])', String.Double), + (r'(?s)(\\\\|\\[0-7]+|\\.|\\\n|[^\]\\])', String.Double), (r'\]', String.Double, '#pop') ], 'brace': [ @@ -1430,7 +1443,7 @@ class ClojureLexer(RegexLexer): # strings, symbols and characters (r'"(\\\\|\\"|[^"])*"', String), (r"'" + valid_name, String.Symbol), - (r"\\([()/'\".'_!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char), + (r"\\([()/'\".'_!§$%& ?;=#+-]{1}|[a-zA-Z0-9]+)", 
String.Char), # constants (r'(#t|#f)', Name.Constant), diff --git a/pygments_package/pygments/lexers/asm.py b/pygments_package/pygments/lexers/asm.py index fffadca..4740569 100644 --- a/pygments_package/pygments/lexers/asm.py +++ b/pygments_package/pygments/lexers/asm.py @@ -5,18 +5,13 @@ Lexers for assembly languages. - :copyright: 2007 by Frits van Bommel. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ import re -try: - set -except NameError: - from sets import Set as set -from pygments.lexer import RegexLexer, include, bygroups, using, \ - this, DelegatingLexer +from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer from pygments.lexers.compiled import DLexer, CppLexer, CLexer from pygments.token import * @@ -92,6 +87,8 @@ class GasLexer(RegexLexer): ] } + def analyse_text(text): + return re.match(r'^\.\w+', text, re.M) class ObjdumpLexer(RegexLexer): """ @@ -198,7 +195,7 @@ class LlvmLexer(RegexLexer): #: optional Comment or Whitespace string = r'"[^"]*?"' - identifier = r'([a-zA-Z$._][a-zA-Z$._0-9]*|' + string + ')' + identifier = r'([-a-zA-Z$._][-a-zA-Z$._0-9]*|' + string + ')' tokens = { 'root': [ @@ -210,14 +207,17 @@ class LlvmLexer(RegexLexer): include('keyword'), (r'%' + identifier, Name.Variable),#Name.Identifier.Local), - (r'@' + identifier, Name.Constant),#Name.Identifier.Global), + (r'@' + identifier, Name.Variable.Global),#Name.Identifier.Global), (r'%\d+', Name.Variable.Anonymous),#Name.Identifier.Anonymous), + (r'@\d+', Name.Variable.Global),#Name.Identifier.Anonymous), + (r'!' + identifier, Name.Variable), + (r'!\d+', Name.Variable.Anonymous), (r'c?' 
+ string, String), (r'0[xX][a-fA-F0-9]+', Number), (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number), - (r'[=<>{}\[\]()*.,]|x\b', Punctuation) + (r'[=<>{}\[\]()*.,!]|x\b', Punctuation) ], 'whitespace': [ (r'(\n|\s)+', Text), @@ -225,36 +225,56 @@ class LlvmLexer(RegexLexer): ], 'keyword': [ # Regular keywords - (r'(void|label|float|double|opaque' - r'|to' - r'|alias|type' - r'|zeroext|signext|inreg|sret|noalias|noreturn|nounwind|nest' - r'|module|asm|target|datalayout|triple' - r'|true|false|null|zeroinitializer|undef' - r'|global|internal|external|linkonce|weak|appending|extern_weak' - r'|dllimport|dllexport' - r'|ccc|fastcc|coldcc|cc|tail' - r'|default|hidden|protected' - r'|thread_local|constant|align|section' - r'|define|declare' - - # Statements & expressions - r'|trunc|zext|sext|fptrunc|fpext|fptoui|fptosi|uitofp|sitofp' - r'|ptrtoint|inttoptr|bitcast|getelementptr|select|icmp|fcmp' - r'|extractelement|insertelement|shufflevector' - r'|sideeffect|volatile' - r'|ret|br|switch|invoke|unwind|unreachable' - r'|add|sub|mul|udiv|sdiv|fdiv|urem|srem|frem' - r'|shl|lshr|ashr|and|or|xor' - r'|malloc|free|alloca|load|store' - r'|phi|call|va_arg|va_list' - - # Comparison condition codes for icmp - r'|eq|ne|ugt|uge|ult|ule|sgt|sge|slt|sle' - # Ditto for fcmp: (minus keywords mentioned in other contexts) - r'|oeq|ogt|oge|olt|ole|one|ord|ueq|ugt|uge|une|uno' + (r'(begin|end' + r'|true|false' + r'|declare|define' + r'|global|constant' + + r'|private|linker_private|internal|available_externally|linkonce' + r'|linkonce_odr|weak|weak_odr|appending|dllimport|dllexport' + r'|common|default|hidden|protected|extern_weak|external' + r'|thread_local|zeroinitializer|undef|null|to|tail|target|triple' + r'|deplibs|datalayout|volatile|nuw|nsw|exact|inbounds|align' + r'|addrspace|section|alias|module|asm|sideeffect|gc|dbg' + + r'|ccc|fastcc|coldcc|x86_stdcallcc|x86_fastcallcc|arm_apcscc' + r'|arm_aapcscc|arm_aapcs_vfpcc' + + r'|cc|c' + + 
r'|signext|zeroext|inreg|sret|nounwind|noreturn|noalias|nocapture' + r'|byval|nest|readnone|readonly' + + r'|inlinehint|noinline|alwaysinline|optsize|ssp|sspreq|noredzone' + r'|noimplicitfloat|naked' + + r'|type|opaque' + + r'|eq|ne|slt|sgt|sle' + r'|sge|ult|ugt|ule|uge' + r'|oeq|one|olt|ogt|ole' + r'|oge|ord|uno|ueq|une' + r'|x' + + # instructions + r'|add|fadd|sub|fsub|mul|fmul|udiv|sdiv|fdiv|urem|srem|frem|shl' + r'|lshr|ashr|and|or|xor|icmp|fcmp' + + r'|phi|call|trunc|zext|sext|fptrunc|fpext|uitofp|sitofp|fptoui' + r'fptosi|inttoptr|ptrtoint|bitcast|select|va_arg|ret|br|switch' + r'|invoke|unwind|unreachable' + + r'|malloc|alloca|free|load|store|getelementptr' + + r'|extractelement|insertelement|shufflevector|getresult' + r'|extractvalue|insertvalue' r')\b', Keyword), + + # Types + (r'void|float|double|x86_fp80|fp128|ppc_fp128|label|metadata', + Keyword.Type), + # Integer types (r'i[1-9]\d*', Keyword) ] @@ -271,7 +291,7 @@ class NasmLexer(RegexLexer): mimetypes = ['text/x-nasm'] identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*' - hexn = r'(?:0[xX][0-9a-fA-F]+|$0[0-9a-fA-F]*|[0-9a-fA-F]+h)' + hexn = r'(?:0[xX][0-9a-fA-F]+|$0[0-9a-fA-F]*|[0-9]+[0-9a-fA-F]*h)' octn = r'[0-7]+q' binn = r'[01]+b' decn = r'[0-9]+' @@ -283,7 +303,8 @@ class NasmLexer(RegexLexer): wordop = r'seg|wrt|strict' type = r'byte|[dq]?word' directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|' - r'COMMON|CPU|GROUP|UPPERCASE|IMPORT|EXPORT|LIBRARY|MODULE') + r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|' + r'EXPORT|LIBRARY|MODULE') flags = re.IGNORECASE | re.MULTILINE tokens = { @@ -291,10 +312,10 @@ class NasmLexer(RegexLexer): include('whitespace'), (r'^\s*%', Comment.Preproc, 'preproc'), (identifier + ':', Name.Label), - (directives, Keyword, 'instruction-args'), - (r'(%s)\s+(equ)' % identifier, - bygroups(Name.Constant, Keyword.Declaration), + (r'(%s)(\s+)(equ)' % identifier, + bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration), 
'instruction-args'), + (directives, Keyword, 'instruction-args'), (declkw, Keyword.Declaration, 'instruction-args'), (identifier, Name.Function, 'instruction-args'), (r'[\r\n]+', Text) diff --git a/pygments_package/pygments/lexers/compiled.py b/pygments_package/pygments/lexers/compiled.py index ba09275..a2543e2 100644 --- a/pygments_package/pygments/lexers/compiled.py +++ b/pygments_package/pygments/lexers/compiled.py @@ -5,20 +5,15 @@ Lexers for compiled languages. - :copyright: 2006-2008 by Georg Brandl, Armin Ronacher, Christoph Hack, - Whitney Young, Kirk McDonald, Stou Sandalski, Krzysiek Goj. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ import re -try: - set -except NameError: - from sets import Set as set from pygments.scanner import Scanner from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ - this + this, combined from pygments.util import get_bool_opt, get_list_opt from pygments.token import \ Text, Comment, Operator, Keyword, Name, String, Number, Punctuation, \ @@ -27,8 +22,11 @@ # backwards compatibility from pygments.lexers.functional import OcamlLexer -__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'JavaLexer', 'ScalaLexer', - 'DylanLexer', 'OcamlLexer', 'ObjectiveCLexer', 'FortranLexer'] +__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'JavaLexer', + 'ScalaLexer', 'DylanLexer', 'OcamlLexer', 'ObjectiveCLexer', + 'FortranLexer', 'GLShaderLexer', 'PrologLexer', 'CythonLexer', + 'ValaLexer', 'OocLexer', 'GoLexer', 'FelixLexer', 'AdaLexer', + 'Modula2Lexer'] class CLexer(RegexLexer): @@ -47,11 +45,12 @@ class CLexer(RegexLexer): 'whitespace': [ (r'^\s*#if\s+0', Comment.Preproc, 'if0'), (r'^\s*#', Comment.Preproc, 'macro'), + (r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*:(?!:))', bygroups(Text, Name.Label)), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation - (r'//(\n|(.|\n)*?[^\\]\n)', 
Comment), - (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment), + (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single), + (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), ], 'statements': [ (r'L?"', String, 'string'), @@ -61,6 +60,7 @@ class CLexer(RegexLexer): (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'0[0-7]+[Ll]?', Number.Oct), (r'\d+[Ll]?', Number.Integer), + (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)), @@ -73,7 +73,6 @@ class CLexer(RegexLexer): (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|' r'declspec|finally|int64|try|leave)\b', Keyword.Reserved), (r'(true|false|NULL)\b', Name.Builtin), - ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), ], 'root': [ @@ -117,16 +116,16 @@ class CLexer(RegexLexer): ], 'macro': [ (r'[^/\n]+', Comment.Preproc), - (r'/[*](.|\n)*?[*]/', Comment), - (r'//.*?\n', Comment, '#pop'), + (r'/[*](.|\n)*?[*]/', Comment.Multiline), + (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ - (r'^\s*#if.*?(?/-]', Operator), (r'[()\[\],.;]', Punctuation), (r'(asm|auto|break|case|catch|const|const_cast|continue|' @@ -223,15 +223,15 @@ class CppLexer(RegexLexer): ], 'macro': [ (r'[^/\n]+', Comment.Preproc), - (r'/[*](.|\n)*?[*]/', Comment), - (r'//.*?\n', Comment, '#pop'), + (r'/[*](.|\n)*?[*]/', Comment.Multiline), + (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ - (r'^\s*#if.*?(?`_ source code. 
@@ -941,11 +944,12 @@ class ScalaLexer(RegexLexer): #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' - op = ur'[-~\^\*!%&\\<>\|+=:/?@\u00a6-\u00a7\u00a9\u00ac\u00ae\u00b0-\u00b1\u00b6\u00d7\u00f7\u03f6\u0482\u0606-\u0608\u060e-\u060f\u06e9\u06fd-\u06fe\u07f6\u09fa\u0b70\u0bf3-\u0bf8\u0bfa\u0c7f\u0cf1-\u0cf2\u0d79\u0f01-\u0f03\u0f13-\u0f17\u0f1a-\u0f1f\u0f34\u0f36\u0f38\u0fbe-\u0fc5\u0fc7-\u0fcf\u109e-\u109f\u1360\u1390-\u1399\u1940\u19e0-\u19ff\u1b61-\u1b6a\u1b74-\u1b7c\u2044\u2052\u207a-\u207c\u208a-\u208c\u2100-\u2101\u2103-\u2106\u2108-\u2109\u2114\u2116-\u2118\u211e-\u2123\u2125\u2127\u2129\u212e\u213a-\u213b\u2140-\u2144\u214a-\u214d\u214f\u2190-\u2328\u232b-\u244a\u249c-\u24e9\u2500-\u2767\u2794-\u27c4\u27c7-\u27e5\u27f0-\u2982\u2999-\u29d7\u29dc-\u29fb\u29fe-\u2b54\u2ce5-\u2cea\u2e80-\u2ffb\u3004\u3012-\u3013\u3020\u3036-\u3037\u303e-\u303f\u3190-\u3191\u3196-\u319f\u31c0-\u31e3\u3200-\u321e\u322a-\u3250\u3260-\u327f\u328a-\u32b0\u32c0-\u33ff\u4dc0-\u4dff\ua490-\ua4c6\ua828-\ua82b\ufb29\ufdfd\ufe62\ufe64-\ufe66\uff0b\uff1c-\uff1e\uff5c\uff5e\uffe2\uffe4\uffe8-\uffee\ufffc-\ufffd]+' + # don't use raw unicode strings! 
+ op = u'[-~\\^\\*!%&\\\\<>\\|+=:/?@\u00a6-\u00a7\u00a9\u00ac\u00ae\u00b0-\u00b1\u00b6\u00d7\u00f7\u03f6\u0482\u0606-\u0608\u060e-\u060f\u06e9\u06fd-\u06fe\u07f6\u09fa\u0b70\u0bf3-\u0bf8\u0bfa\u0c7f\u0cf1-\u0cf2\u0d79\u0f01-\u0f03\u0f13-\u0f17\u0f1a-\u0f1f\u0f34\u0f36\u0f38\u0fbe-\u0fc5\u0fc7-\u0fcf\u109e-\u109f\u1360\u1390-\u1399\u1940\u19e0-\u19ff\u1b61-\u1b6a\u1b74-\u1b7c\u2044\u2052\u207a-\u207c\u208a-\u208c\u2100-\u2101\u2103-\u2106\u2108-\u2109\u2114\u2116-\u2118\u211e-\u2123\u2125\u2127\u2129\u212e\u213a-\u213b\u2140-\u2144\u214a-\u214d\u214f\u2190-\u2328\u232b-\u244a\u249c-\u24e9\u2500-\u2767\u2794-\u27c4\u27c7-\u27e5\u27f0-\u2982\u2999-\u29d7\u29dc-\u29fb\u29fe-\u2b54\u2ce5-\u2cea\u2e80-\u2ffb\u3004\u3012-\u3013\u3020\u3036-\u3037\u303e-\u303f\u3190-\u3191\u3196-\u319f\u31c0-\u31e3\u3200-\u321e\u322a-\u3250\u3260-\u327f\u328a-\u32b0\u32c0-\u33ff\u4dc0-\u4dff\ua490-\ua4c6\ua828-\ua82b\ufb29\ufdfd\ufe62\ufe64-\ufe66\uff0b\uff1c-\uff1e\uff5c\uff5e\uffe2\uffe4\uffe8-\uffee\ufffc-\ufffd]+' + + letter = 
u'[a-zA-Z\\$_\u00aa\u00b5\u00ba\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02af\u0370-\u0373\u0376-\u0377\u037b-\u037d\u0386\u0388-\u03f5\u03f7-\u0481\u048a-\u0556\u0561-\u0587\u05d0-\u05f2\u0621-\u063f\u0641-\u064a\u066e-\u066f\u0671-\u06d3\u06d5\u06ee-\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u0904-\u0939\u093d\u0950\u0958-\u0961\u0972-\u097f\u0985-\u09b9\u09bd\u09ce\u09dc-\u09e1\u09f0-\u09f1\u0a05-\u0a39\u0a59-\u0a5e\u0a72-\u0a74\u0a85-\u0ab9\u0abd\u0ad0-\u0ae1\u0b05-\u0b39\u0b3d\u0b5c-\u0b61\u0b71\u0b83-\u0bb9\u0bd0\u0c05-\u0c3d\u0c58-\u0c61\u0c85-\u0cb9\u0cbd\u0cde-\u0ce1\u0d05-\u0d3d\u0d60-\u0d61\u0d7a-\u0d7f\u0d85-\u0dc6\u0e01-\u0e30\u0e32-\u0e33\u0e40-\u0e45\u0e81-\u0eb0\u0eb2-\u0eb3\u0ebd-\u0ec4\u0edc-\u0f00\u0f40-\u0f6c\u0f88-\u0f8b\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065-\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10fa\u1100-\u135a\u1380-\u138f\u13a0-\u166c\u166f-\u1676\u1681-\u169a\u16a0-\u16ea\u16ee-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u1770\u1780-\u17b3\u17dc\u1820-\u1842\u1844-\u18a8\u18aa-\u191c\u1950-\u19a9\u19c1-\u19c7\u1a00-\u1a16\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae-\u1baf\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c77\u1d00-\u1d2b\u1d62-\u1d77\u1d79-\u1d9a\u1e00-\u1fbc\u1fbe\u1fc2-\u1fcc\u1fd0-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ffc\u2071\u207f\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c7c\u2c80-\u2ce4\u2d00-\u2d65\u2d80-\u2dde\u3006-\u3007\u3021-\u3029\u3038-\u303a\u303c\u3041-\u3096\u309f\u30a1-\u30fa\u30ff-\u318e\u31a0-\u31b7\u31f0-\u31ff\u3400-\u4db5\u4e00-\ua014\ua016-\ua48c\ua500-\ua60b\ua610-\ua61f\ua62a-\ua66e\ua680-\ua697\ua722-\ua76f\ua771-\ua787\ua78b-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua90a-\ua925\ua930-\ua946\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uac00-\ud7a3\uf900-\ufb1d\ufb1f-\ufb28\ufb2a-\ufd3d\ufd50-\ufdfb\ufe70-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\
uff6f\uff71-\uff9d\uffa0-\uffdc]' - letter = ur'[a-zA-Z\$_\u00aa\u00b5\u00ba\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02af\u0370-\u0373\u0376-\u0377\u037b-\u037d\u0386\u0388-\u03f5\u03f7-\u0481\u048a-\u0556\u0561-\u0587\u05d0-\u05f2\u0621-\u063f\u0641-\u064a\u066e-\u066f\u0671-\u06d3\u06d5\u06ee-\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u0904-\u0939\u093d\u0950\u0958-\u0961\u0972-\u097f\u0985-\u09b9\u09bd\u09ce\u09dc-\u09e1\u09f0-\u09f1\u0a05-\u0a39\u0a59-\u0a5e\u0a72-\u0a74\u0a85-\u0ab9\u0abd\u0ad0-\u0ae1\u0b05-\u0b39\u0b3d\u0b5c-\u0b61\u0b71\u0b83-\u0bb9\u0bd0\u0c05-\u0c3d\u0c58-\u0c61\u0c85-\u0cb9\u0cbd\u0cde-\u0ce1\u0d05-\u0d3d\u0d60-\u0d61\u0d7a-\u0d7f\u0d85-\u0dc6\u0e01-\u0e30\u0e32-\u0e33\u0e40-\u0e45\u0e81-\u0eb0\u0eb2-\u0eb3\u0ebd-\u0ec4\u0edc-\u0f00\u0f40-\u0f6c\u0f88-\u0f8b\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065-\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10fa\u1100-\u135a\u1380-\u138f\u13a0-\u166c\u166f-\u1676\u1681-\u169a\u16a0-\u16ea\u16ee-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u1770\u1780-\u17b3\u17dc\u1820-\u1842\u1844-\u18a8\u18aa-\u191c\u1950-\u19a9\u19c1-\u19c7\u1a00-\u1a16\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae-\u1baf\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c77\u1d00-\u1d2b\u1d62-\u1d77\u1d79-\u1d9a\u1e00-\u1fbc\u1fbe\u1fc2-\u1fcc\u1fd0-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ffc\u2071\u207f\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c7c\u2c80-\u2ce4\u2d00-\u2d65\u2d80-\u2dde\u3006-\u3007\u3021-\u3029\u3038-\u303a\u303c\u3041-\u3096\u309f\u30a1-\u30fa\u30ff-\u318e\u31a0-\u31b7\u31f0-\u31ff\u3400-\u4db5\u4e00-\ua014\ua016-\ua48c\ua500-\ua60b\ua610-\ua61f\ua62a-\ua66e\ua680-\ua697\ua722-\ua76f\ua771-\ua787\ua78b-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua90a-\ua925\ua930-\ua946\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uac00-\ud7a3\uf900-\ufb1d\ufb1f-\ufb28\ufb2a-\ufd3d\ufd50-\ufdfb\u
fe70-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uff6f\uff71-\uff9d\uffa0-\uffdc]' - - upper = ur'[A-Z\$_\u00c0-\u00d6\u00d8-\u00de\u0100\u0102\u0104\u0106\u0108\u010a\u010c\u010e\u0110\u0112\u0114\u0116\u0118\u011a\u011c\u011e\u0120\u0122\u0124\u0126\u0128\u012a\u012c\u012e\u0130\u0132\u0134\u0136\u0139\u013b\u013d\u013f\u0141\u0143\u0145\u0147\u014a\u014c\u014e\u0150\u0152\u0154\u0156\u0158\u015a\u015c\u015e\u0160\u0162\u0164\u0166\u0168\u016a\u016c\u016e\u0170\u0172\u0174\u0176\u0178-\u0179\u017b\u017d\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018b\u018e-\u0191\u0193-\u0194\u0196-\u0198\u019c-\u019d\u019f-\u01a0\u01a2\u01a4\u01a6-\u01a7\u01a9\u01ac\u01ae-\u01af\u01b1-\u01b3\u01b5\u01b7-\u01b8\u01bc\u01c4\u01c7\u01ca\u01cd\u01cf\u01d1\u01d3\u01d5\u01d7\u01d9\u01db\u01de\u01e0\u01e2\u01e4\u01e6\u01e8\u01ea\u01ec\u01ee\u01f1\u01f4\u01f6-\u01f8\u01fa\u01fc\u01fe\u0200\u0202\u0204\u0206\u0208\u020a\u020c\u020e\u0210\u0212\u0214\u0216\u0218\u021a\u021c\u021e\u0220\u0222\u0224\u0226\u0228\u022a\u022c\u022e\u0230\u0232\u023a-\u023b\u023d-\u023e\u0241\u0243-\u0246\u0248\u024a\u024c\u024e\u0370\u0372\u0376\u0386\u0388-\u038f\u0391-\u03ab\u03cf\u03d2-\u03d4\u03d8\u03da\u03dc\u03de\u03e0\u03e2\u03e4\u03e6\u03e8\u03ea\u03ec\u03ee\u03f4\u03f7\u03f9-\u03fa\u03fd-\u042f\u0460\u0462\u0464\u0466\u0468\u046a\u046c\u046e\u0470\u0472\u0474\u0476\u0478\u047a\u047c\u047e\u0480\u048a\u048c\u048e\u0490\u0492\u0494\u0496\u0498\u049a\u049c\u049e\u04a0\u04a2\u04a4\u04a6\u04a8\u04aa\u04ac\u04ae\u04b0\u04b2\u04b4\u04b6\u04b8\u04ba\u04bc\u04be\u04c0-\u04c1\u04c3\u04c5\u04c7\u04c9\u04cb\u04cd\u04d0\u04d2\u04d4\u04d6\u04d8\u04da\u04dc\u04de\u04e0\u04e2\u04e4\u04e6\u04e8\u04ea\u04ec\u04ee\u04f0\u04f2\u04f4\u04f6\u04f8\u04fa\u04fc\u04fe\u0500\u0502\u0504\u0506\u0508\u050a\u050c\u050e\u0510\u0512\u0514\u0516\u0518\u051a\u051c\u051e\u0520\u0522\u0531-\u0556\u10a0-\u10c5\u1e00\u1e02\u1e04\u1e06\u1e08\u1e0a\u1e0c\u1e0e\u1e10\u1e12\u1e14\u1e16\u1e18\u1e1a\u1e1c\u1e1e\u1e20\u1e22\u1e24\u1e26\u1e28\u1e2a\u1e2c
\u1e2e\u1e30\u1e32\u1e34\u1e36\u1e38\u1e3a\u1e3c\u1e3e\u1e40\u1e42\u1e44\u1e46\u1e48\u1e4a\u1e4c\u1e4e\u1e50\u1e52\u1e54\u1e56\u1e58\u1e5a\u1e5c\u1e5e\u1e60\u1e62\u1e64\u1e66\u1e68\u1e6a\u1e6c\u1e6e\u1e70\u1e72\u1e74\u1e76\u1e78\u1e7a\u1e7c\u1e7e\u1e80\u1e82\u1e84\u1e86\u1e88\u1e8a\u1e8c\u1e8e\u1e90\u1e92\u1e94\u1e9e\u1ea0\u1ea2\u1ea4\u1ea6\u1ea8\u1eaa\u1eac\u1eae\u1eb0\u1eb2\u1eb4\u1eb6\u1eb8\u1eba\u1ebc\u1ebe\u1ec0\u1ec2\u1ec4\u1ec6\u1ec8\u1eca\u1ecc\u1ece\u1ed0\u1ed2\u1ed4\u1ed6\u1ed8\u1eda\u1edc\u1ede\u1ee0\u1ee2\u1ee4\u1ee6\u1ee8\u1eea\u1eec\u1eee\u1ef0\u1ef2\u1ef4\u1ef6\u1ef8\u1efa\u1efc\u1efe\u1f08-\u1f0f\u1f18-\u1f1d\u1f28-\u1f2f\u1f38-\u1f3f\u1f48-\u1f4d\u1f59-\u1f5f\u1f68-\u1f6f\u1fb8-\u1fbb\u1fc8-\u1fcb\u1fd8-\u1fdb\u1fe8-\u1fec\u1ff8-\u1ffb\u2102\u2107\u210b-\u210d\u2110-\u2112\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u2130-\u2133\u213e-\u213f\u2145\u2183\u2c00-\u2c2e\u2c60\u2c62-\u2c64\u2c67\u2c69\u2c6b\u2c6d-\u2c6f\u2c72\u2c75\u2c80\u2c82\u2c84\u2c86\u2c88\u2c8a\u2c8c\u2c8e\u2c90\u2c92\u2c94\u2c96\u2c98\u2c9a\u2c9c\u2c9e\u2ca0\u2ca2\u2ca4\u2ca6\u2ca8\u2caa\u2cac\u2cae\u2cb0\u2cb2\u2cb4\u2cb6\u2cb8\u2cba\u2cbc\u2cbe\u2cc0\u2cc2\u2cc4\u2cc6\u2cc8\u2cca\u2ccc\u2cce\u2cd0\u2cd2\u2cd4\u2cd6\u2cd8\u2cda\u2cdc\u2cde\u2ce0\u2ce2\ua640\ua642\ua644\ua646\ua648\ua64a\ua64c\ua64e\ua650\ua652\ua654\ua656\ua658\ua65a\ua65c\ua65e\ua662\ua664\ua666\ua668\ua66a\ua66c\ua680\ua682\ua684\ua686\ua688\ua68a\ua68c\ua68e\ua690\ua692\ua694\ua696\ua722\ua724\ua726\ua728\ua72a\ua72c\ua72e\ua732\ua734\ua736\ua738\ua73a\ua73c\ua73e\ua740\ua742\ua744\ua746\ua748\ua74a\ua74c\ua74e\ua750\ua752\ua754\ua756\ua758\ua75a\ua75c\ua75e\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua779\ua77b\ua77d-\ua77e\ua780\ua782\ua784\ua786\ua78b\uff21-\uff3a]' + upper = 
u'[A-Z\\$_\u00c0-\u00d6\u00d8-\u00de\u0100\u0102\u0104\u0106\u0108\u010a\u010c\u010e\u0110\u0112\u0114\u0116\u0118\u011a\u011c\u011e\u0120\u0122\u0124\u0126\u0128\u012a\u012c\u012e\u0130\u0132\u0134\u0136\u0139\u013b\u013d\u013f\u0141\u0143\u0145\u0147\u014a\u014c\u014e\u0150\u0152\u0154\u0156\u0158\u015a\u015c\u015e\u0160\u0162\u0164\u0166\u0168\u016a\u016c\u016e\u0170\u0172\u0174\u0176\u0178-\u0179\u017b\u017d\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018b\u018e-\u0191\u0193-\u0194\u0196-\u0198\u019c-\u019d\u019f-\u01a0\u01a2\u01a4\u01a6-\u01a7\u01a9\u01ac\u01ae-\u01af\u01b1-\u01b3\u01b5\u01b7-\u01b8\u01bc\u01c4\u01c7\u01ca\u01cd\u01cf\u01d1\u01d3\u01d5\u01d7\u01d9\u01db\u01de\u01e0\u01e2\u01e4\u01e6\u01e8\u01ea\u01ec\u01ee\u01f1\u01f4\u01f6-\u01f8\u01fa\u01fc\u01fe\u0200\u0202\u0204\u0206\u0208\u020a\u020c\u020e\u0210\u0212\u0214\u0216\u0218\u021a\u021c\u021e\u0220\u0222\u0224\u0226\u0228\u022a\u022c\u022e\u0230\u0232\u023a-\u023b\u023d-\u023e\u0241\u0243-\u0246\u0248\u024a\u024c\u024e\u0370\u0372\u0376\u0386\u0388-\u038f\u0391-\u03ab\u03cf\u03d2-\u03d4\u03d8\u03da\u03dc\u03de\u03e0\u03e2\u03e4\u03e6\u03e8\u03ea\u03ec\u03ee\u03f4\u03f7\u03f9-\u03fa\u03fd-\u042f\u0460\u0462\u0464\u0466\u0468\u046a\u046c\u046e\u0470\u0472\u0474\u0476\u0478\u047a\u047c\u047e\u0480\u048a\u048c\u048e\u0490\u0492\u0494\u0496\u0498\u049a\u049c\u049e\u04a0\u04a2\u04a4\u04a6\u04a8\u04aa\u04ac\u04ae\u04b0\u04b2\u04b4\u04b6\u04b8\u04ba\u04bc\u04be\u04c0-\u04c1\u04c3\u04c5\u04c7\u04c9\u04cb\u04cd\u04d0\u04d2\u04d4\u04d6\u04d8\u04da\u04dc\u04de\u04e0\u04e2\u04e4\u04e6\u04e8\u04ea\u04ec\u04ee\u04f0\u04f2\u04f4\u04f6\u04f8\u04fa\u04fc\u04fe\u0500\u0502\u0504\u0506\u0508\u050a\u050c\u050e\u0510\u0512\u0514\u0516\u0518\u051a\u051c\u051e\u0520\u0522\u0531-\u0556\u10a0-\u10c5\u1e00\u1e02\u1e04\u1e06\u1e08\u1e0a\u1e0c\u1e0e\u1e10\u1e12\u1e14\u1e16\u1e18\u1e1a\u1e1c\u1e1e\u1e20\u1e22\u1e24\u1e26\u1e28\u1e2a\u1e2c\u1e2e\u1e30\u1e32\u1e34\u1e36\u1e38\u1e3a\u1e3c\u1e3e\u1e40\u1e42\u1e44\u1e46\u1e48\u1e4a\
u1e4c\u1e4e\u1e50\u1e52\u1e54\u1e56\u1e58\u1e5a\u1e5c\u1e5e\u1e60\u1e62\u1e64\u1e66\u1e68\u1e6a\u1e6c\u1e6e\u1e70\u1e72\u1e74\u1e76\u1e78\u1e7a\u1e7c\u1e7e\u1e80\u1e82\u1e84\u1e86\u1e88\u1e8a\u1e8c\u1e8e\u1e90\u1e92\u1e94\u1e9e\u1ea0\u1ea2\u1ea4\u1ea6\u1ea8\u1eaa\u1eac\u1eae\u1eb0\u1eb2\u1eb4\u1eb6\u1eb8\u1eba\u1ebc\u1ebe\u1ec0\u1ec2\u1ec4\u1ec6\u1ec8\u1eca\u1ecc\u1ece\u1ed0\u1ed2\u1ed4\u1ed6\u1ed8\u1eda\u1edc\u1ede\u1ee0\u1ee2\u1ee4\u1ee6\u1ee8\u1eea\u1eec\u1eee\u1ef0\u1ef2\u1ef4\u1ef6\u1ef8\u1efa\u1efc\u1efe\u1f08-\u1f0f\u1f18-\u1f1d\u1f28-\u1f2f\u1f38-\u1f3f\u1f48-\u1f4d\u1f59-\u1f5f\u1f68-\u1f6f\u1fb8-\u1fbb\u1fc8-\u1fcb\u1fd8-\u1fdb\u1fe8-\u1fec\u1ff8-\u1ffb\u2102\u2107\u210b-\u210d\u2110-\u2112\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u2130-\u2133\u213e-\u213f\u2145\u2183\u2c00-\u2c2e\u2c60\u2c62-\u2c64\u2c67\u2c69\u2c6b\u2c6d-\u2c6f\u2c72\u2c75\u2c80\u2c82\u2c84\u2c86\u2c88\u2c8a\u2c8c\u2c8e\u2c90\u2c92\u2c94\u2c96\u2c98\u2c9a\u2c9c\u2c9e\u2ca0\u2ca2\u2ca4\u2ca6\u2ca8\u2caa\u2cac\u2cae\u2cb0\u2cb2\u2cb4\u2cb6\u2cb8\u2cba\u2cbc\u2cbe\u2cc0\u2cc2\u2cc4\u2cc6\u2cc8\u2cca\u2ccc\u2cce\u2cd0\u2cd2\u2cd4\u2cd6\u2cd8\u2cda\u2cdc\u2cde\u2ce0\u2ce2\ua640\ua642\ua644\ua646\ua648\ua64a\ua64c\ua64e\ua650\ua652\ua654\ua656\ua658\ua65a\ua65c\ua65e\ua662\ua664\ua666\ua668\ua66a\ua66c\ua680\ua682\ua684\ua686\ua688\ua68a\ua68c\ua68e\ua690\ua692\ua694\ua696\ua722\ua724\ua726\ua728\ua72a\ua72c\ua72e\ua732\ua734\ua736\ua738\ua73a\ua73c\ua73e\ua740\ua742\ua744\ua746\ua748\ua74a\ua74c\ua74e\ua750\ua752\ua754\ua756\ua758\ua75a\ua75c\ua75e\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua779\ua77b\ua77d-\ua77e\ua780\ua782\ua784\ua786\ua78b\uff21-\uff3a]' idrest = ur'%s(?:%s|[0-9])*(?:(?<=_)%s)?' 
% (letter, letter, op) @@ -955,7 +959,7 @@ class ScalaLexer(RegexLexer): (r'(class|trait|object)(\s+)', bygroups(Keyword, Text), 'class'), (ur"'%s" % idrest, Text.Symbol), (r'[^\S\n]+', Text), - (r'//.*?\n', Comment), + (r'//.*?\n', Comment.Single), (r'/\*', Comment.Multiline, 'comment'), (ur'@%s' % idrest, Name.Decorator), (ur'(abstract|ca(?:se|tch)|d(?:ef|o)|e(?:lse|xtends)|' @@ -963,7 +967,7 @@ class ScalaLexer(RegexLexer): ur'lazy|match|new|override|pr(?:ivate|otected)' ur'|re(?:quires|turn)|s(?:ealed|uper)|' ur't(?:h(?:is|row)|ry)|va[lr]|w(?:hile|ith)|yield)\b|' - ur'(<[%:-]|=>|>:|[#=@_\u21D2\u2190])(\b|(?=\s)|$)', Keyword), + u'(<[%:-]|=>|>:|[#=@_\u21D2\u2190])(\b|(?=\\s)|$)', Keyword), (ur':(?!%s)' % op, Keyword, 'type'), (ur'%s%s\b' % (upper, idrest), Name.Class), (r'(true|false|null)\b', Keyword.Constant), @@ -972,13 +976,15 @@ class ScalaLexer(RegexLexer): (r'"""(?:.|\n)*?"""', String), (r'"(\\\\|\\"|[^"])*"', String), (ur"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), -# (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator, Name.Attribute)), +# (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator, +# Name.Attribute)), (idrest, Name), (r'`[^`]+`', Name), (r'\[', Operator, 'typeparam'), (r'[\(\)\{\};,.]', Operator), (op, Operator), - (ur'([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?', Number.Float), + (ur'([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?', + Number.Float), (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+L?', Number.Integer), (r'\n', Text) @@ -992,21 +998,24 @@ class ScalaLexer(RegexLexer): (ur'%s|%s|`[^`]+`' % (idrest, op), Name.Class, '#pop'), ], 'type': [ - (r'(?<=\])(\s*$)', Text, '#pop'), - (r'[\s\n]+', Text), - (ur'<[%:]|=>|>:|[#_\u21D2]|forSome|type', Keyword), - (r'([,\);}]|=(?!>))([\s\n]*)', bygroups(Operator, Text), '#pop'), + (r'\s+', Text), + (u'<[%:]|>:|[#_\u21D2]|forSome|type', Keyword), + (r'([,\);}]|=>|=)([\s\n]*)', bygroups(Operator, Text), '#pop'), (r'[\(\{]', Operator, '#push'), - 
(ur'((?:\.|%s|%s|`[^`]+`)+)([\s]*)(\[)' % (idrest, op), bygroups(Keyword.Type, Text, Operator), 'typeparam'), - (ur'((?:\.|%s|%s|`[^`]+`)+)(\s*)$' % (idrest, op), bygroups(Keyword.Type, Text), '#pop'), - (ur'(\.|%s|%s|`[^`]+`)+' % (idrest, op), Keyword.Type) + (ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)(\[)' % + (idrest, op, idrest, op), + bygroups(Keyword.Type, Text, Operator), ('#pop', 'typeparam')), + (ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)$' % + (idrest, op, idrest, op), + bygroups(Keyword.Type, Text), '#pop'), + (ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type) ], 'typeparam': [ (r'[\s\n,]+', Text), - (ur'<[%:]|=>|>:|[#_\u21D2]|forSome|type', Keyword), + (u'<[%:]|=>|>:|[#_\u21D2]|forSome|type', Keyword), (r'([\]\)\}])', Operator, '#pop'), (r'[\(\[\{]', Operator, '#push'), - (ur'(\.|%s|%s|`[^`]+`)+' % (idrest, op), Keyword.Type) + (ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type) ], 'comment': [ (r'[^/\*]+', Comment.Multiline), @@ -1043,7 +1052,7 @@ class DylanLexer(RegexLexer): r'|v(ariable|irtual))\b', Name.Builtin), (r'<\w+>', Keyword.Type), (r'#?"(?:\\.|[^"])+?"', String.Double), - (r'//.*?\n', Comment), + (r'//.*?\n', Comment.Single), (r'/\*[\w\W]*?\*/', Comment.Multiline), (r'\'.*?\'', String.Single), (r'=>|\b(a(bove|fterwards)|b(e(gin|low)|y)|c(ase|leanup|reate)' @@ -1086,12 +1095,13 @@ class ObjectiveCLexer(RegexLexer): (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation - (r'//(\n|(.|\n)*?[^\\]\n)', Comment), - (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment), + (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single), + (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), ], 'statements': [ (r'(L|@)?"', String, 'string'), - (r"(L|@)?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), + (r"(L|@)?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", + String.Char), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), @@ 
-1107,47 +1117,52 @@ class ObjectiveCLexer(RegexLexer): r'@synthesize|@dynamic)\b', Keyword), (r'(int|long|float|short|double|char|unsigned|signed|void|' r'id|BOOL|IBOutlet|IBAction|SEL)\b', Keyword.Type), - (r'(_{0,2}inline|naked|restrict|thread|typename)\b', Keyword.Reserved), + (r'(_{0,2}inline|naked|restrict|thread|typename)\b', + Keyword.Reserved), (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|' r'declspec|finally|int64|try|leave)\b', Keyword.Reserved), (r'(TRUE|FALSE|nil|NULL)\b', Name.Builtin), - ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label), - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z$_][a-zA-Z0-9$_]*:(?!:)', Name.Label), + ('[a-zA-Z$_][a-zA-Z0-9$_]*', Name), ], 'root': [ include('whitespace'), # functions (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name + r'([a-zA-Z$_][a-zA-Z0-9$_]*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')({)', - bygroups(using(this), Name.Function, using(this), Text, Punctuation), + bygroups(using(this), Name.Function, + using(this), Text, Punctuation), 'function'), # function declarations (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name + r'([a-zA-Z$_][a-zA-Z0-9$_]*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')(;)', - bygroups(using(this), Name.Function, using(this), Text, Punctuation)), - (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text), 'classname'), - (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text), 'forward_classname'), + bygroups(using(this), Name.Function, + using(this), Text, Punctuation)), + (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text), + 'classname'), + (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text), + 'forward_classname'), (r'(\s*)(@end)(\s*)', bygroups(Text, Keyword, Text)), ('', Text, 'statement'), ], 'classname' : [ # interface definition that inherits - ('([a-zA-Z_][a-zA-Z0-9_]*)(\s*:\s*)([a-zA-Z_][a-zA-Z0-9_]*)?', + 
('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*:\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)?', bygroups(Name.Class, Text, Name.Class), '#pop'), # interface definition for a category - ('([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(\([a-zA-Z_][a-zA-Z0-9_]*\))', + ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*)(\([a-zA-Z$_][a-zA-Z0-9$_]*\))', bygroups(Name.Class, Text, Name.Label), '#pop'), # simple interface / implementation - ('([a-zA-Z_][a-zA-Z0-9_]*)', Name.Class, '#pop') + ('([a-zA-Z$_][a-zA-Z0-9$_]*)', Name.Class, '#pop') ], 'forward_classname' : [ - ('([a-zA-Z_][a-zA-Z0-9_]*)(\s*,\s*)', + ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*,\s*)', bygroups(Name.Class, Text), 'forward_classname'), - ('([a-zA-Z_][a-zA-Z0-9_]*)(\s*;?)', + ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*;?)', bygroups(Name.Class, Text), '#pop') ], 'statement' : [ @@ -1172,19 +1187,25 @@ class ObjectiveCLexer(RegexLexer): ], 'macro': [ (r'[^/\n]+', Comment.Preproc), - (r'/[*](.|\n)*?[*]/', Comment), - (r'//.*?\n', Comment, '#pop'), + (r'/[*](.|\n)*?[*]/', Comment.Multiline), + (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ - (r'^\s*#if.*?(?>|<=?|>=?|==?|&&?|\^|\|\|?', + Operator), + (r'[?:]', Operator), # quick hack for ternary + (r'\bdefined\b', Operator), + (r'[;{}(),\[\]]', Punctuation), + #FIXME when e is present, no decimal point needed + (r'[+-]?\d*\.\d+([eE][-+]?\d+)?', Number.Float), + (r'[+-]?\d+\.\d*([eE][-+]?\d+)?', Number.Float), + (r'0[xX][0-9a-fA-F]*', Number.Hex), + (r'0[0-7]*', Number.Octal), + (r'[1-9][0-9]*', Number.Integer), + (r'\b(attribute|const|uniform|varying|centroid|break|continue|' + r'do|for|while|if|else|in|out|inout|float|int|void|bool|true|' + r'false|invariant|discard|return|mat[234]|mat[234]x[234]|' + r'vec[234]|[ib]vec[234]|sampler[123]D|samplerCube|' + r'sampler[12]DShadow|struct)\b', Keyword), + (r'\b(asm|class|union|enum|typedef|template|this|packed|goto|' + r'switch|default|inline|noinline|volatile|public|static|extern|' + 
r'external|interface|long|short|double|half|fixed|unsigned|' + r'lowp|mediump|highp|precision|input|output|hvec[234]|' + r'[df]vec[234]|sampler[23]DRect|sampler2DRectShadow|sizeof|' + r'cast|namespace|using)\b', Keyword), #future use + (r'[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable), + (r'\.', Punctuation), + (r'\s+', Text), + ], + } + +class PrologLexer(RegexLexer): + """ + Lexer for Prolog files. + """ + name = 'Prolog' + aliases = ['prolog'] + filenames = ['*.prolog', '*.pro', '*.pl'] + mimetypes = ['text/x-prolog'] + + flags = re.UNICODE + + tokens = { + 'root': [ + (r'^#.*', Comment.Single), + (r'/\*', Comment.Multiline, 'nested-comment'), + (r'%.*', Comment.Single), + (r'[0-9]+', Number), + (r'[\[\](){}|.,;!]', Punctuation), + (r':-|-->', Punctuation), + (r'"(?:\\x[0-9a-fA-F]+\\|\\u[0-9a-fA-F]{4}|\U[0-9a-fA-F]{8}|' + r'\\[0-7]+\\|\\[\w\W]|[^"])*"', String.Double), + (r"'(?:''|[^'])*'", String.Atom), # quoted atom + # Needs to not be followed by an atom. + #(r'=(?=\s|[a-zA-Z\[])', Operator), + (r'(is|<|>|=<|>=|==|=:=|=|/|//|\*|\+|-)(?=\s|[a-zA-Z0-9\[])', + Operator), + (r'(mod|div|not)\b', Operator), + (r'_', Keyword), # The don't-care variable + (r'([a-z]+)(:)', bygroups(Name.Namespace, Punctuation)), + (u'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' + u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)' + u'(\\s*)(:-|-->)', + bygroups(Name.Function, Text, Operator)), # function defn + (u'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' + u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)' + u'(\\s*)(\\()', + bygroups(Name.Function, Text, Punctuation)), + (u'[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]' + u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*', + String.Atom), # atom, characters + # This one includes ! 
+ (u'[#&*+\\-./:<=>?@\\\\^~\u00a1-\u00bf\u2010-\u303f]+', + String.Atom), # atom, graphics + (r'[A-Z_][A-Za-z0-9_]*', Name.Variable), + (u'\\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]', Text), + ], + 'nested-comment': [ + (r'\*/', Comment.Multiline, '#pop'), + (r'/\*', Comment.Multiline, '#push'), + (r'[^*/]+', Comment.Multiline), + (r'[*/]', Comment.Multiline), + ], + } + + def analyse_text(text): + return ':-' in text + + +class CythonLexer(RegexLexer): + """ + For Pyrex and `Cython `_ source code. + + *New in Pygments 1.1.* + """ + + name = 'Cython' + aliases = ['cython', 'pyx'] + filenames = ['*.pyx', '*.pxd', '*.pxi'] + mimetypes = ['text/x-cython', 'application/x-cython'] + + tokens = { + 'root': [ + (r'\n', Text), + (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)), + (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)), + (r'[^\S\n]+', Text), + (r'#.*$', Comment), + (r'[]{}:(),;[]', Punctuation), + (r'\\\n', Text), + (r'\\', Text), + (r'(in|is|and|or|not)\b', Operator.Word), + (r'(<)([a-zA-Z0-9.?]+)(>)', + bygroups(Punctuation, Keyword.Type, Punctuation)), + (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator), + (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)', + bygroups(Keyword, Number.Integer, Operator, Name, Operator, + Name, Punctuation)), + include('keywords'), + (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'), + (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'), + (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'), + (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'), + (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'), + include('builtins'), + include('backtick'), + ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), + ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), + ('[uU]?"""', String, combined('stringescape', 'tdqs')), + ("[uU]?'''", String, combined('stringescape', 'tsqs')), + ('[uU]?"', String, 
combined('stringescape', 'dqs')), + ("[uU]?'", String, combined('stringescape', 'sqs')), + include('name'), + include('numbers'), + ], + 'keywords': [ + (r'(assert|break|by|continue|ctypedef|del|elif|else|except\??|exec|' + r'finally|for|gil|global|if|include|lambda|nogil|pass|print|raise|' + r'return|try|while|yield|as|with)\b', Keyword), + (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc), + ], + 'builtins': [ + (r'(?/-]', Operator), + (r'(\[)(Compact|Immutable|(?:Boolean|Simple)Type)(\])', + bygroups(Punctuation, Name.Decorator, Punctuation)), + # TODO: "correctly" parse complex code attributes + (r'(\[)(CCode|(?:Integer|Floating)Type)', + bygroups(Punctuation, Name.Decorator)), + (r'[()\[\],.]', Punctuation), + (r'(as|base|break|case|catch|construct|continue|default|delete|do|' + r'else|enum|finally|for|foreach|get|if|in|is|lock|new|out|params|' + r'return|set|sizeof|switch|this|throw|try|typeof|while|yield)\b', + Keyword), + (r'(abstract|const|delegate|dynamic|ensures|extern|inline|internal|' + r'override|owned|private|protected|public|ref|requires|signal|' + r'static|throws|unowned|var|virtual|volatile|weak|yields)\b', + Keyword.Declaration), + (r'(namespace|using)(\s+)', bygroups(Keyword.Namespace, Text), + 'namespace'), + (r'(class|errordomain|interface|struct)(\s+)', + bygroups(Keyword.Declaration, Text), 'class'), + (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', + bygroups(Operator, Name.Attribute)), + # void is an actual keyword, others are in glib-2.0.vapi + (r'(void|bool|char|double|float|int|int8|int16|int32|int64|long|' + r'short|size_t|ssize_t|string|time_t|uchar|uint|uint8|uint16|' + r'uint32|uint64|ulong|unichar|ushort)\b', Keyword.Type), + (r'(true|false|null)\b', Name.Builtin), + ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ], + 'root': [ + include('whitespace'), + ('', Text, 'statement'), + ], + 'statement' : [ + include('whitespace'), + include('statements'), + ('[{}]', Punctuation), + (';', Punctuation, '#pop'), + ], + 'string': [ + (r'"', String, '#pop'), + 
(r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), + (r'[^\\"\n]+', String), # all other characters + (r'\\\n', String), # line continuation + (r'\\', String), # stray backslash + ], + 'if0': [ + (r'^\s*#if.*?(?`_ source code + + *New in Pygments 1.2.* + """ + name = 'Ooc' + aliases = ['ooc'] + filenames = ['*.ooc'] + mimetypes = ['text/x-ooc'] + + tokens = { + 'root': [ + (r'\b(class|interface|implement|abstract|extends|from|' + r'this|super|new|const|final|static|import|use|extern|' + r'inline|proto|break|continue|fallthrough|operator|if|else|for|' + r'while|do|switch|case|as|in|version|return|true|false|null)\b', + Keyword), + (r'include\b', Keyword, 'include'), + (r'(cover)([ \t]+)(from)([ \t]+)([a-zA-Z0-9_]+[*@]?)', + bygroups(Keyword, Text, Keyword, Text, Name.Class)), + (r'(func)((?:[ \t]|\\\n)+)(~[a-z_][a-zA-Z0-9_]*)', + bygroups(Keyword, Text, Name.Function)), + (r'\bfunc\b', Keyword), + # Note: %= and ^= not listed on http://ooc-lang.org/syntax + (r'//.*', Comment), + (r'(?s)/\*.*?\*/', Comment.Multiline), + (r'(==?|\+=?|-[=>]?|\*=?|/=?|:=|!=?|%=?|\?|>{1,3}=?|<{1,3}=?|\.\.|' + r'&&?|\|\|?|\^=?)', Operator), + (r'(\.)([ \t]*)([a-z]\w*)', bygroups(Operator, Text, + Name.Function)), + (r'[A-Z][A-Z0-9_]+', Name.Constant), + (r'[A-Z][a-zA-Z0-9_]*([@*]|\[[ \t]*\])?', Name.Class), + + (r'([a-z][a-zA-Z0-9_]*(?:~[a-z][a-zA-Z0-9_]*)?)((?:[ \t]|\\\n)*)(?=\()', + bygroups(Name.Function, Text)), + (r'[a-z][a-zA-Z0-9_]*', Name.Variable), + + # : introduces types + (r'[:(){}\[\];,]', Punctuation), + + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'0c[0-9]+', Number.Octal), + (r'0b[01]+', Number.Binary), + (r'[0-9_]\.[0-9_]*(?!\.)', Number.Float), + (r'[0-9_]+', Number.Decimal), + + (r'"(?:\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\"])*"', + String.Double), + (r"'(?:\\.|\\[0-9]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", + String.Char), + (r'@', Punctuation), # pointer dereference + (r'\.', Punctuation), # imports or chain operator + + (r'\\[ \t\n]', Text), + (r'[ 
\t]+', Text), + ], + 'include': [ + (r'[\w/]+', Name), + (r',', Punctuation), + (r'[ \t]', Text), + (r'[;\n]', Text, '#pop'), + ], + } + + +class GoLexer(RegexLexer): + """ + For `Go `_ source. + """ + name = 'Go' + filenames = ['*.go'] + aliases = ['go'] + mimetypes = ['text/x-gosrc'] + + tokens = { + 'root': [ + (r'\n', Text), + (r'\s+', Text), + (r'\\\n', Text), # line continuations + (r'//(.*?)\n', Comment.Single), + (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), + (r'(break|default|func|interface|select' + r'|case|defer|go|map|struct' + r'|chan|else|goto|package|switch' + r'|const|fallthrough|if|range|type' + r'|continue|for|import|return|var)\b', Keyword + ), + # It seems the builtin types aren't actually keywords. + (r'(uint8|uint16|uint32|uint64' + r'|int8|int16|int32|int64' + r'|float32|float64|byte' + r'|uint|int|float|uintptr' + r'|string|close|closed|len|cap|new|make)\b', Name.Builtin + ), + # float_lit + (r'\d+(\.\d+[eE][+\-]?\d+|' + r'\.\d*|[eE][+\-]?\d+)', Number.Float), + (r'\.\d+([eE][+\-]?\d+)?', Number.Float), + # int_lit + # -- octal_lit + (r'0[0-7]+', Number.Oct), + # -- hex_lit + (r'0[xX][0-9a-fA-F]+', Number.Hex), + # -- decimal_lit + (r'(0|[1-9][0-9]*)', Number.Integer), + # char_lit + (r"""'(\\['"\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}""" + r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|[^\\])'""", + String.Char + ), + # StringLiteral + # -- raw_string_lit + (r'`[^`]*`', String), + # -- interpreted_string_lit + (r'"(\\\\|\\"|[^"])*"', String), + # Tokens + (r'(<<=|>>=|<<|>>|<=|>=|&\^=|&\^|\+=|-=|\*=|/=|%=|&=|\|=|&&|\|\|' + r'|<-|\+\+|--|==|!=|:=|\.\.\.)|[+\-*/%&|^<>=!()\[\]{}.,;:]', + Punctuation + ), + # identifier + (r'[a-zA-Z_]\w*', Name), + ] + } + + +class FelixLexer(RegexLexer): + """ + For `Felix `_ source code. 
+ + *New in Pygments 1.2.* + """ + + name = 'Felix' + aliases = ['felix', 'flx'] + filenames = ['*.flx', '*.flxh'] + mimetypes = ['text/x-felix'] + + preproc = [ + 'elif', 'else', 'endif', 'if', 'ifdef', 'ifndef', + ] + + keywords = [ + '_', '_deref', 'all', 'as', + 'assert', 'attempt', 'call', 'callback', 'case', 'caseno', 'cclass', + 'code', 'compound', 'ctypes', 'do', 'done', 'downto', 'elif', 'else', + 'endattempt', 'endcase', 'endif', 'endmatch', 'enum', 'except', + 'exceptions', 'expect', 'finally', 'for', 'forall', 'forget', 'fork', + 'functor', 'goto', 'ident', 'if', 'incomplete', 'inherit', 'instance', + 'interface', 'jump', 'lambda', 'loop', 'match', 'module', 'namespace', + 'new', 'noexpand', 'nonterm', 'obj', 'of', 'open', 'parse', 'raise', + 'regexp', 'reglex', 'regmatch', 'rename', 'return', 'the', 'then', + 'to', 'type', 'typecase', 'typedef', 'typematch', 'typeof', 'upto', + 'when', 'whilst', 'with', 'yield', + ] + + keyword_directives = [ + '_gc_pointer', '_gc_type', 'body', 'comment', 'const', 'export', + 'header', 'inline', 'lval', 'macro', 'noinline', 'noreturn', + 'package', 'private', 'pod', 'property', 'public', 'publish', + 'requires', 'todo', 'virtual', 'use', + ] + + keyword_declarations = [ + 'def', 'let', 'ref', 'val', 'var', + ] + + keyword_types = [ + 'unit', 'void', 'any', 'bool', + 'byte', 'offset', + 'address', 'caddress', 'cvaddress', 'vaddress', + 'tiny', 'short', 'int', 'long', 'vlong', + 'utiny', 'ushort', 'vshort', 'uint', 'ulong', 'uvlong', + 'int8', 'int16', 'int32', 'int64', + 'uint8', 'uint16', 'uint32', 'uint64', + 'float', 'double', 'ldouble', + 'complex', 'dcomplex', 'lcomplex', + 'imaginary', 'dimaginary', 'limaginary', + 'char', 'wchar', 'uchar', + 'charp', 'charcp', 'ucharp', 'ucharcp', + 'string', 'wstring', 'ustring', + 'cont', + 'array', 'varray', 'list', + 'lvalue', 'opt', 'slice', + ] + + keyword_constants = [ + 'false', 'true', + ] + + operator_words = [ + 'and', 'not', 'in', 'is', 'isin', 'or', 'xor', + ] + + 
name_builtins = [ + '_svc', 'while', + ] + + name_pseudo = [ + 'root', 'self', 'this', + ] + + decimal_suffixes = '([tTsSiIlLvV]|ll|LL|([iIuU])(8|16|32|64))?' + + tokens = { + 'root': [ + include('whitespace'), + + # Keywords + (r'(axiom|ctor|fun|gen|proc|reduce|union)\b', Keyword, + 'funcname'), + (r'(class|cclass|cstruct|obj|struct)\b', Keyword, 'classname'), + (r'(instance|module|typeclass)\b', Keyword, 'modulename'), + + (r'(%s)\b' % '|'.join(keywords), Keyword), + (r'(%s)\b' % '|'.join(keyword_directives), Name.Decorator), + (r'(%s)\b' % '|'.join(keyword_declarations), Keyword.Declaration), + (r'(%s)\b' % '|'.join(keyword_types), Keyword.Type), + (r'(%s)\b' % '|'.join(keyword_constants), Keyword.Constant), + + # Operators + include('operators'), + + # Float Literal + # -- Hex Float + (r'0[xX]([0-9a-fA-F_]*\.[0-9a-fA-F_]+|[0-9a-fA-F_]+)' + r'[pP][+\-]?[0-9_]+[lLfFdD]?', Number.Float), + # -- DecimalFloat + (r'[0-9_]+(\.[0-9_]+[eE][+\-]?[0-9_]+|' + r'\.[0-9_]*|[eE][+\-]?[0-9_]+)[lLfFdD]?', Number.Float), + (r'\.(0|[1-9][0-9_]*)([eE][+\-]?[0-9_]+)?[lLfFdD]?', + Number.Float), + + # IntegerLiteral + # -- Binary + (r'0[Bb][01_]+%s' % decimal_suffixes, Number), + # -- Octal + (r'0[0-7_]+%s' % decimal_suffixes, Number.Oct), + # -- Hexadecimal + (r'0[xX][0-9a-fA-F_]+%s' % decimal_suffixes, Number.Hex), + # -- Decimal + (r'(0|[1-9][0-9_]*)%s' % decimal_suffixes, Number.Integer), + + # Strings + ('([rR][cC]?|[cC][rR])"""', String, 'tdqs'), + ("([rR][cC]?|[cC][rR])'''", String, 'tsqs'), + ('([rR][cC]?|[cC][rR])"', String, 'dqs'), + ("([rR][cC]?|[cC][rR])'", String, 'sqs'), + ('[cCfFqQwWuU]?"""', String, combined('stringescape', 'tdqs')), + ("[cCfFqQwWuU]?'''", String, combined('stringescape', 'tsqs')), + ('[cCfFqQwWuU]?"', String, combined('stringescape', 'dqs')), + ("[cCfFqQwWuU]?'", String, combined('stringescape', 'sqs')), + + # Punctuation + (r'[\[\]{}:(),;?]', Punctuation), + + # Labels + (r'[a-zA-Z_]\w*:>', Name.Label), + + # Identifiers + (r'(%s)\b' % 
'|'.join(name_builtins), Name.Builtin), + (r'(%s)\b' % '|'.join(name_pseudo), Name.Builtin.Pseudo), + (r'[a-zA-Z_]\w*', Name), + ], + 'whitespace': [ + (r'\n', Text), + (r'\s+', Text), + + include('comment'), + + # Preprocessor + (r'#\s*if\s+0', Comment.Preproc, 'if0'), + (r'#', Comment.Preproc, 'macro'), + ], + 'operators': [ + (r'(%s)\b' % '|'.join(operator_words), Operator.Word), + (r'!=|==|<<|>>|\|\||&&|[-~+/*%=<>&^|.$]', Operator), + ], + 'comment': [ + (r'//(.*?)\n', Comment.Single), + (r'/[*]', Comment.Multiline, 'comment2'), + ], + 'comment2': [ + (r'[^\/*]', Comment.Multiline), + (r'/[*]', Comment.Multiline, '#push'), + (r'[*]/', Comment.Multiline, '#pop'), + (r'[\/*]', Comment.Multiline), + ], + 'if0': [ + (r'^\s*#if.*?(?]*?>)', + bygroups(Comment.Preproc, Text, String), '#pop'), + (r'(import|include)(\s+)("[^"]*?")', + bygroups(Comment.Preproc, Text, String), '#pop'), + (r"(import|include)(\s+)('[^']*?')", + bygroups(Comment.Preproc, Text, String), '#pop'), + (r'[^/\n]+', Comment.Preproc), + ##(r'/[*](.|\n)*?[*]/', Comment), + ##(r'//.*?\n', Comment, '#pop'), + (r'/', Comment.Preproc), + (r'(?<=\\)\n', Comment.Preproc), + (r'\n', Comment.Preproc, '#pop'), + ], + 'funcname': [ + include('whitespace'), + (r'[a-zA-Z_]\w*', Name.Function, '#pop'), + # anonymous functions + (r'(?=\()', Text, '#pop'), + ], + 'classname': [ + include('whitespace'), + (r'[a-zA-Z_]\w*', Name.Class, '#pop'), + # anonymous classes + (r'(?=\{)', Text, '#pop'), + ], + 'modulename': [ + include('whitespace'), + (r'\[', Punctuation, ('modulename2', 'tvarlist')), + (r'', Error, 'modulename2'), + ], + 'modulename2': [ + include('whitespace'), + (r'([a-zA-Z_]\w*)', Name.Namespace, '#pop:2'), + ], + 'tvarlist': [ + include('whitespace'), + include('operators'), + (r'\[', Punctuation, '#push'), + (r'\]', Punctuation, '#pop'), + (r',', Punctuation), + (r'(with|where)\b', Keyword), + (r'[a-zA-Z_]\w*', Name), + ], + 'stringescape': [ + (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' + 
class AdaLexer(RegexLexer):
    """
    For Ada source code.

    *New in Pygments 1.3.*
    """

    name = 'Ada'
    # FIX: a comma was missing between 'ada95' and 'ada2005'; the two string
    # literals were being concatenated into the single alias 'ada95ada2005',
    # so neither alias worked on its own.
    aliases = ['ada', 'ada95', 'ada2005']
    filenames = ['*.adb', '*.ads', '*.ada']
    mimetypes = ['text/x-ada']

    flags = re.MULTILINE | re.I  # Ignore case

    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'root': [
            (r'[^\S\n]+', Text),
            (r'--.*?\n', Comment.Single),
            # NOTE(review): this whitespace rule duplicates the first one;
            # kept as-is since it is harmless and removal is cosmetic.
            (r'[^\S\n]+', Text),
            (r'function|procedure|entry', Keyword.Declaration, 'subprogram'),
            (r'(subtype|type)(\s+)([a-z0-9_]+)',
             bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
            (r'task|protected', Keyword.Declaration),
            (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)),
            (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'),
            (r'(pragma)(\s+)([a-zA-Z0-9_]+)',
             bygroups(Keyword.Reserved, Text, Comment.Preproc)),
            (r'(true|false|null)\b', Keyword.Constant),
            (r'(Byte|Character|Float|Integer|Long_Float|Long_Integer|'
             r'Long_Long_Float|Long_Long_Integer|Natural|Positive|Short_Float|'
             r'Short_Integer|Short_Short_Float|Short_Short_Integer|String|'
             r'Wide_String|Duration)\b', Keyword.Type),
            # FIX: '(\s+else)' lacked the trailing '?', so a bare 'or' could
            # never match this rule as an Operator.Word (it fell through to
            # the generic reserved-word rule below).
            (r'(and(\s+then)?|in|mod|not|or(\s+else)?|rem)\b', Operator.Word),
            (r'generic|private', Keyword.Declaration),
            (r'package', Keyword.Declaration, 'package'),
            (r'array\b', Keyword.Reserved, 'array_def'),
            (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'([a-z0-9_]+)(\s*)(:)(\s*)(constant)',
             bygroups(Name.Constant, Text, Punctuation, Text,
                      Keyword.Reserved)),
            (r'<<[a-z0-9_]+>>', Name.Label),
            (r'([a-z0-9_]+)(\s*)(:)(\s*)(declare|begin|loop|for|while)',
             bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)),
            (r'\b(abort|abs|abstract|accept|access|aliased|all|array|at|begin|'
             r'body|case|constant|declare|delay|delta|digits|do|else|elsif|end|'
             r'entry|exception|exit|interface|for|goto|if|is|limited|loop|new|'
             r'null|of|or|others|out|overriding|pragma|protected|raise|range|'
             r'record|renames|requeue|return|reverse|select|separate|subtype|'
             r'synchronized|task|tagged|terminate|then|type|until|when|while|'
             r'xor)\b',
             Keyword.Reserved),
            (r'"[^"]*"', String),
            include('attribute'),
            include('numbers'),
            (r"'[^']'", String.Character),
            (r'([a-z0-9_]+)(\s*|[(,])', bygroups(Name, using(this))),
            (r"(<>|=>|:=|[\(\)\|:;,.'])", Punctuation),
            (r'[*<>+=/&-]', Operator),
            (r'\n+', Text),
        ],
        'numbers': [
            (r'[0-9_]+#[0-9a-f]+#', Number.Hex),
            (r'[0-9_]+\.[0-9_]*', Number.Float),
            (r'[0-9_]+', Number.Integer),
        ],
        'attribute': [
            (r"(')([a-zA-Z0-9_]+)", bygroups(Punctuation, Name.Attribute)),
        ],
        'subprogram': [
            (r'\(', Punctuation, ('#pop', 'formal_part')),
            (r';', Punctuation, '#pop'),
            (r'is\b', Keyword.Reserved, '#pop'),
            (r'"[^"]+"|[a-z0-9_]+', Name.Function),
            include('root'),
        ],
        'end': [
            (r'(if|case|record|loop|select)', Keyword.Reserved),
            (r'"[^"]+"|[a-zA-Z0-9_]+', Name.Function),
            # FIX: raw string — '[\n\s]+' relied on invalid string escapes.
            (r'[\n\s]+', Text),
            (r';', Punctuation, '#pop'),
        ],
        'type_def': [
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'formal_part'),
            (r'with|and|use', Keyword.Reserved),
            (r'array\b', Keyword.Reserved, ('#pop', 'array_def')),
            (r'record\b', Keyword.Reserved, ('formal_part')),
            include('root'),
        ],
        'array_def': [
            (r';', Punctuation, '#pop'),
            (r'([a-z0-9_]+)(\s+)(range)',
             bygroups(Keyword.Type, Text, Keyword.Reserved)),
            include('root'),
        ],
        'import': [
            (r'[a-z0-9_.]+', Name.Namespace, '#pop'),
        ],
        'formal_part': [
            (r'\)', Punctuation, '#pop'),
            (r'([a-z0-9_]+)(\s*)(,|:[^=])',
             bygroups(Name.Variable, Text, Punctuation)),
            (r'(in|not|null|out|access)\b', Keyword.Reserved),
            include('root'),
        ],
        'package': [
            (r'body', Keyword.Declaration),
            # FIX: raw string — 'is\s+new' relied on an invalid escape.
            (r'is\s+new|renames', Keyword.Reserved),
            (r'is', Keyword.Reserved, '#pop'),
            (r';', Punctuation, '#pop'),
            (r'\(', Punctuation, 'package_instantiation'),
            (r'([a-zA-Z0-9_.]+)', Name.Class),
            include('root'),
        ],
        'package_instantiation': [
            (r'("[^"]+"|[a-z0-9_]+)(\s+)(=>)',
             bygroups(Name.Variable, Text, Punctuation)),
            (r'[a-z0-9._\'"]', Text),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],
    }
class Modula2Lexer(RegexLexer):
    """
    For `Modula-2 <http://www.modula2.org/>`_ source code.

    Additional options that determine which keywords are highlighted:

    `pim`
        Select PIM Modula-2 dialect (default: True).
    `iso`
        Select ISO Modula-2 dialect (default: False).
    `objm2`
        Select Objective Modula-2 dialect (default: False).
    `gm2ext`
        Also highlight GNU extensions (default: False).

    *New in Pygments 1.3.*
    """
    name = 'Modula-2'
    aliases = ['modula2', 'm2']
    filenames = ['*.def', '*.mod']
    mimetypes = ['text/x-modula2']

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'whitespace': [
            (r'\n+', Text),  # blank lines
            (r'\s+', Text),  # whitespace
        ],
        'identifiers': [
            (r'([a-zA-Z_\$][a-zA-Z0-9_\$]*)', Name),
        ],
        'numliterals': [
            (r'[01]+B', Number.Binary),       # binary number (ObjM2)
            (r'[0-7]+B', Number.Oct),         # octal number (PIM + ISO)
            (r'[0-7]+C', Number.Oct),         # char code (PIM + ISO)
            (r'[0-9A-F]+C', Number.Hex),      # char code (ObjM2)
            (r'[0-9A-F]+H', Number.Hex),      # hexadecimal number
            (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float),  # real number
            (r'[0-9]+\.[0-9]+', Number.Float),             # real number
            (r'[0-9]+', Number.Integer),      # decimal whole number
        ],
        'strings': [
            (r"'(\\\\|\\'|[^'])*'", String),  # single quoted string
            (r'"(\\\\|\\"|[^"])*"', String),  # double quoted string
        ],
        'operators': [
            (r'[*/+=#~&<>\^-]', Operator),
            (r':=', Operator),    # assignment
            (r'@', Operator),     # pointer deref (ISO)
            (r'\.\.', Operator),  # ellipsis or range
            (r'`', Operator),     # Smalltalk message (ObjM2)
            (r'::', Operator),    # type conversion (ObjM2)
        ],
        'punctuation': [
            (r'[\(\)\[\]{},.:;|]', Punctuation),
        ],
        'comments': [
            (r'//.*?\n', Comment.Single),         # ObjM2
            (r'/\*(.*?)\*/', Comment.Multiline),  # ObjM2
            (r'\(\*([^\$].*?)\*\)', Comment.Multiline),
            # TO DO: nesting of (* ... *) comments
        ],
        'pragmas': [
            (r'\(\*\$(.*?)\*\)', Comment.Preproc),  # PIM
            (r'<\*(.*?)\*>', Comment.Preproc),      # ISO + ObjM2
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            include('pragmas'),
            include('identifiers'),
            include('numliterals'),
            include('strings'),
            include('operators'),
            include('punctuation'),
        ]
    }

    pim_reserved_words = [
        # 40 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION',
        'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR',
        'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD',
        'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED',
        'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
        'UNTIL', 'VAR', 'WHILE', 'WITH',
    ]

    pim_pervasives = [
        # 31 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC',
        'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL',
        'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD',
        'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL',
    ]

    iso_reserved_words = [
        # 46 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
        'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY',
        'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN',
        'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER',
        'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY',
        'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
        'WITH',
    ]

    iso_pervasives = [
        # 42 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX',
        'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH',
        'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH',
        'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW',
        'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE',
        # FIX: was misspelled 'UNINTERRUBTIBLE'; the ISO Modula-2 pervasive
        # is UNINTERRUPTIBLE, so the misspelling could never match real code.
        'TRUE', 'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
    ]

    objm2_reserved_words = [
        # base language, 42 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
        'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF',
        'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD',
        'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE',
        'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
        'UNTIL', 'VAR', 'VARIADIC', 'WHILE',
        # OO extensions, 16 reserved words
        'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
        'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
        'SUPER', 'TRY',
    ]

    objm2_pervasives = [
        # base language, 38 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE',
        'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD',
        'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL',
        'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX',
        'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF',
        # OO extensions, 3 pervasives
        'OBJECT', 'NO', 'YES',
    ]

    gnu_reserved_words = [
        # 10 additional reserved words
        'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
        '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
    ]

    gnu_pervasives = [
        # 21 identifiers, actually from pseudo-module SYSTEM
        # but we will highlight them as if they were pervasives
        'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
        'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
        'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
        'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
    ]

    def __init__(self, **options):
        # Build the keyword sets for the selected dialect; exactly one of
        # iso/objm2/pim is active, with pim as the default.
        self.reserved_words = set()
        self.pervasives = set()
        # ISO Modula-2
        if get_bool_opt(options, 'iso', False):
            self.reserved_words.update(self.iso_reserved_words)
            self.pervasives.update(self.iso_pervasives)
        # Objective Modula-2
        elif get_bool_opt(options, 'objm2', False):
            self.reserved_words.update(self.objm2_reserved_words)
            self.pervasives.update(self.objm2_pervasives)
        # PIM Modula-2 (DEFAULT)
        else:
            self.reserved_words.update(self.pim_reserved_words)
            self.pervasives.update(self.pim_pervasives)
        # GNU extensions are additive on top of any dialect
        if get_bool_opt(options, 'gm2ext', False):
            self.reserved_words.update(self.gnu_reserved_words)
            self.pervasives.update(self.gnu_pervasives)
        # initialise
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Reclassify plain Name tokens that are dialect keywords."""
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            # check for reserved words and pervasives
            if token is Name:
                if value in self.reserved_words:
                    token = Keyword.Reserved
                elif value in self.pervasives:
                    token = Keyword.Pervasive
            # return result
            yield index, token, value
""" import re -from pygments.lexer import RegexLexer, bygroups, using, this +from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, using, this from pygments.token import Punctuation, \ - Text, Comment, Operator, Keyword, Name, String, Number, Literal + Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other from pygments.util import get_choice_opt from pygments import unistring as uni -__all__ = ['CSharpLexer', 'BooLexer', 'VbNetLexer'] +from pygments.lexers.web import XmlLexer + +__all__ = ['CSharpLexer', 'BooLexer', 'VbNetLexer', 'CSharpAspxLexer', + 'VbNetAspxLexer'] def _escape(st): @@ -83,11 +86,11 @@ class CSharpLexer(RegexLexer): (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation - (r'//.*?\n', Comment), - (r'/[*](.|\n)*?[*]/', Comment), + (r'//.*?\n', Comment.Single), + (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), - (r'[{}]', Keyword), + (r'[{}]', Punctuation), (r'@"(\\\\|\\"|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), @@ -148,8 +151,8 @@ class BooLexer(RegexLexer): tokens = { 'root': [ (r'\s+', Text), - (r'(#|//).*$', Comment), - (r'/[*]', Comment, 'comment'), + (r'(#|//).*$', Comment.Single), + (r'/[*]', Comment.Multiline, 'comment'), (r'[]{}:(),.;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), @@ -286,3 +289,67 @@ class VbNetLexer(RegexLexer): (r'[a-z_][a-z0-9_.]*', Name.Namespace, '#pop') ], } + +class GenericAspxLexer(RegexLexer): + """ + Lexer for ASP.NET pages. 
+ """ + + name = 'aspx-gen' + filenames = [] + mimetypes = [] + + flags = re.DOTALL + + tokens = { + 'root': [ + (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)), + (r'()(.*?)()', bygroups(using(XmlLexer), + Other, + using(XmlLexer))), + (r'(.+?)(?=<)', using(XmlLexer)), + (r'.+', using(XmlLexer)), + ], + } + +#TODO support multiple languages within the same source file +class CSharpAspxLexer(DelegatingLexer): + """ + Lexer for highligting C# within ASP.NET pages. + """ + + name = 'aspx-cs' + aliases = ['aspx-cs'] + filenames = ['*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'] + mimetypes = [] + + def __init__(self, **options): + super(CSharpAspxLexer, self).__init__(CSharpLexer,GenericAspxLexer, + **options) + + def analyse_text(text): + if re.search(r'Page\s*Language="C#"', text, re.I) is not None: + return 0.2 + elif re.search(r'script[^>]+language=["\']C#', text, re.I) is not None: + return 0.15 + return 0.001 # TODO really only for when filename matched... + +class VbNetAspxLexer(DelegatingLexer): + """ + Lexer for highligting Visual Basic.net within ASP.NET pages. + """ + + name = 'aspx-vb' + aliases = ['aspx-vb'] + filenames = ['*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'] + mimetypes = [] + + def __init__(self, **options): + super(VbNetAspxLexer, self).__init__(VbNetLexer,GenericAspxLexer, + **options) + + def analyse_text(text): + if re.search(r'Page\s*Language="Vb"', text, re.I) is not None: + return 0.2 + elif re.search(r'script[^>]+language=["\']vb', text, re.I) is not None: + return 0.15 diff --git a/pygments_package/pygments/lexers/functional.py b/pygments_package/pygments/lexers/functional.py index c885cff..ffbd753 100644 --- a/pygments_package/pygments/lexers/functional.py +++ b/pygments_package/pygments/lexers/functional.py @@ -5,25 +5,19 @@ Lexers for functional languages. - :copyright: 2006-2008 by Georg Brandl, Marek Kubica, - Adam Blinkinsop , Matteo Sasso. - :license: BSD, see LICENSE for more details. 
+ :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ import re -try: - set -except NameError: - from sets import Set as set -from pygments.lexer import Lexer, RegexLexer, bygroups, using, this, include, \ - do_insertions +from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions from pygments.token import Text, Comment, Operator, Keyword, Name, \ - String, Number, Punctuation, Literal + String, Number, Punctuation, Literal, Generic __all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'LiterateHaskellLexer', - 'OcamlLexer', 'ErlangLexer'] + 'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer'] class SchemeLexer(RegexLexer): @@ -247,7 +241,7 @@ def get_tokens_unprocessed(self, text): (r'#\d*Y.*$', Comment.Special), # strings and characters - (r'"(\\.|[^"])*"', String), + (r'"(\\.|[^"\\])*"', String), # quoting (r":" + symbol, String.Symbol), (r"'" + symbol, String.Symbol), @@ -354,7 +348,7 @@ class HaskellLexer(RegexLexer): # Whitespace: (r'\s+', Text), #(r'--\s*|.*$', Comment.Doc), - (r'--.*$', Comment.Single), + (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single), (r'{-', Comment.Multiline, 'comment'), # Lexemes: # Identifiers @@ -445,7 +439,7 @@ class HaskellLexer(RegexLexer): (r'o[0-7]+', String.Escape, '#pop'), (r'x[\da-fA-F]+', String.Escape, '#pop'), (r'\d+', String.Escape, '#pop'), - (r'\n\s+\\', String.Escape, '#pop'), + (r'\s+\\', String.Escape, '#pop'), ], } @@ -476,7 +470,7 @@ def get_tokens_unprocessed(self, text): style = self.options.get('litstyle') if style is None: - style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird' + style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird' code = '' insertions = [] @@ -486,7 +480,8 @@ def get_tokens_unprocessed(self, text): line = match.group() m = bird_re.match(line) if m: - insertions.append((len(code), [(0, Comment.Special, m.group(1))])) + insertions.append((len(code), + [(0, Comment.Special, m.group(1))])) code += 
class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).

    *New in Pygments 1.1.*
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']

    _prompt_re = re.compile(r'\d+>(?=\s|\Z)')

    def get_tokens_unprocessed(self, text):
        # Code typed at numbered prompts is buffered and delegated to an
        # ErlangLexer; every other line is shell output (or a traceback,
        # which erl prints as lines starting with '*').
        erl = ErlangLexer(**self.options)

        pending = ''
        prompt_marks = []
        for match in line_re.finditer(text):
            line = match.group()
            prompt = self._prompt_re.match(line)
            if prompt is None:
                # Output line: flush any buffered prompt code first.
                if pending:
                    for tok in do_insertions(
                            prompt_marks,
                            erl.get_tokens_unprocessed(pending)):
                        yield tok
                    pending = ''
                    prompt_marks = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
            else:
                cut = prompt.end()
                prompt_marks.append((len(pending),
                                     [(0, Generic.Prompt, line[:cut])]))
                pending += line[cut:]
        # Trailing code with no output after it.
        if pending:
            for tok in do_insertions(prompt_marks,
                                     erl.get_tokens_unprocessed(pending)):
                yield tok
b/pygments_package/pygments/lexers/math.py @@ -5,25 +5,20 @@ Lexers for math languages. - :copyright: 2007-2008 by Christopher Creutzig, Ken Schutte, Stou Sandalski, - Laurent Gautier . - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ import re -try: - set -except NameError: - from sets import Set as set -from pygments.lexer import Lexer, RegexLexer, bygroups, include +from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions from pygments.token import Comment, String, Punctuation, Keyword, Name, \ Operator, Number, Text, Generic from pygments.lexers.agile import PythonLexer __all__ = ['MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'NumPyLexer', - 'SLexer'] + 'RConsoleLexer', 'SLexer'] class MuPADLexer(RegexLexer): @@ -159,14 +154,14 @@ class MatlabLexer(RegexLexer): (r'^\s*function', Keyword, 'deffunc'), # from 'iskeyword' on version 7.4.0.336 (R2007a): - (r'break|case|catch|classdef|continue|else|elseif|end|for|function|' - r'global|if|otherwise|parfor|persistent|return|switch|try|while', + (r'(break|case|catch|classdef|continue|else|elseif|end|for|function|' + r'global|if|otherwise|parfor|persistent|return|switch|try|while)\b', Keyword), - ("|".join(elfun+specfun+elmat), Name.Builtin), + ("(" + "|".join(elfun+specfun+elmat) + r')\b', Name.Builtin), # operators: - (r'-|==|~=|<|>|<=|>=|&&|&|~', Operator), + (r'-|==|~=|<|>|<=|>=|&&|&|~|\|\|?', Operator), # operators requiring escape for re: (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator), @@ -175,22 +170,30 @@ class MatlabLexer(RegexLexer): (r'=|:|;', Punctuation), # quote can be transpose, instead of string: - (r'(\w+)(\')', bygroups(Text, Operator)), + # (not great, but handles common cases...) 
+ (r'(?<=[\w\)\]])\'', Operator), - (r'\'', String, 'string'), + (r'(?') or line.startswith('+'): + # Colorize the prompt as such, + # then put rest of line into current_code_block + insertions.append((len(current_code_block), + [(0, Generic.Prompt, line[:2])])) + current_code_block += line[2:] + else: + # We have reached a non-prompt line! + # If we have stored prompt lines, need to process them first. + if current_code_block: + # Weave together the prompts and highlight code. + for item in do_insertions(insertions, + slexer.get_tokens_unprocessed(current_code_block)): + yield item + # Reset vars for next code block. + current_code_block = '' + insertions = [] + # Now process the actual line itself, this is output from R. + yield match.start(), Generic.Output, line + + # If we happen to end on a code block with nothing after it, need to + # process the last code block. This is neither elegant nor DRY so + # should be changed. + if current_code_block: + for item in do_insertions(insertions, + slexer.get_tokens_unprocessed(current_code_block)): + yield item + + class SLexer(RegexLexer): """ For S, S-plus, and R source code. @@ -362,21 +411,24 @@ class SLexer(RegexLexer): Keyword.Reserved) ], 'operators': [ - (r'<-|-|==|<=|>=|<|>|&&|&|!=', Operator), + (r'<-|-|==|<=|>=|<|>|&&|&|!=|\|\|?', Operator), (r'\*|\+|\^|/|%%|%/%|=', Operator), (r'%in%|%*%', Operator) ], 'builtin_symbols': [ - (r'NULL|NA|TRUE|FALSE', Keyword.Constant), + (r'(NULL|NA|TRUE|FALSE|NaN)\b', Keyword.Constant), + (r'(T|F)\b', Keyword.Variable), ], 'numbers': [ (r'(?, - Stou Sandalski, Paulo Moura, Clara Dimene, - Andreas Amann . - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" import re -from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, this, \ - do_insertions +from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ + this, do_insertions from pygments.token import Error, Punctuation, \ Text, Comment, Operator, Keyword, Name, String, Number, Generic from pygments.util import shebang_matches +from pygments.lexers.web import HtmlLexer __all__ = ['SqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'BrainfuckLexer', 'BashLexer', 'BatchLexer', 'BefungeLexer', 'RedcodeLexer', 'MOOCodeLexer', 'SmalltalkLexer', 'TcshLexer', 'LogtalkLexer', - 'GnuplotLexer', 'PovrayLexer', 'AppleScriptLexer'] + 'GnuplotLexer', 'PovrayLexer', 'AppleScriptLexer', + 'BashSessionLexer', 'ModelicaLexer', 'RebolLexer', 'ABAPLexer', + 'NewspeakLexer', 'GherkinLexer', 'AsymptoteLexer'] line_re = re.compile('.*?\n') @@ -80,9 +81,9 @@ class SqlLexer(RegexLexer): r'INCLUDING|INCREMENT|INDEX|INDITCATOR|INFIX|INHERITS|INITIALIZE|' r'INITIALLY|INNER|INOUT|INPUT|INSENSITIVE|INSERT|INSTANTIABLE|' r'INSTEAD|INTERSECT|INTO|INVOKER|IS|ISNULL|ISOLATION|ITERATE|JOIN|' - r'K|KEY|KEY_MEMBER|KEY_TYPE|LANCOMPILER|LANGUAGE|LARGE|LAST|' - r'LATERAL|LEADING|LEFT|LENGTH|LESS|LEVEL|LIKE|LILMIT|LISTEN|LOAD|' - r'LOCAL|LOCALTIME|LOCALTIMESTAMP|LOCATION|LOCATOR|LOCK|LOWER|M|' + r'KEY|KEY_MEMBER|KEY_TYPE|LANCOMPILER|LANGUAGE|LARGE|LAST|' + r'LATERAL|LEADING|LEFT|LENGTH|LESS|LEVEL|LIKE|LIMIT|LISTEN|LOAD|' + r'LOCAL|LOCALTIME|LOCALTIMESTAMP|LOCATION|LOCATOR|LOCK|LOWER|' r'MAP|MATCH|MAX|MAXVALUE|MESSAGE_LENGTH|MESSAGE_OCTET_LENGTH|' r'MESSAGE_TEXT|METHOD|MIN|MINUTE|MINVALUE|MOD|MODE|MODIFIES|' r'MODIFY|MONTH|MORE|MOVE|MUMPS|NAMES|NATIONAL|NATURAL|NCHAR|' @@ -317,16 +318,17 @@ class BefungeLexer(RegexLexer): } + class BashLexer(RegexLexer): """ - Lexer for (ba)sh shell scripts. + Lexer for (ba|k|)sh shell scripts. 
class BashSessionLexer(Lexer):
    """
    Lexer for simplistic shell sessions.

    *New in Pygments 1.1.*
    """

    name = 'Bash Session'
    aliases = ['console']
    filenames = ['*.sh-session']
    mimetypes = ['application/x-shell-session']

    def get_tokens_unprocessed(self, text):
        # Commands entered at a $/#/% prompt (plus '>' continuation lines)
        # are buffered and delegated to a BashLexer; everything else is
        # emitted verbatim as Generic.Output.
        inner = BashLexer(**self.options)
        prompt_re = re.compile(
            r'^((?:|sh\S*?|\w+\S+[@:]\S+(?:\s+\S+)?|\[\S+[@:]'
            r'[^\n]+\].+)[$#%])(.*\n?)')

        start_pos = 0
        code = ''
        prompts = []

        for match in line_re.finditer(text):
            line = match.group()
            m = prompt_re.match(line)
            if m:
                # To support output lexers (say diff output), the output
                # needs to be broken by prompts whenever the output lexer
                # changes.
                if not prompts:
                    start_pos = match.start()
                prompts.append((len(code),
                                [(0, Generic.Prompt, m.group(1))]))
                code += m.group(2)
            elif line.startswith('>'):
                prompts.append((len(code),
                                [(0, Generic.Prompt, line[:1])]))
                code += line[1:]
            else:
                if prompts:
                    toks = inner.get_tokens_unprocessed(code)
                    for i, t, v in do_insertions(prompts, toks):
                        yield start_pos + i, t, v
                yield match.start(), Generic.Output, line
                prompts = []
                code = ''
        if prompts:
            for i, t, v in do_insertions(prompts,
                                         inner.get_tokens_unprocessed(code)):
                yield start_pos + i, t, v
- (r'(\d+r)?-?\d+(\.\d+)?(e-?\d+)?', Number), (r'#*\(', String.Symbol, 'inner_parenth'), ], 'parenth' : [ - # This state is a bit tricky since + # This state is a bit tricky since # we can't just pop this state (r'\)', String.Symbol, ('root','afterobject')), include('_parenth_helper'), @@ -601,16 +658,19 @@ class SmalltalkLexer(RegexLexer): 'objects' : [ (r'\[', Text, 'blockvariables'), (r'\]', Text, 'afterobject'), - (r'\b(self|super|true|false|nil|thisContext)\b', Name.Builtin.Pseudo, 'afterobject'), + (r'\b(self|super|true|false|nil|thisContext)\b', + Name.Builtin.Pseudo, 'afterobject'), (r'\b[A-Z]\w*(?!:)\b', Name.Class, 'afterobject'), (r'\b[a-z]\w*(?!:)\b', Name.Variable, 'afterobject'), - (r'#("[^"]*"|[-+*/\\~<>=|&!?,@%]+|[\w:]+)', String.Symbol, 'afterobject'), + (r'#("[^"]*"|[-+*/\\~<>=|&!?,@%]+|[\w:]+)', + String.Symbol, 'afterobject'), include('literals'), ], 'afterobject' : [ (r'! !$', Keyword , '#pop'), # squeak chunk delimeter include('whitespaces'), - (r'\b(ifTrue:|ifFalse:|whileTrue:|whileFalse:|timesRepeat:)', Name.Builtin, '#pop'), + (r'\b(ifTrue:|ifFalse:|whileTrue:|whileFalse:|timesRepeat:)', + Name.Builtin, '#pop'), (r'\b(new\b(?!:))', Name.Builtin), (r'\:=|\_', Operator, '#pop'), (r'\b[a-zA-Z]+\w*:', Name.Function, '#pop'), @@ -643,6 +703,7 @@ class SmalltalkLexer(RegexLexer): ], } + class TcshLexer(RegexLexer): """ Lexer for tcsh scripts. 
@@ -684,8 +745,8 @@ class TcshLexer(RegexLexer): (r'<<\s*(\'?)\\?(\w+)[\w\W]+?\2', String), ], 'data': [ - (r'"(\\\\|\\[0-7]+|\\.|[^"])*"', String.Double), - (r"'(\\\\|\\[0-7]+|\\.|[^'])*'", String.Single), + (r'(?s)"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double), + (r"(?s)'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single), (r'\s+', Text), (r'[^=\s\n\[\]{}()$"\'`\\]+', Text), (r'\d+(?= |\Z)', Number), @@ -747,7 +808,8 @@ class LogtalkLexer(RegexLexer): # Reflection (r'(current_predicate|predicate_property)(?=[(])', Keyword), # DCGs and term expansion - (r'(expand_term|(goal|term)_expansion|phrase)(?=[(])', Keyword), + (r'(expand_(goal|term)|(goal|term)_expansion|phrase)(?=[(])', + Keyword), # Entity (r'(abolish|c(reate|urrent))_(object|protocol|category)(?=[(])', Keyword), @@ -860,19 +922,22 @@ class LogtalkLexer(RegexLexer): ], 'directive': [ + # Conditional compilation directives + (r'(el)?if(?=[(])', Keyword, 'root'), + (r'(e(lse|ndif))[.]', Keyword, 'root'), # Entity directives (r'(category|object|protocol)(?=[(])', Keyword, 'entityrelations'), (r'(end_(category|object|protocol))[.]',Keyword, 'root'), # Predicate scope directives (r'(public|protected|private)(?=[(])', Keyword, 'root'), # Other directives - (r'e(ncoding|xport)(?=[(])', Keyword, 'root'), + (r'e(n(coding|sure_loaded)|xport)(?=[(])', Keyword, 'root'), (r'in(fo|itialization)(?=[(])', Keyword, 'root'), (r'(dynamic|synchronized|threaded)[.]', Keyword, 'root'), - (r'(alias|d(ynamic|iscontiguous)|m(eta_predicate|ode|ultifile)' - r'|synchronized)(?=[(])', Keyword, 'root'), + (r'(alias|d(ynamic|iscontiguous)|m(eta_predicate|ode|ultifile)|' + r's(et_(logtalk|prolog)_flag|ynchronized))(?=[(])', Keyword, 'root'), (r'op(?=[(])', Keyword, 'root'), - (r'(calls|use(s|_module))(?=[(])', Keyword, 'root'), + (r'(calls|reexport|use(s|_module))(?=[(])', Keyword, 'root'), (r'[a-z][a-zA-Z0-9_]*(?=[(])', Text, 'root'), (r'[a-z][a-zA-Z0-9_]*[.]', Text, 'root'), ], @@ -908,6 +973,15 @@ class LogtalkLexer(RegexLexer): ] } + def 
class ModelicaLexer(RegexLexer):
    """
    For `Modelica <http://www.modelica.org/>`_ source code.

    *New in Pygments 1.1.*
    """
    name = 'Modelica'
    aliases = ['modelica']
    filenames = ['*.mo']
    mimetypes = ['text/x-modelica']

    flags = re.IGNORECASE | re.DOTALL

    tokens = {
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),  # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment),
        ],
        'statements': [
            (r'"', String, 'string'),
            (r'(\d+\.\d*|\.\d+|\d+|\d.)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+)', Number.Float),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\]{},.;]', Punctuation),
            (r'(true|false|NULL|Real|Integer|Boolean)\b', Name.Builtin),
            (r"([a-zA-Z_][\w]*|'[a-zA-Z_\+\-\*\/\^][\w]*')"
             r"(\.([a-zA-Z_][\w]*|'[a-zA-Z_\+\-\*\/\^][\w]*'))+", Name.Class),
            (r"('[\w\+\-\*\/\^]+'|\w+)", Name),
        ],
        'root': [
            include('whitespace'),
            include('keywords'),
            include('functions'),
            include('operators'),
            include('classes'),
            # FIX: this rule had been garbled to r'("|)' (which matches the
            # empty string everywhere); restored the embedded-HTML opener
            # that hands documentation strings to the 'html-content' state.
            (r'("<html>|<html>)', Name.Tag, 'html-content'),
            include('statements'),
        ],
        'keywords': [
            (r'(algorithm|annotation|break|connect|constant|constrainedby|'
             r'discrete|each|else|elseif|elsewhen|encapsulated|enumeration|'
             r'end|equation|exit|expandable|extends|'
             r'external|false|final|flow|for|if|import|in|inner|input|'
             r'loop|nondiscrete|outer|output|parameter|partial|'
             r'protected|public|redeclare|replaceable|stream|time|then|true|'
             r'when|while|within)\b', Keyword)
        ],
        'functions': [
            (r'(abs|acos|acosh|asin|asinh|atan|atan2|atan3|ceil|cos|cosh|'
             r'cross|div|exp|floor|log|log10|mod|rem|sign|sin|sinh|size|'
             r'sqrt|tan|tanh|zeros)\b', Name.Function)
        ],
        'operators': [
            (r'(and|assert|cardinality|change|delay|der|edge|initial|'
             r'noEvent|not|or|pre|reinit|return|sample|smooth|'
             r'terminal|terminate)\b', Name.Builtin)
        ],
        'classes': [
            (r'(block|class|connector|function|model|package|'
             r'record|type)\b', Name.Class)
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})',
             String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),       # line continuation
            (r'\\', String)          # stray backslash
        ],
        'html-content': [
            (r'<\s*/\s*html\s*>', Name.Tag, '#pop'),
            (r'.+?(?=<\s*/\s*html\s*>)', using(HtmlLexer)),
        ]
    }
elif re.match( + r'(add|subtract|multiply|divide|remainder|power|and~|or~|xor~|' + r'minimum|maximum|negate|complement|absolute|random|head|tail|' + r'next|back|skip|at|pick|first|second|third|fourth|fifth|sixth|' + r'seventh|eighth|ninth|tenth|last|path|find|select|make|to|copy\*|' + r'insert|remove|change|poke|clear|trim|sort|min|max|abs|cp|' + r'copy)$', word): + yield match.start(), Name.Function, word + elif re.match( + r'(error|source|input|license|help|install|echo|Usage|with|func|' + r'throw-on-error|function|does|has|context|probe|\?\?|as-pair|' + r'mod|modulo|round|repend|about|set-net|append|join|rejoin|reform|' + r'remold|charset|array|replace|move|extract|forskip|forall|alter|' + r'first+|also|take|for|forever|dispatch|attempt|what-dir|' + r'change-dir|clean-path|list-dir|dirize|rename|split-path|delete|' + r'make-dir|delete-dir|in-dir|confirm|dump-obj|upgrade|what|' + r'build-tag|process-source|build-markup|decode-cgi|read-cgi|' + r'write-user|save-user|set-user-name|protect-system|parse-xml|' + r'cvs-date|cvs-version|do-boot|get-net-info|desktop|layout|' + r'scroll-para|get-face|alert|set-face|uninstall|unfocus|' + r'request-dir|center-face|do-events|net-error|decode-url|' + r'parse-header|parse-header-date|parse-email-addrs|import-email|' + r'send|build-attach-body|resend|show-popup|hide-popup|open-events|' + r'find-key-face|do-face|viewtop|confine|find-window|' + r'insert-event-func|remove-event-func|inform|dump-pane|dump-face|' + r'flag-face|deflag-face|clear-fields|read-net|vbug|path-thru|' + r'read-thru|load-thru|do-thru|launch-thru|load-image|' + r'request-download|do-face-alt|set-font|set-para|get-style|' + r'set-style|make-face|stylize|choose|hilight-text|hilight-all|' + r'unlight-text|focus|scroll-drag|clear-face|reset-face|scroll-face|' + r'resize-face|load-stock|load-stock-block|notify|request|flash|' + r'request-color|request-pass|request-text|request-list|' + r'request-date|request-file|dbug|editor|link-relative-path|' + 
r'emailer|parse-error)$', word): + yield match.start(), Keyword.Namespace, word + elif re.match( + r'(halt|quit|do|load|q|recycle|call|run|ask|parse|view|unview|' + r'return|exit|break)$', word): + yield match.start(), Name.Exception, word + elif re.match('REBOL$', word): + yield match.start(), Generic.Heading, word + elif re.match("to-.*", word): + yield match.start(), Keyword, word + elif re.match('(\+|-|\*|/|//|\*\*|and|or|xor|=\?|=|==|<>|<|>|<=|>=)$', + word): + yield match.start(), Operator, word + elif re.match(".*\?$", word): + yield match.start(), Keyword, word + elif re.match(".*\!$", word): + yield match.start(), Keyword.Type, word + elif re.match("'.*", word): + yield match.start(), Name.Variable.Instance, word # lit-word + elif re.match("#.*", word): + yield match.start(), Name.Label, word # issue + elif re.match("%.*", word): + yield match.start(), Name.Decorator, word # file + else: + yield match.start(), Name.Variable, word + + tokens = { + 'root': [ + (r'\s+', Text), + (r'#"', String.Char, 'char'), + (r'#{[0-9a-fA-F]*}', Number.Hex), + (r'2#{', Number.Hex, 'bin2'), + (r'64#{[0-9a-zA-Z+/=\s]*}', Number.Hex), + (r'"', String, 'string'), + (r'{', String, 'string2'), + (r';#+.*\n', Comment.Special), + (r';\*+.*\n', Comment.Preproc), + (r';.*\n', Comment), + (r'%"', Name.Decorator, 'stringFile'), + (r'%[^(\^{^")\s\[\]]+', Name.Decorator), + (r'<[a-zA-Z0-9:._-]*>', Name.Tag), + (r'<[^(<>\s")]+', Name.Tag, 'tag'), + (r'[+-]?([a-zA-Z]{1,3})?\$\d+(\.\d+)?', Number.Float), # money + (r'[+-]?\d+\:\d+(\:\d+)?(\.\d+)?', String.Other), # time + (r'\d+\-[0-9a-zA-Z]+\-\d+(\/\d+\:\d+(\:\d+)?' 
+ r'([\.\d+]?([+-]?\d+:\d+)?)?)?', String.Other), # date + (r'\d+(\.\d+)+\.\d+', Keyword.Constant), # tuple + (r'\d+[xX]\d+', Keyword.Constant), # pair + (r'[+-]?\d+(\'\d+)?([\.,]\d*)?[eE][+-]?\d+', Number.Float), + (r'[+-]?\d+(\'\d+)?[\.,]\d*', Number.Float), + (r'[+-]?\d+(\'\d+)?', Number), + (r'[\[\]\(\)]', Generic.Strong), + (r'[a-zA-Z]+[^(\^{"\s:)]*://[^(\^{"\s)]*', Name.Decorator), # url + (r'mailto:[^(\^{"@\s)]+@[^(\^{"@\s)]+', Name.Decorator), # url + (r'[^(\^{"@\s)]+@[^(\^{"@\s)]+', Name.Decorator), # email + (r'comment\s', Comment, 'comment'), + (r'/[^(\^{^")\s/[\]]*', Name.Attribute), + (r'([^(\^{^")\s/[\]]+)(?=[:({"\s/\[\]])', word_callback), + (r'([^(\^{^")\s]+)', Text), + ], + 'string': [ + (r'[^(\^")]+', String), + (escape_re, String.Escape), + (r'[\(|\)]+', String), + (r'\^.', String.Escape), + (r'"', String, '#pop'), + ], + 'string2': [ + (r'[^(\^{^})]+', String), + (escape_re, String.Escape), + (r'[\(|\)]+', String), + (r'\^.', String.Escape), + (r'{', String, '#push'), + (r'}', String, '#pop'), + ], + 'stringFile': [ + (r'[^(\^")]+', Name.Decorator), + (escape_re, Name.Decorator), + (r'\^.', Name.Decorator), + (r'"', Name.Decorator, '#pop'), + ], + 'char': [ + (escape_re + '"', String.Char, '#pop'), + (r'\^."', String.Char, '#pop'), + (r'."', String.Char, '#pop'), + ], + 'tag': [ + (escape_re, Name.Tag), + (r'"', Name.Tag, 'tagString'), + (r'[^(<>\r\n")]+', Name.Tag), + (r'>', Name.Tag, '#pop'), + ], + 'tagString': [ + (r'[^(\^")]+', Name.Tag), + (escape_re, Name.Tag), + (r'[\(|\)]+', Name.Tag), + (r'\^.', Name.Tag), + (r'"', Name.Tag, '#pop'), + ], + 'tuple': [ + (r'(\d+\.)+', Keyword.Constant), + (r'\d+', Keyword.Constant, '#pop'), + ], + 'bin2': [ + (r'\s+', Number.Hex), + (r'([0-1]\s*){8}', Number.Hex), + (r'}', Number.Hex, '#pop'), + ], + 'comment': [ + (r'"', Comment, 'commentString1'), + (r'{', Comment, 'commentString2'), + (r'\[', Comment, 'commentBlock'), + (r'[^(\s{\"\[]+', Comment, '#pop'), + ], + 'commentString1': [ + (r'[^(\^")]+', 
Comment), + (escape_re, Comment), + (r'[\(|\)]+', Comment), + (r'\^.', Comment), + (r'"', Comment, '#pop'), + ], + 'commentString2': [ + (r'[^(\^{^})]+', Comment), + (escape_re, Comment), + (r'[\(|\)]+', Comment), + (r'\^.', Comment), + (r'{', Comment, '#push'), + (r'}', Comment, '#pop'), + ], + 'commentBlock': [ + (r'\[',Comment, '#push'), + (r'\]',Comment, '#pop'), + (r'[^(\[\])]*', Comment), + ], + } + + +class ABAPLexer(RegexLexer): + """ + Lexer for ABAP, SAP's integrated language. + + *New in Pygments 1.1.* + """ + name = 'ABAP' + aliases = ['abap'] + filenames = ['*.abap'] + mimetypes = ['text/x-abap'] + + flags = re.IGNORECASE | re.MULTILINE + + tokens = { + 'common': [ + (r'\s+', Text), + (r'^\*.*$', Comment.Single), + (r'\".*?\n', Comment.Single), + ], + 'variable-names': [ + (r'<[\S_]+>', Name.Variable), + (r'[\w][\w_~]*(?:(\[\])|->\*)?', Name.Variable), + ], + 'root': [ + include('common'), + #function calls + (r'(CALL\s+(?:BADI|CUSTOMER-FUNCTION|FUNCTION))(\s+)(\'?\S+\'?)', + bygroups(Keyword, Text, Name.Function)), + (r'(CALL\s+(?:DIALOG|SCREEN|SUBSCREEN|SELECTION-SCREEN|' + r'TRANSACTION|TRANSFORMATION))\b', + Keyword), + (r'(FORM|PERFORM)(\s+)([\w_]+)', + bygroups(Keyword, Text, Name.Function)), + (r'(PERFORM)(\s+)(\()([\w_]+)(\))', + bygroups(Keyword, Text, Punctuation, Name.Variable, Punctuation )), + (r'(MODULE)(\s+)(\S+)(\s+)(INPUT|OUTPUT)', + bygroups(Keyword, Text, Name.Function, Text, Keyword)), + + # method implementation + (r'(METHOD)(\s+)([\w_~]+)', + bygroups(Keyword, Text, Name.Function)), + # method calls + (r'(\s+)([\w_\-]+)([=\-]>)([\w_\-~]+)', + bygroups(Text, Name.Variable, Operator, Name.Function)), + # call methodnames returning style + (r'(?<=(=|-)>)([\w_\-~]+)(?=\()', Name.Function), + + # keywords with dashes in them. + # these need to be first, because for instance the -ID part + # of MESSAGE-ID wouldn't get highlighted if MESSAGE was + # first in the list of keywords. 
+ (r'(ADD-CORRESPONDING|AUTHORITY-CHECK|' + r'CLASS-DATA|CLASS-EVENTS|CLASS-METHODS|CLASS-POOL|' + r'DELETE-ADJACENT|DIVIDE-CORRESPONDING|' + r'EDITOR-CALL|ENHANCEMENT-POINT|ENHANCEMENT-SECTION|EXIT-COMMAND|' + r'FIELD-GROUPS|FIELD-SYMBOLS|FUNCTION-POOL|' + r'INTERFACE-POOL|INVERTED-DATE|' + r'LOAD-OF-PROGRAM|LOG-POINT|' + r'MESSAGE-ID|MOVE-CORRESPONDING|MULTIPLY-CORRESPONDING|' + r'NEW-LINE|NEW-PAGE|NEW-SECTION|NO-EXTENSION|' + r'OUTPUT-LENGTH|PRINT-CONTROL|' + r'SELECT-OPTIONS|START-OF-SELECTION|SUBTRACT-CORRESPONDING|' + r'SYNTAX-CHECK|SYSTEM-EXCEPTIONS|' + r'TYPE-POOL|TYPE-POOLS' + r')\b', Keyword), + + # keyword kombinations + (r'CREATE\s+(PUBLIC|PRIVATE|DATA|OBJECT)|' + r'((PUBLIC|PRIVATE|PROTECTED)\s+SECTION|' + r'(TYPE|LIKE)(\s+(LINE\s+OF|REF\s+TO|' + r'(SORTED|STANDARD|HASHED)\s+TABLE\s+OF))?|' + r'FROM\s+(DATABASE|MEMORY)|CALL\s+METHOD|' + r'(GROUP|ORDER) BY|HAVING|SEPARATED BY|' + r'GET\s+(BADI|BIT|CURSOR|DATASET|LOCALE|PARAMETER|' + r'PF-STATUS|(PROPERTY|REFERENCE)\s+OF|' + r'RUN\s+TIME|TIME\s+(STAMP)?)?|' + r'SET\s+(BIT|BLANK\s+LINES|COUNTRY|CURSOR|DATASET|EXTENDED\s+CHECK|' + r'HANDLER|HOLD\s+DATA|LANGUAGE|LEFT\s+SCROLL-BOUNDARY|' + r'LOCALE|MARGIN|PARAMETER|PF-STATUS|PROPERTY\s+OF|' + r'RUN\s+TIME\s+(ANALYZER|CLOCK\s+RESOLUTION)|SCREEN|' + r'TITLEBAR|UPADTE\s+TASK\s+LOCAL|USER-COMMAND)|' + r'CONVERT\s+((INVERTED-)?DATE|TIME|TIME\s+STAMP|TEXT)|' + r'(CLOSE|OPEN)\s+(DATASET|CURSOR)|' + r'(TO|FROM)\s+(DATA BUFFER|INTERNAL TABLE|MEMORY ID|' + r'DATABASE|SHARED\s+(MEMORY|BUFFER))|' + r'DESCRIBE\s+(DISTANCE\s+BETWEEN|FIELD|LIST|TABLE)|' + r'FREE\s(MEMORY|OBJECT)?|' + r'PROCESS\s+(BEFORE\s+OUTPUT|AFTER\s+INPUT|' + r'ON\s+(VALUE-REQUEST|HELP-REQUEST))|' + r'AT\s+(LINE-SELECTION|USER-COMMAND|END\s+OF|NEW)|' + r'AT\s+SELECTION-SCREEN(\s+(ON(\s+(BLOCK|(HELP|VALUE)-REQUEST\s+FOR|' + r'END\s+OF|RADIOBUTTON\s+GROUP))?|OUTPUT))?|' + r'SELECTION-SCREEN:?\s+((BEGIN|END)\s+OF\s+((TABBED\s+)?BLOCK|LINE|' + r'SCREEN)|COMMENT|FUNCTION\s+KEY|' + 
r'INCLUDE\s+BLOCKS|POSITION|PUSHBUTTON|' + r'SKIP|ULINE)|' + r'LEAVE\s+(LIST-PROCESSING|PROGRAM|SCREEN|' + r'TO LIST-PROCESSING|TO TRANSACTION)' + r'(ENDING|STARTING)\s+AT|' + r'FORMAT\s+(COLOR|INTENSIFIED|INVERSE|HOTSPOT|INPUT|FRAMES|RESET)|' + r'AS\s+(CHECKBOX|SUBSCREEN|WINDOW)|' + r'WITH\s+(((NON-)?UNIQUE)?\s+KEY|FRAME)|' + r'(BEGIN|END)\s+OF|' + r'DELETE(\s+ADJACENT\s+DUPLICATES\sFROM)?|' + r'COMPARING(\s+ALL\s+FIELDS)?|' + r'INSERT(\s+INITIAL\s+LINE\s+INTO|\s+LINES\s+OF)?|' + r'IN\s+((BYTE|CHARACTER)\s+MODE|PROGRAM)|' + r'END-OF-(DEFINITION|PAGE|SELECTION)|' + r'WITH\s+FRAME(\s+TITLE)|' + + # simple kombinations + r'AND\s+(MARK|RETURN)|CLIENT\s+SPECIFIED|CORRESPONDING\s+FIELDS\s+OF|' + r'IF\s+FOUND|FOR\s+EVENT|INHERITING\s+FROM|LEAVE\s+TO\s+SCREEN|' + r'LOOP\s+AT\s+(SCREEN)?|LOWER\s+CASE|MATCHCODE\s+OBJECT|MODIF\s+ID|' + r'MODIFY\s+SCREEN|NESTING\s+LEVEL|NO\s+INTERVALS|OF\s+STRUCTURE|' + r'RADIOBUTTON\s+GROUP|RANGE\s+OF|REF\s+TO|SUPPRESS DIALOG|' + r'TABLE\s+OF|UPPER\s+CASE|TRANSPORTING\s+NO\s+FIELDS|' + r'VALUE\s+CHECK|VISIBLE\s+LENGTH|HEADER\s+LINE)\b', Keyword), + + # single word keywords. 
+ (r'(^|(?<=(\s|\.)))(ABBREVIATED|ADD|ALIASES|APPEND|ASSERT|' + r'ASSIGN(ING)?|AT(\s+FIRST)?|' + r'BACK|BLOCK|BREAK-POINT|' + r'CASE|CATCH|CHANGING|CHECK|CLASS|CLEAR|COLLECT|COLOR|COMMIT|' + r'CREATE|COMMUNICATION|COMPONENTS?|COMPUTE|CONCATENATE|CONDENSE|' + r'CONSTANTS|CONTEXTS|CONTINUE|CONTROLS|' + r'DATA|DECIMALS|DEFAULT|DEFINE|DEFINITION|DEFERRED|DEMAND|' + r'DETAIL|DIRECTORY|DIVIDE|DO|' + r'ELSE(IF)?|ENDAT|ENDCASE|ENDCLASS|ENDDO|ENDFORM|ENDFUNCTION|' + r'ENDIF|ENDLOOP|ENDMETHOD|ENDMODULE|ENDSELECT|ENDTRY|' + r'ENHANCEMENT|EVENTS|EXCEPTIONS|EXIT|EXPORT|EXPORTING|EXTRACT|' + r'FETCH|FIELDS?|FIND|FOR|FORM|FORMAT|FREE|FROM|' + r'HIDE|' + r'ID|IF|IMPORT|IMPLEMENTATION|IMPORTING|IN|INCLUDE|INCLUDING|' + r'INDEX|INFOTYPES|INITIALIZATION|INTERFACE|INTERFACES|INTO|' + r'LENGTH|LINES|LOAD|LOCAL|' + r'JOIN|' + r'KEY|' + r'MAXIMUM|MESSAGE|METHOD[S]?|MINIMUM|MODULE|MODIFY|MOVE|MULTIPLY|' + r'NODES|' + r'OBLIGATORY|OF|OFF|ON|OVERLAY|' + r'PACK|PARAMETERS|PERCENTAGE|POSITION|PROGRAM|PROVIDE|PUBLIC|PUT|' + r'RAISE|RAISING|RANGES|READ|RECEIVE|REFRESH|REJECT|REPORT|RESERVE|' + r'RESUME|RETRY|RETURN|RETURNING|RIGHT|ROLLBACK|' + r'SCROLL|SEARCH|SELECT|SHIFT|SINGLE|SKIP|SORT|SPLIT|STATICS|STOP|' + r'SUBMIT|SUBTRACT|SUM|SUMMARY|SUMMING|SUPPLY|' + r'TABLE|TABLES|TIMES|TITLE|TO|TOP-OF-PAGE|TRANSFER|TRANSLATE|TRY|TYPES|' + r'ULINE|UNDER|UNPACK|UPDATE|USING|' + r'VALUE|VALUES|VIA|' + r'WAIT|WHEN|WHERE|WHILE|WITH|WINDOW|WRITE)\b', Keyword), + + # builtins + (r'(abs|acos|asin|atan|' + r'boolc|boolx|bit_set|' + r'char_off|charlen|ceil|cmax|cmin|condense|contains|' + r'contains_any_of|contains_any_not_of|concat_lines_of|cos|cosh|' + r'count|count_any_of|count_any_not_of|' + r'dbmaxlen|distance|' + r'escape|exp|' + r'find|find_end|find_any_of|find_any_not_of|floor|frac|from_mixed|' + r'insert|' + r'lines|log|log10|' + r'match|matches|' + r'nmax|nmin|numofchar|' + r'repeat|replace|rescale|reverse|round|' + r'segment|shift_left|shift_right|sign|sin|sinh|sqrt|strlen|' + 
r'substring|substring_after|substring_from|substring_before|substring_to|' + r'tan|tanh|to_upper|to_lower|to_mixed|translate|trunc|' + r'xstrlen)(\()\b', bygroups(Name.Builtin, Punctuation)), + + (r'&[0-9]', Name), + (r'[0-9]+', Number.Integer), + + # operators which look like variable names before + # parsing variable names. + (r'(?<=(\s|.))(AND|EQ|NE|GT|LT|GE|LE|CO|CN|CA|NA|CS|NOT|NS|CP|NP|' + r'BYTE-CO|BYTE-CN|BYTE-CA|BYTE-NA|BYTE-CS|BYTE-NS|' + r'IS\s+(NOT\s+)?(INITIAL|ASSIGNED|REQUESTED|BOUND))\b', Operator), + + include('variable-names'), + + # standard oparators after variable names, + # because < and > are part of field symbols. + (r'[?*<>=\-+]', Operator), + (r"'(''|[^'])*'", String.Single), + (r'[/;:()\[\],\.]', Punctuation) + ], + } + + +class NewspeakLexer(RegexLexer): + """ + For `Newspeak ` syntax. + """ + name = 'Newspeak' + filenames = ['*.ns2'] + aliases = ['newspeak', ] + mimetypes = ['text/x-newspeak'] + + tokens = { + 'root' : [ + (r'\b(Newsqueak2)\b',Keyword.Declaration), + (r"'[^']*'",String), + (r'\b(class)(\s+)([a-zA-Z0-9_]+)(\s*)', + bygroups(Keyword.Declaration,Text,Name.Class,Text)), + (r'\b(mixin|self|super|private|public|protected|nil|true|false)\b', + Keyword), + (r'([a-zA-Z0-9_]+\:)(\s*)([a-zA-Z_]\w+)', + bygroups(Name.Function,Text,Name.Variable)), + (r'([a-zA-Z0-9_]+)(\s*)(=)', + bygroups(Name.Attribute,Text,Operator)), + (r'<[a-zA-Z0-9_]+>', Comment.Special), + include('expressionstat'), + include('whitespace') + ], + + 'expressionstat': [ + (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), + (r'\d+', Number.Integer), + (r':\w+',Name.Variable), + (r'(\w+)(::)', bygroups(Name.Variable, Operator)), + (r'\w+:', Name.Function), + (r'\w+', Name.Variable), + (r'\(|\)', Punctuation), + (r'\[|\]', Punctuation), + (r'\{|\}', Punctuation), + + (r'(\^|\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-|:)', Operator), + (r'\.|;', Punctuation), + include('whitespace'), + include('literals'), + ], + 'literals': [ + (r'\$.', String), + (r"'[^']*'", String), + 
(r"#'[^']*'", String.Symbol), + (r"#\w+:?", String.Symbol), + (r"#(\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-)+", String.Symbol) + + ], + 'whitespace' : [ + (r'\s+', Text), + (r'"[^"]*"', Comment) + ] + } + +class GherkinLexer(RegexLexer): + """ + For `Gherkin ` syntax. + + *New in Pygments 1.2.* + """ + name = 'Gherkin' + aliases = ['Cucumber', 'cucumber', 'Gherkin', 'gherkin'] + filenames = ['*.feature'] + mimetypes = ['text/x-gherkin'] + + feature_keywords_regexp = ur'^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функционалност|Функционал|Особина|Могућност|Özellik|Właściwość|Tính năng|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$' + scenario_keywords_regexp = ur'^(\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарио|Сценарий структураси|Сценарий|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Основа|Концепт|Контекст|Założenia|Tình huống|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Esquema do Cenário|Esquema do 
Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$' + examples_regexp = ur'^(\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$' + step_keywords_regexp = ur'^(\s*)(하지만|조건|만일|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假如|但是|但し|並且|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Унда |То |Онда |Но |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Агар |А |Și |És |anrhegedig a |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Yna |Ya know how |Ya gotta |Y |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Donat |Donada |Diyelim ki |Dengan |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |But y\'all |But |Biết |Bet |BUT |Atunci |And y\'all |And |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )' + + tokens = { + 'comments': [ + (r'#.*$', Comment) + ], + 'multiline_descriptions' : [ + 
(step_keywords_regexp, Keyword, "#pop"), + include('comments'), + (r"(\s|.)", Name.Constant) + ], + 'multiline_descriptions_on_stack' : [ + (step_keywords_regexp, Keyword, "#pop:2"), + include('comments'), + (r"(\s|.)", Name.Constant) + ], + 'scenario_table_description': [ + (r"\s+\|", Text, 'scenario_table_header'), + include('comments'), + (r"(\s|.)", Name.Constant) + ], + 'scenario_table_header': [ + (r"\s+\|\s*$", Text, "#pop:2"), + (r"(\s+\|\s*)(#.*)$", bygroups(Text, Comment), "#pop:2"), + include('comments'), + (r"\s+\|", Text), + (r"[^\|]", Name.Variable) + ], + 'scenario_sections_on_stack': [ + (scenario_keywords_regexp, + bygroups(Text, Name.Class, Name.Class, Name.Constant), + "multiline_descriptions_on_stack") + ], + 'narrative': [ + include('scenario_sections_on_stack'), + (r"(\s|.)", Name.Builtin) + ], + 'table_vars': [ + (r'(<[^>]*>)', bygroups(Name.Variable)) + ], + 'string': [ + include('table_vars'), + (r'(\s|.)', String), + ], + 'py_string': [ + (r'"""', String, "#pop"), + include('string'), + ], + 'double_string': [ + (r'"', String, "#pop"), + include('string'), + ], + 'root': [ + (r'\n', Text), + include('comments'), + (r'"""', String, "py_string"), + (r'"', String, "double_string"), + include('table_vars'), + (r'@[^@\s]+', Name.Namespace), + (step_keywords_regexp, bygroups(Text, Keyword)), + (feature_keywords_regexp, + bygroups(Name.Class, Name.Class, Name.Constant), 'narrative'), + (scenario_keywords_regexp, + bygroups(Text, Name.Class, Name.Class, Name.Constant), + "multiline_descriptions"), + (examples_regexp, + bygroups(Text, Name.Class, Name.Class, Name.Constant), + "scenario_table_description"), + (r'(\s|.)', Text) + ] + } + + +class AsymptoteLexer(RegexLexer): + """ + For `Asymptote `_ source code. 
+ + *New in Pygments 1.2.* + """ + name = 'Asymptote' + aliases = ['asy', 'asymptote'] + filenames = ['*.asy'] + mimetypes = ['text/x-asymptote'] + + #: optional Comment or Whitespace + _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' + + tokens = { + 'whitespace': [ + (r'\n', Text), + (r'\s+', Text), + (r'\\\n', Text), # line continuation + (r'//(\n|(.|\n)*?[^\\]\n)', Comment), + (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment), + ], + 'statements': [ + # simple string (TeX friendly) + (r'"(\\\\|\\"|[^"])*"', String), + # C style string (with character escapes) + (r"'", String, 'string'), + (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), + (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), + (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), + (r'0[0-7]+[Ll]?', Number.Oct), + (r'\d+[Ll]?', Number.Integer), + (r'[~!%^&*+=|?:<>/-]', Operator), + (r'[()\[\],.]', Punctuation), + (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)), + (r'(and|controls|tension|atleast|curl|if|else|while|for|do|' + r'return|break|continue|struct|typedef|new|access|import|' + r'unravel|from|include|quote|static|public|private|restricted|' + r'this|explicit|true|false|null|cycle|newframe|operator)\b', Keyword), + # Since an asy-type-name can be also an asy-function-name, + # in the following we test if the string " [a-zA-Z]" follows + # the Keyword.Type. + # Of course it is not perfect ! 
+ (r'(Braid|FitResult|Label|Legend|TreeNode|abscissa|arc|arrowhead|' + r'binarytree|binarytreeNode|block|bool|bool3|bounds|bqe|circle|' + r'conic|coord|coordsys|cputime|ellipse|file|filltype|frame|grid3|' + r'guide|horner|hsv|hyperbola|indexedTransform|int|inversion|key|' + r'light|line|linefit|marginT|marker|mass|object|pair|parabola|path|' + r'path3|pen|picture|point|position|projection|real|revolution|' + r'scaleT|scientific|segment|side|slice|splitface|string|surface|' + r'tensionSpecifier|ticklocate|ticksgridT|tickvalues|transform|' + r'transformation|tree|triangle|trilinear|triple|vector|' + r'vertex|void)(?=([ ]{1,}[a-zA-Z]))', Keyword.Type), + # Now the asy-type-name which are not asy-function-name + # except yours ! + # Perhaps useless + (r'(Braid|FitResult|TreeNode|abscissa|arrowhead|block|bool|bool3|' + r'bounds|coord|frame|guide|horner|int|linefit|marginT|pair|pen|' + r'picture|position|real|revolution|slice|splitface|ticksgridT|' + r'tickvalues|tree|triple|vertex|void)\b', Keyword.Type), + ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label), + ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ], + 'root': [ + include('whitespace'), + # functions + (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments + r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name + r'(\s*\([^;]*?\))' # signature + r'(' + _ws + r')({)', + bygroups(using(this), Name.Function, using(this), using(this), + Punctuation), + 'function'), + # function declarations + (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments + r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name + r'(\s*\([^;]*?\))' # signature + r'(' + _ws + r')(;)', + bygroups(using(this), Name.Function, using(this), using(this), + Punctuation)), + ('', Text, 'statement'), + ], + 'statement' : [ + include('whitespace'), + include('statements'), + ('[{}]', Punctuation), + (';', Punctuation, '#pop'), + ], + 'function': [ + include('whitespace'), + include('statements'), + (';', Punctuation), + ('{', Punctuation, '#push'), + ('}', Punctuation, '#pop'), + ], + 
'string': [ + (r"'", String, '#pop'), + (r'\\([\\abfnrtv"\'?]|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), + (r'\n', String), + (r"[^\\'\n]+", String), # all other characters + (r'\\\n', String), + (r'\\n', String), # line continuation + (r'\\', String), # stray backslash + ] + } + + def get_tokens_unprocessed(self, text): + from pygments.lexers._asybuiltins import ASYFUNCNAME, ASYVARNAME + for index, token, value in \ + RegexLexer.get_tokens_unprocessed(self, text): + if token is Name and value in ASYFUNCNAME: + token = Name.Function + elif token is Name and value in ASYVARNAME: + token = Name.Variable + yield index, token, value diff --git a/pygments_package/pygments/lexers/special.py b/pygments_package/pygments/lexers/special.py index b7b5843..bd200a7 100644 --- a/pygments_package/pygments/lexers/special.py +++ b/pygments_package/pygments/lexers/special.py @@ -5,8 +5,8 @@ Special lexers. - :copyright: 2006-2007 by Georg Brandl. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" import re @@ -14,7 +14,7 @@ from pygments.lexer import Lexer from pygments.token import Token, Error, Text -from pygments.util import get_choice_opt +from pygments.util import get_choice_opt, b __all__ = ['TextLexer', 'RawTokenLexer'] @@ -35,7 +35,7 @@ def get_tokens_unprocessed(self, text): _ttype_cache = {} -line_re = re.compile('.*?\n') +line_re = re.compile(b('.*?\n')) class RawTokenLexer(Lexer): """ @@ -60,6 +60,9 @@ def __init__(self, **options): Lexer.__init__(self, **options) def get_tokens(self, text): + if isinstance(text, unicode): + # raw token stream never has any non-ASCII characters + text = text.encode('ascii') if self.compress == 'gz': import gzip gzipfile = gzip.GzipFile('', 'rb', 9, cStringIO.StringIO(text)) @@ -70,7 +73,7 @@ def get_tokens(self, text): # do not call Lexer.get_tokens() because we do not want Unicode # decoding to occur, and stripping is not optional. - text = text.strip('\n') + '\n' + text = text.strip(b('\n')) + b('\n') for i, t, v in self.get_tokens_unprocessed(text): yield t, v @@ -78,7 +81,7 @@ def get_tokens_unprocessed(self, text): length = 0 for match in line_re.finditer(text): try: - ttypestr, val = match.group().split('\t', 1) + ttypestr, val = match.group().split(b('\t'), 1) except ValueError: val = match.group().decode(self.encoding) ttype = Error diff --git a/pygments_package/pygments/lexers/templates.py b/pygments_package/pygments/lexers/templates.py index fd84073..eb84745 100644 --- a/pygments_package/pygments/lexers/templates.py +++ b/pygments_package/pygments/lexers/templates.py @@ -5,16 +5,11 @@ Lexers for various template engines' markup. - :copyright: 2006-2008 by Armin Ronacher, Georg Brandl, Matt Good, - Ben Bangert. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" import re -try: - set -except NameError: - from sets import Set as set from pygments.lexers.web import \ PhpLexer, HtmlLexer, XmlLexer, JavascriptLexer, CssLexer @@ -38,7 +33,9 @@ 'MyghtyCssLexer', 'MyghtyJavascriptLexer', 'MakoLexer', 'MakoHtmlLexer', 'MakoXmlLexer', 'MakoJavascriptLexer', 'MakoCssLexer', 'JspLexer', 'CheetahLexer', 'CheetahHtmlLexer', - 'CheetahXmlLexer', 'CheetahJavascriptLexer'] + 'CheetahXmlLexer', 'CheetahJavascriptLexer', + 'EvoqueLexer', 'EvoqueHtmlLexer', 'EvoqueXmlLexer', + 'ColdfusionLexer', 'ColdfusionHtmlLexer'] class ErbLexer(Lexer): @@ -239,13 +236,14 @@ class DjangoLexer(RegexLexer): bygroups(Keyword, Text, Keyword, Text, Name.Function)), (r'(_|true|false|none|True|False|None)\b', Keyword.Pseudo), (r'(in|as|reversed|recursive|not|and|or|is|if|else|import|' - r'with(?:(?:out)?\s*context)?)\b', Keyword), + r'with(?:(?:out)?\s*context)?|scoped|ignore\s+missing)\b', + Keyword), (r'(loop|block|super|forloop)\b', Name.Builtin), (r'[a-zA-Z][a-zA-Z0-9_]*', Name.Variable), (r'\.[a-zA-Z0-9_]+', Name.Variable), (r':?"(\\\\|\\"|[^"])*"', String.Double), (r":?'(\\\\|\\'|[^'])*'", String.Single), - (r'([{}()\[\]+\-*/,:]|[><=]=?)', Operator), + (r'([{}()\[\]+\-*/,:~]|[><=]=?)', Operator), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), ], @@ -412,28 +410,30 @@ class MakoLexer(RegexLexer): bygroups(Text, Comment.Preproc, Keyword, Other)), (r'(\s*)(%)([^\n]*)(\n|\Z)', bygroups(Text, Comment.Preproc, using(PythonLexer), Other)), - (r'(\s*)(#[^\n]*)(\n|\Z)', - bygroups(Text, Comment.Preproc, Other)), - (r'(<%)(def|call|namespace|text)', + (r'(\s*)(##[^\n]*)(\n|\Z)', + bygroups(Text, Comment.Preproc, Other)), + (r'(?s)<%doc>.*?', Comment.Preproc), + (r'(<%)([\w\.\:]+)', bygroups(Comment.Preproc, Name.Builtin), 'tag'), - (r'()', + (r'()', bygroups(Comment.Preproc, Name.Builtin, Comment.Preproc)), - (r'<%(?=(include|inherit|namespace|page))', Comment.Preproc, 'ondeftags'), + (r'<%(?=([\w\.\:]+))', Comment.Preproc, 
'ondeftags'), (r'(<%(?:!?))(.*?)(%>)(?s)', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), - (r'(\$\{!?)(.*?)(\})(?s)', + (r'(\$\{)(.*?)(\})', bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), (r'''(?sx) - (.+?) # anything, followed by: + (.+?) # anything, followed by: (?: - (?<=\n)(?=[%#]) | # an eval or comment line - (?=' in text: rv += 0.1 return rv + + +class EvoqueLexer(RegexLexer): + """ + For files using the Evoque templating system. + + *New in Pygments 1.1.* + """ + name = 'Evoque' + aliases = ['evoque'] + filenames = ['*.evoque'] + mimetypes = ['application/x-evoque'] + + flags = re.DOTALL + + tokens = { + 'root': [ + (r'[^#$]+', Other), + (r'#\[', Comment.Multiline, 'comment'), + (r'\$\$', Other), + # svn keywords + (r'\$\w+:[^$\n]*\$', Comment.Multiline), + # directives: begin, end + (r'(\$)(begin|end)(\{(%)?)(.*?)((?(4)%)\})', + bygroups(Punctuation, Name.Builtin, Punctuation, None, + String, Punctuation, None)), + # directives: evoque, overlay + # see doc for handling first name arg: /directives/evoque/ + #+ minor inconsistency: the "name" in e.g. $overlay{name=site_base} + # should be using(PythonLexer), not passed out as String + (r'(\$)(evoque|overlay)(\{(%)?)(\s*[#\w\-"\'.]+[^=,%}]+?)?' 
+ r'(.*?)((?(4)%)\})', + bygroups(Punctuation, Name.Builtin, Punctuation, None, + String, using(PythonLexer), Punctuation, None)), + # directives: if, for, prefer, test + (r'(\$)(\w+)(\{(%)?)(.*?)((?(4)%)\})', + bygroups(Punctuation, Name.Builtin, Punctuation, None, + using(PythonLexer), Punctuation, None)), + # directive clauses (no {} expression) + (r'(\$)(else|rof|fi)', bygroups(Punctuation, Name.Builtin)), + # expressions + (r'(\$\{(%)?)(.*?)((!)(.*?))?((?(2)%)\})', + bygroups(Punctuation, None, using(PythonLexer), + Name.Builtin, None, None, Punctuation, None)), + (r'#', Other), + ], + 'comment': [ + (r'[^\]#]', Comment.Multiline), + (r'#\[', Comment.Multiline, '#push'), + (r'\]#', Comment.Multiline, '#pop'), + (r'[\]#]', Comment.Multiline) + ], + } + +class EvoqueHtmlLexer(DelegatingLexer): + """ + Subclass of the `EvoqueLexer` that highlights unlexed data with the + `HtmlLexer`. + + *New in Pygments 1.1.* + """ + name = 'HTML+Evoque' + aliases = ['html+evoque'] + filenames = ['*.html'] + mimetypes = ['text/html+evoque'] + + def __init__(self, **options): + super(EvoqueHtmlLexer, self).__init__(HtmlLexer, EvoqueLexer, + **options) + +class EvoqueXmlLexer(DelegatingLexer): + """ + Subclass of the `EvoqueLexer` that highlights unlexed data with the + `XmlLexer`. 
+ + *New in Pygments 1.1.* + """ + name = 'XML+Evoque' + aliases = ['xml+evoque'] + filenames = ['*.xml'] + mimetypes = ['application/xml+evoque'] + + def __init__(self, **options): + super(EvoqueXmlLexer, self).__init__(XmlLexer, EvoqueLexer, + **options) + +class ColdfusionLexer(RegexLexer): + """ + Coldfusion statements + """ + name = 'cfstatement' + aliases = ['cfs'] + filenames = [] + mimetypes = [] + flags = re.IGNORECASE | re.MULTILINE + + tokens = { + 'root': [ + (r'//.*', Comment), + (r'\+\+|--', Operator), + (r'[-+*/^&=!]', Operator), + (r'<=|>=|<|>', Operator), + (r'mod\b', Operator), + (r'(eq|lt|gt|lte|gte|not|is|and|or)\b', Operator), + (r'\|\||&&', Operator), + (r'"', String.Double, 'string'), + # There is a special rule for allowing html in single quoted + # strings, evidently. + (r"'.*?'", String.Single), + (r'\d+', Number), + (r'(if|else|len|var|case|default|break|switch)\b', Keyword), + (r'([A-Za-z_$][A-Za-z0-9_.]*)\s*(\()', bygroups(Name.Function, Punctuation)), + (r'[A-Za-z_$][A-Za-z0-9_.]*', Name.Variable), + (r'[()\[\]{};:,.\\]', Punctuation), + (r'\s+', Text), + ], + 'string': [ + (r'""', String.Double), + (r'#.+?#', String.Interp), + (r'[^"#]+', String.Double), + (r'#', String.Double), + (r'"', String.Double, '#pop'), + ], + } + +class ColdfusionMarkupLexer(RegexLexer): + """ + Coldfusion markup only + """ + name = 'Coldfusion' + aliases = ['cf'] + filenames = [] + mimetypes = [] + + tokens = { + 'root': [ + (r'[^<]+', Other), + include('tags'), + (r'<[^<>]*', Other), + ], + 'tags': [ + (r'(?s)', Comment.Multiline), + (r'(?s)', Comment), + (r'', Name.Builtin, 'cfoutput'), + (r'(?s)()(.+?)()', + bygroups(Name.Builtin, using(ColdfusionLexer), Name.Builtin)), + # negative lookbehind is for strings with embedded > + (r'(?s)()', + bygroups(Name.Builtin, using(ColdfusionLexer), Name.Builtin)), + ], + 'cfoutput': [ + (r'[^#<]+', Other), + (r'(#)(.*?)(#)', bygroups(Punctuation, using(ColdfusionLexer), + Punctuation)), + #(r'', Name.Builtin, 
'#push'), + (r'', Name.Builtin, '#pop'), + include('tags'), + (r'(?s)<[^<>]*', Other), + (r'#', Other), + ], + } + + +class ColdfusionHtmlLexer(DelegatingLexer): + """ + Coldfusion markup in html + """ + name = 'Coldufsion HTML' + aliases = ['cfm'] + filenames = ['*.cfm', '*.cfml', '*.cfc'] + mimetypes = ['application/x-coldfusion'] + + def __init__(self, **options): + super(ColdfusionHtmlLexer, self).__init__(HtmlLexer, ColdfusionMarkupLexer, + **options) + diff --git a/pygments_package/pygments/lexers/text.py b/pygments_package/pygments/lexers/text.py index cd970ae..6b22370 100644 --- a/pygments_package/pygments/lexers/text.py +++ b/pygments_package/pygments/lexers/text.py @@ -5,23 +5,11 @@ Lexers for non-source code file types. - :copyright: 2006-2008 by Armin Ronacher, Georg Brandl, - Tim Hatch , - Ronny Pfannschmidt, - Dennis Kaarsemaker, - Kumar Appaiah , - Varun Hiremath , - Jeremy Thurgood, - Max Battcher , - Kirill Simonov . - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
""" import re -try: - set -except NameError: - from sets import Set as set from bisect import bisect from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \ @@ -36,7 +24,7 @@ 'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer', 'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer', - 'LighttpdConfLexer', 'NginxConfLexer'] + 'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer'] class IniLexer(RegexLexer): @@ -54,7 +42,7 @@ class IniLexer(RegexLexer): (r'\s+', Text), (r'[;#].*?$', Comment), (r'\[.*?\]$', Keyword), - (r'(.*?)(\s*)(=)(\s*)(.*?)$', + (r'(.*?)([ \t]*)(=)([ \t]*)(.*?)$', bygroups(Name.Attribute, Text, Operator, Text, String)) ] } @@ -179,12 +167,12 @@ class BaseMakefileLexer(RegexLexer): (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n|.*\n)+)', bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), # strings - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), + (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single), # targets (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), 'block-header'), - #TODO: add paren handling (grr) + # TODO: add paren handling (grr) ], 'export': [ (r'[a-zA-Z0-9_${}-]+', Name.Variable), @@ -219,7 +207,7 @@ class DiffLexer(RegexLexer): (r'-.*\n', Generic.Deleted), (r'!.*\n', Generic.Strong), (r'@.*\n', Generic.Subheading), - (r'(Index|diff).*\n', Generic.Heading), + (r'([Ii]ndex|diff).*\n', Generic.Heading), (r'=.*\n', Generic.Heading), (r'.*\n', Text), ] @@ -233,6 +221,7 @@ def analyse_text(text): if text[:4] == '--- ': return 0.9 + DPATCH_KEYWORDS = ['hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move', 'replace'] @@ -254,10 +243,12 @@ class DarcsPatchLexer(RegexLexer): (r'>', Operator), (r'{', Operator), (r'}', Operator), - (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])', bygroups(Operator, Keyword, Name, Text, - Name, 
Operator, Literal.Date, Text, Operator)), - (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)', bygroups(Operator, Keyword, Name, Text, - Name, Operator, Literal.Date, Text), 'comment'), + (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])', + bygroups(Operator, Keyword, Name, Text, Name, Operator, + Literal.Date, Text, Operator)), + (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)', + bygroups(Operator, Keyword, Name, Text, Name, Operator, + Literal.Date, Text), 'comment'), (r'New patches:', Generic.Heading), (r'Context:', Generic.Heading), (r'Patch bundle hash:', Generic.Heading), @@ -361,12 +352,23 @@ class BBCodeLexer(RegexLexer): mimetypes = ['text/x-bbcode'] tokens = { - 'root' : [ - (r'[\s\w]+', Text), - (r'(\[)(/?[^\]\n\r=]+)(\])', - bygroups(Keyword, Keyword.Pseudo, Keyword)), - (r'(\[)([^\]\n\r=]+)(=)([^\]\n\r]+)(\])', - bygroups(Keyword, Keyword.Pseudo, Operator, String, Keyword)), + 'root': [ + (r'[^[]+', Text), + # tag/end tag begin + (r'\[/?\w+', Keyword, 'tag'), + # stray bracket + (r'\[', Text), + ], + 'tag': [ + (r'\s+', Text), + # attribute with value + (r'(\w+)(=)("?[^\s"\]]+"?)', + bygroups(Name.Attribute, Operator, String)), + # tag argument (a la [color=green]) + (r'(=)("?[^\s"\]]+"?)', + bygroups(Operator, String)), + # tag end + (r'\]', Keyword, '#pop'), ], } @@ -473,7 +475,7 @@ class GroffLexer(RegexLexer): } def analyse_text(text): - if text[0] != '.': + if text[:1] != '.': return False if text[:3] == '.\\"': return True @@ -632,7 +634,8 @@ def _handle_sourcecode(self, match): tokens = { 'root': [ # Heading with overline - (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)', + (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' + r'(.+)(\n)(\1)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text, Generic.Heading, Text)), # Plain heading @@ -652,24 +655,33 @@ def _handle_sourcecode(self, match): bygroups(Text, Number, using(this, state='inline'))), (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', 
bygroups(Text, Number, using(this, state='inline'))), + # Line blocks + (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', + bygroups(Text, Operator, using(this, state='inline'))), # Sourcecode directives (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)' r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', _handle_sourcecode), # A directive - (r'^( *\.\.)(\s*)([\w-]+)(::)(?:([ \t]*)(.+))?', - bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)), + (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', + bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, + using(this, state='inline'))), # A reference target (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$', bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), # A footnote target (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), + # A substitution def + (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', + bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, + Punctuation, Text, using(this, state='inline'))), # Comments (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), # Field list - (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text, - Name.Function)), + (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)), + (r'^( *)(:.*?:)([ \t]+)(.*?)$', + bygroups(Text, Name.Class, Text, Name.Function)), # Definition list (r'^([^ ].*(?)(`__?)', # reference with inline target + bygroups(String, String.Interpol, String)), + (r'`.+?`__?', String), # reference + (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', bygroups(Name.Variable, Name.Attribute)), # role - (r'(:[a-zA-Z0-9-]+?:)(`.+?`)', - bygroups(Name.Attribute, Name.Variable)), # user-defined role + (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', + bygroups(Name.Attribute, Name.Variable)), # role (content first) (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis (r'\*.+?\*', Generic.Emph), # Emphasis (r'\[.*?\]_', String), # Footnote or citation @@ -717,6 +730,7 @@ def analyse_text(text): text[p1+1] 
== text[p2-1]): # ...a sufficiently high header return 0.5 + class VimLexer(RegexLexer): """ Lexer for VimL script files. @@ -826,6 +840,7 @@ class GettextLexer(RegexLexer): ] } + class SquidConfLexer(RegexLexer): """ Lexer for `squid `_ configuration files. @@ -1005,6 +1020,7 @@ class DebianControlLexer(RegexLexer): (r'[}]', Text), (r'[^,]$', Name.Function, '#pop'), (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function), + (r'\[.*?\]', Name.Entity), ], 'depend_vers': [ (r'\),', Text, '#pop'), @@ -1425,6 +1441,7 @@ def get_tokens_unprocessed(self, text=None, context=None): context = YamlLexerContext(text, 0) return super(YamlLexer, self).get_tokens_unprocessed(text, context) + class LighttpdConfLexer(RegexLexer): """ Lexer for `Lighttpd `_ configuration files. @@ -1452,6 +1469,7 @@ class LighttpdConfLexer(RegexLexer): } + class NginxConfLexer(RegexLexer): """ Lexer for `Nginx `_ configuration files. @@ -1493,5 +1511,76 @@ class NginxConfLexer(RegexLexer): (r'[^\s;#{}$]+', String), # catch all (r'/[^\s;#]*', Name), # pathname (r'\s+', Text), + (r'[$;]', Text), # leftover characters ], } + + +class CMakeLexer(RegexLexer): + """ + Lexer for `CMake `_ files. 
+ + *New in Pygments 1.2.* + """ + name = 'CMake' + aliases = ['cmake'] + filenames = ['*.cmake'] + mimetypes = ['text/x-cmake'] + + tokens = { + 'root': [ + #(r'(ADD_CUSTOM_COMMAND|ADD_CUSTOM_TARGET|ADD_DEFINITIONS|' + # r'ADD_DEPENDENCIES|ADD_EXECUTABLE|ADD_LIBRARY|ADD_SUBDIRECTORY|' + # r'ADD_TEST|AUX_SOURCE_DIRECTORY|BUILD_COMMAND|BUILD_NAME|' + # r'CMAKE_MINIMUM_REQUIRED|CONFIGURE_FILE|CREATE_TEST_SOURCELIST|' + # r'ELSE|ELSEIF|ENABLE_LANGUAGE|ENABLE_TESTING|ENDFOREACH|' + # r'ENDFUNCTION|ENDIF|ENDMACRO|ENDWHILE|EXEC_PROGRAM|' + # r'EXECUTE_PROCESS|EXPORT_LIBRARY_DEPENDENCIES|FILE|FIND_FILE|' + # r'FIND_LIBRARY|FIND_PACKAGE|FIND_PATH|FIND_PROGRAM|FLTK_WRAP_UI|' + # r'FOREACH|FUNCTION|GET_CMAKE_PROPERTY|GET_DIRECTORY_PROPERTY|' + # r'GET_FILENAME_COMPONENT|GET_SOURCE_FILE_PROPERTY|' + # r'GET_TARGET_PROPERTY|GET_TEST_PROPERTY|IF|INCLUDE|' + # r'INCLUDE_DIRECTORIES|INCLUDE_EXTERNAL_MSPROJECT|' + # r'INCLUDE_REGULAR_EXPRESSION|INSTALL|INSTALL_FILES|' + # r'INSTALL_PROGRAMS|INSTALL_TARGETS|LINK_DIRECTORIES|' + # r'LINK_LIBRARIES|LIST|LOAD_CACHE|LOAD_COMMAND|MACRO|' + # r'MAKE_DIRECTORY|MARK_AS_ADVANCED|MATH|MESSAGE|OPTION|' + # r'OUTPUT_REQUIRED_FILES|PROJECT|QT_WRAP_CPP|QT_WRAP_UI|REMOVE|' + # r'REMOVE_DEFINITIONS|SEPARATE_ARGUMENTS|SET|' + # r'SET_DIRECTORY_PROPERTIES|SET_SOURCE_FILES_PROPERTIES|' + # r'SET_TARGET_PROPERTIES|SET_TESTS_PROPERTIES|SITE_NAME|' + # r'SOURCE_GROUP|STRING|SUBDIR_DEPENDS|SUBDIRS|' + # r'TARGET_LINK_LIBRARIES|TRY_COMPILE|TRY_RUN|UNSET|' + # r'USE_MANGLED_MESA|UTILITY_SOURCE|VARIABLE_REQUIRES|' + # r'VTK_MAKE_INSTANTIATOR|VTK_WRAP_JAVA|VTK_WRAP_PYTHON|' + # r'VTK_WRAP_TCL|WHILE|WRITE_FILE|' + # r'COUNTARGS)\b', Name.Builtin, 'args'), + (r'\b([A-Za-z_]+)([ \t]*)(\()', bygroups(Name.Builtin, Text, + Punctuation), 'args'), + include('keywords'), + include('ws') + ], + 'args': [ + (r'\(', Punctuation, '#push'), + (r'\)', Punctuation, '#pop'), + (r'(\${)(.+?)(})', bygroups(Operator, Name.Variable, Operator)), + (r'(?s)".*?"', String.Double), 
+ (r'\\\S+', String), + (r'[^\)$"# \t\n]+', String), + (r'\n', Text), # explicitly legal + include('keywords'), + include('ws') + ], + 'string': [ + + ], + 'keywords': [ + (r'\b(WIN32|UNIX|APPLE|CYGWIN|BORLAND|MINGW|MSVC|MSVC_IDE|MSVC60|' + r'MSVC70|MSVC71|MSVC80|MSVC90)\b', Keyword), + ], + 'ws': [ + (r'[ \t]+', Text), + (r'#.+\n', Comment), + ] + } + diff --git a/pygments_package/pygments/lexers/web.py b/pygments_package/pygments/lexers/web.py index d0a113d..ec0b27b 100644 --- a/pygments_package/pygments/lexers/web.py +++ b/pygments_package/pygments/lexers/web.py @@ -5,26 +5,25 @@ Lexers for web-related languages and markup. - :copyright: 2006-2008 by Georg Brandl, Armin Ronacher, - Tim Hatch , Stou Sandalski. - :license: BSD, see LICENSE for more details. + :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. """ import re -try: - set -except NameError: - from sets import Set as set -from pygments.lexer import RegexLexer, bygroups, using, include, this +from pygments.lexer import RegexLexer, ExtendedRegexLexer, bygroups, using, \ + include, this from pygments.token import \ Text, Comment, Operator, Keyword, Name, String, Number, Other, Punctuation from pygments.util import get_bool_opt, get_list_opt, looks_like_xml, \ html_doctype_matches +from pygments.lexers.agile import RubyLexer __all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'CssLexer', - 'PhpLexer', 'ActionScriptLexer', 'XsltLexer', 'ActionScript3Lexer'] + 'PhpLexer', 'ActionScriptLexer', 'XsltLexer', 'ActionScript3Lexer', + 'MxmlLexer', 'HaxeLexer', 'HamlLexer', 'SassLexer', + 'ObjectiveJLexer', 'CoffeeScriptLexer'] class JavascriptLexer(RegexLexer): @@ -39,19 +38,37 @@ class JavascriptLexer(RegexLexer): flags = re.DOTALL tokens = { - 'root': [ + 'commentsandwhitespace': [ (r'\s+', Text), (r'', Comment, '#pop'), + ('-', Comment), + ], + 'tag': [ + (r'\s+', Text), + (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'), + (r'/?\s*>', Name.Tag, 
'#pop'), + ], + 'attr': [ + ('\s+', Text), + ('".*?"', String, '#pop'), + ("'.*?'", String, '#pop'), + (r'[^\s>]+', String, '#pop'), + ], + } + + +class HaxeLexer(RegexLexer): + """ + For haXe source code (http://haxe.org/). + """ + + name = 'haXe' + aliases = ['hx', 'haXe'] + filenames = ['*.hx'] + mimetypes = ['text/haxe'] + + ident = r'(?:[a-zA-Z_][a-zA-Z0-9_]*)' + typeid = r'(?:(?:[a-z0-9_\.])*[A-Z_][A-Za-z0-9_]*)' + key_prop = r'(?:default|null|never)' + key_decl_mod = r'(?:public|private|override|static|inline|extern|dynamic)' + + flags = re.DOTALL | re.MULTILINE + + tokens = { + 'root': [ + include('whitespace'), + include('comments'), + (key_decl_mod, Keyword.Declaration), + include('enumdef'), + include('typedef'), + include('classdef'), + include('imports'), + ], + + # General constructs + 'comments': [ + (r'//.*?\n', Comment.Single), + (r'/\*.*?\*/', Comment.Multiline), + (r'#[^\n]*', Comment.Preproc), + ], + 'whitespace': [ + include('comments'), + (r'\s+', Text), + ], + 'codekeywords': [ + (r'\b(if|else|while|do|for|in|break|continue|' + r'return|switch|case|try|catch|throw|null|trace|' + r'new|this|super|untyped|cast|callback|here)\b', + Keyword.Reserved), + ], + 'literals': [ + (r'0[xX][0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r"'(\\\\|\\'|[^'])*'", String.Single), + (r'"(\\\\|\\"|[^"])*"', String.Double), + (r'~/([^\n])*?/[gisx]*', String.Regex), + (r'\b(true|false|null)\b', Keyword.Constant), + ], + 'codeblock': [ + include('whitespace'), + include('new'), + include('case'), + include('anonfundef'), + include('literals'), + include('vardef'), + include('codekeywords'), + (r'[();,\[\]]', Punctuation), + (r'(?:=|\+=|-=|\*=|/=|%=|&=|\|=|\^=|<<=|>>=|>>>=|\|\||&&|' + r'\.\.\.|==|!=|>|<|>=|<=|\||&|\^|<<|>>|>>>|\+|\-|\*|/|%|' + r'!|\+\+|\-\-|~|\.|\?|\:)', + Operator), + (ident, Name), + + (r'}', Punctuation,'#pop'), + (r'{', Punctuation,'#push'), + ], + + # Instance/Block 
level constructs + 'propertydef': [ + (r'(\()(' + key_prop + ')(,)(' + key_prop + ')(\))', + bygroups(Punctuation, Keyword.Reserved, Punctuation, + Keyword.Reserved, Punctuation)), + ], + 'new': [ + (r'\bnew\b', Keyword, 'typedecl'), + ], + 'case': [ + (r'\b(case)(\s+)(' + ident + ')(\s*)(\()', + bygroups(Keyword.Reserved, Text, Name, Text, Punctuation), + 'funargdecl'), + ], + 'vardef': [ + (r'\b(var)(\s+)(' + ident + ')', + bygroups(Keyword.Declaration, Text, Name.Variable), 'vardecl'), + ], + 'vardecl': [ + include('whitespace'), + include('typelabel'), + (r'=', Operator,'#pop'), + (r';', Punctuation,'#pop'), + ], + 'instancevardef': [ + (key_decl_mod,Keyword.Declaration), + (r'\b(var)(\s+)(' + ident + ')', + bygroups(Keyword.Declaration, Text, Name.Variable.Instance), + 'instancevardecl'), + ], + 'instancevardecl': [ + include('vardecl'), + include('propertydef'), + ], + + 'anonfundef': [ + (r'\bfunction\b', Keyword.Declaration, 'fundecl'), + ], + 'instancefundef': [ + (key_decl_mod, Keyword.Declaration), + (r'\b(function)(\s+)(' + ident + ')', + bygroups(Keyword.Declaration, Text, Name.Function), 'fundecl'), + ], + 'fundecl': [ + include('whitespace'), + include('typelabel'), + include('generictypedecl'), + (r'\(',Punctuation,'funargdecl'), + (r'(?=[a-zA-Z0-9_])',Text,'#pop'), + (r'{',Punctuation,('#pop','codeblock')), + (r';',Punctuation,'#pop'), + ], + 'funargdecl': [ + include('whitespace'), + (ident, Name.Variable), + include('typelabel'), + include('literals'), + (r'=', Operator), + (r',', Punctuation), + (r'\?', Punctuation), + (r'\)', Punctuation, '#pop'), + ], + + 'typelabel': [ + (r':', Punctuation, 'type'), + ], + 'typedecl': [ + include('whitespace'), + (typeid, Name.Class), + (r'<', Punctuation, 'generictypedecl'), + (r'(?=[{}()=,a-z])', Text,'#pop'), + ], + 'type': [ + include('whitespace'), + (typeid, Name.Class), + (r'<', Punctuation, 'generictypedecl'), + (r'->', Keyword.Type), + (r'(?=[{}(),;=])', Text, '#pop'), + ], + 'generictypedecl': [ + 
include('whitespace'), + (typeid, Name.Class), + (r'<', Punctuation, '#push'), + (r'>', Punctuation, '#pop'), + (r',', Punctuation), + ], + + # Top level constructs + 'imports': [ + (r'(package|import|using)(\s+)([^;]+)(;)', + bygroups(Keyword.Namespace, Text, Name.Namespace,Punctuation)), + ], + 'typedef': [ + (r'typedef', Keyword.Declaration, ('typedefprebody', 'typedecl')), + ], + 'typedefprebody': [ + include('whitespace'), + (r'(=)(\s*)({)', bygroups(Punctuation, Text, Punctuation), + ('#pop', 'typedefbody')), + ], + 'enumdef': [ + (r'enum', Keyword.Declaration, ('enumdefprebody', 'typedecl')), + ], + 'enumdefprebody': [ + include('whitespace'), + (r'{', Punctuation, ('#pop','enumdefbody')), + ], + 'classdef': [ + (r'class', Keyword.Declaration, ('classdefprebody', 'typedecl')), + ], + 'classdefprebody': [ + include('whitespace'), + (r'(extends|implements)', Keyword.Declaration,'typedecl'), + (r'{', Punctuation, ('#pop', 'classdefbody')), + ], + 'interfacedef': [ + (r'interface', Keyword.Declaration, + ('interfacedefprebody', 'typedecl')), + ], + 'interfacedefprebody': [ + include('whitespace'), + (r'(extends)', Keyword.Declaration, 'typedecl'), + (r'{', Punctuation, ('#pop', 'classdefbody')), + ], + + 'typedefbody': [ + include('whitespace'), + include('instancevardef'), + include('instancefundef'), + (r'>', Punctuation, 'typedecl'), + (r',', Punctuation), + (r'}', Punctuation, '#pop'), + ], + 'enumdefbody': [ + include('whitespace'), + (ident, Name.Variable.Instance), + (r'\(', Punctuation, 'funargdecl'), + (r';', Punctuation), + (r'}', Punctuation, '#pop'), + ], + 'classdefbody': [ + include('whitespace'), + include('instancevardef'), + include('instancefundef'), + (r'}', Punctuation, '#pop'), + include('codeblock'), + ], + } + + def analyse_text(text): + if re.match(r'\w+\s*:\s*\w', text): return 0.3 + + +def _indentation(lexer, match, ctx): + indentation = match.group(0) + yield match.start(), Text, indentation + ctx.last_indentation = indentation + 
ctx.pos = match.end() + + if hasattr(ctx, 'block_state') and ctx.block_state and \ + indentation.startswith(ctx.block_indentation) and \ + indentation != ctx.block_indentation: + ctx.stack.append(ctx.block_state) + else: + ctx.block_state = None + ctx.block_indentation = None + ctx.stack.append('content') + +def _starts_block(token, state): + def callback(lexer, match, ctx): + yield match.start(), token, match.group(0) + + if hasattr(ctx, 'last_indentation'): + ctx.block_indentation = ctx.last_indentation + else: + ctx.block_indentation = '' + + ctx.block_state = state + ctx.pos = match.end() + + return callback + + +class HamlLexer(ExtendedRegexLexer): + """ + For Haml markup. + + *New in Pygments 1.3.* + """ + + name = 'Haml' + aliases = ['haml', 'HAML'] + filenames = ['*.haml'] + mimetypes = ['text/x-haml'] + + flags = re.IGNORECASE + # Haml can include " |\n" anywhere, + # which is ignored and used to wrap long lines. + # To accomodate this, use this custom faux dot instead. + _dot = r'(?: \|\n(?=.* \|)|.)' + tokens = { + 'root': [ + (r'[ \t]*\n', Text), + (r'[ \t]*', _indentation), + ], + + 'css': [ + (r'\.[a-z0-9_:-]+', Name.Class, 'tag'), + (r'\#[a-z0-9_:-]+', Name.Function, 'tag'), + ], + + 'eval-or-plain': [ + (r'[&!]?==', Punctuation, 'plain'), + (r'([&!]?[=~])(' + _dot + '*\n)', + bygroups(Punctuation, using(RubyLexer)), + 'root'), + (r'', Text, 'plain'), + ], + + 'content': [ + include('css'), + (r'%[a-z0-9_:-]+', Name.Tag, 'tag'), + (r'!!!' 
+ _dot + '*\n', Name.Namespace, '#pop'), + (r'(/)(\[' + _dot + '*?\])(' + _dot + '*\n)', + bygroups(Comment, Comment.Special, Comment), + '#pop'), + (r'/' + _dot + '*\n', _starts_block(Comment, 'html-comment-block'), + '#pop'), + (r'-#' + _dot + '*\n', _starts_block(Comment.Preproc, + 'haml-comment-block'), '#pop'), + (r'(-)(' + _dot + '*\n)', + bygroups(Punctuation, using(RubyLexer)), + '#pop'), + (r':' + _dot + '*\n', _starts_block(Name.Decorator, 'filter-block'), + '#pop'), + include('eval-or-plain'), + ], + + 'tag': [ + include('css'), + (r'\{(,\n|' + _dot + ')*?\}', using(RubyLexer)), + (r'\[' + _dot + '*?\]', using(RubyLexer)), + (r'\(', Text, 'html-attributes'), + (r'/[ \t]*\n', Punctuation, '#pop:2'), + (r'[<>]{1,2}(?=[ \t=])', Punctuation), + include('eval-or-plain'), + ], + + 'plain': [ + (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), + (r'(#\{)(' + _dot + '*?)(\})', + bygroups(String.Interpol, using(RubyLexer), String.Interpol)), + (r'\n', Text, 'root'), + ], + + 'html-attributes': [ + (r'\s+', Text), + (r'[a-z0-9_:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), + (r'[a-z0-9_:-]+', Name.Attribute), + (r'\)', Text, '#pop'), + ], + + 'html-attribute-value': [ + (r'[ \t]+', Text), + (r'[a-z0-9_]+', Name.Variable, '#pop'), + (r'@[a-z0-9_]+', Name.Variable.Instance, '#pop'), + (r'\$[a-z0-9_]+', Name.Variable.Global, '#pop'), + (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'), + (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'), + ], + + 'html-comment-block': [ + (_dot + '+', Comment), + (r'\n', Text, 'root'), + ], + + 'haml-comment-block': [ + (_dot + '+', Comment.Preproc), + (r'\n', Text, 'root'), + ], + + 'filter-block': [ + (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), + (r'(#\{)(' + _dot + '*?)(\})', + bygroups(String.Interpol, using(RubyLexer), String.Interpol)), + (r'\n', Text, 'root'), + ], + } + + +class SassLexer(ExtendedRegexLexer): + """ + For Sass stylesheets. 
+ + *New in Pygments 1.3.* + """ + + name = 'Sass' + aliases = ['sass', 'SASS'] + filenames = ['*.sass'] + mimetypes = ['text/x-sass'] + + flags = re.IGNORECASE + tokens = { + 'root': [ + (r'[ \t]*\n', Text), + (r'[ \t]*', _indentation), + ], + + 'content': [ + (r'//[^\n]*', _starts_block(Comment.Single, 'single-comment'), + 'root'), + (r'/\*[^\n]*', _starts_block(Comment.Multiline, 'multi-comment'), + 'root'), + (r'@import', Keyword, 'import'), + (r'@for', Keyword, 'for'), + (r'@(debug|if|while)', Keyword, 'script'), + (r'@[a-z0-9_-]+', Keyword, 'selector'), + (r'=[\w-]+', Name.Function, 'script'), + (r'\+[\w-]+', Name.Decorator, 'script'), + (r'(![a-z_]\w*)([ \t]*(?:\|\|)?=)', + bygroups(Name.Variable, Operator), 'script'), + (r':', Name.Attribute, 'old-style-attr'), + (r'(?=[^\s:"\[]+\s*[=:]([ \t]|$))', Name.Attribute, 'new-style-attr'), + (r'', Text, 'selector'), + ], + + 'single-comment': [ + (r'.+', Comment.Single), + (r'\n', Text, 'root'), + ], + + 'multi-comment': [ + (r'.+', Comment.Multiline), + (r'\n', Text, 'root'), + ], + + 'import': [ + (r'[ \t]+', Text), + (r'[^\s]+', String), + (r'\n', Text, 'root'), + ], + + 'for': [ + (r'(from|to|through)', Operator.Word), + include('script'), + ], + + 'old-style-attr': [ + (r'[^\s:="\[]+', Name.Attribute), + (r'#{', String.Interpol, 'interpolation'), + (r'[ \t]*=', Operator, 'script'), + (r'', Text, 'value'), + ], + + 'new-style-attr': [ + (r'[^\s:="\[]+', Name.Attribute), + (r'#{', String.Interpol, 'interpolation'), + (r'[ \t]*=', Operator, 'script'), + (r':', Name.Attribute, 'value'), + ], + + 'value': [ + (r'[ \t]+', Text), + (r'url\(', String.Other, 'string-url'), + (r'(azimuth|background-attachment|background-color|' + r'background-image|background-position|background-repeat|' + r'background|border-bottom-color|border-bottom-style|' + r'border-bottom-width|border-left-color|border-left-style|' + r'border-left-width|border-right|border-right-color|' + r'border-right-style|border-right-width|border-top-color|' 
+ r'border-top-style|border-top-width|border-bottom|' + r'border-collapse|border-left|border-width|border-color|' + r'border-spacing|border-style|border-top|border|caption-side|' + r'clear|clip|color|content|counter-increment|counter-reset|' + r'cue-after|cue-before|cue|cursor|direction|display|' + r'elevation|empty-cells|float|font-family|font-size|' + r'font-size-adjust|font-stretch|font-style|font-variant|' + r'font-weight|font|height|letter-spacing|line-height|' + r'list-style-type|list-style-image|list-style-position|' + r'list-style|margin-bottom|margin-left|margin-right|' + r'margin-top|margin|marker-offset|marks|max-height|max-width|' + r'min-height|min-width|opacity|orphans|outline|outline-color|' + r'outline-style|outline-width|overflow|padding-bottom|' + r'padding-left|padding-right|padding-top|padding|page|' + r'page-break-after|page-break-before|page-break-inside|' + r'pause-after|pause-before|pause|pitch|pitch-range|' + r'play-during|position|quotes|richness|right|size|' + r'speak-header|speak-numeral|speak-punctuation|speak|' + r'speech-rate|stress|table-layout|text-align|text-decoration|' + r'text-indent|text-shadow|text-transform|top|unicode-bidi|' + r'vertical-align|visibility|voice-family|volume|white-space|' + r'widows|width|word-spacing|z-index|bottom|left|' + r'above|absolute|always|armenian|aural|auto|avoid|baseline|' + r'behind|below|bidi-override|blink|block|bold|bolder|both|' + r'capitalize|center-left|center-right|center|circle|' + r'cjk-ideographic|close-quote|collapse|condensed|continuous|' + r'crop|crosshair|cross|cursive|dashed|decimal-leading-zero|' + r'decimal|default|digits|disc|dotted|double|e-resize|embed|' + r'extra-condensed|extra-expanded|expanded|fantasy|far-left|' + r'far-right|faster|fast|fixed|georgian|groove|hebrew|help|' + r'hidden|hide|higher|high|hiragana-iroha|hiragana|icon|' + r'inherit|inline-table|inline|inset|inside|invert|italic|' + r'justify|katakana-iroha|katakana|landscape|larger|large|' + 
r'left-side|leftwards|level|lighter|line-through|list-item|' + r'loud|lower-alpha|lower-greek|lower-roman|lowercase|ltr|' + r'lower|low|medium|message-box|middle|mix|monospace|' + r'n-resize|narrower|ne-resize|no-close-quote|no-open-quote|' + r'no-repeat|none|normal|nowrap|nw-resize|oblique|once|' + r'open-quote|outset|outside|overline|pointer|portrait|px|' + r'relative|repeat-x|repeat-y|repeat|rgb|ridge|right-side|' + r'rightwards|s-resize|sans-serif|scroll|se-resize|' + r'semi-condensed|semi-expanded|separate|serif|show|silent|' + r'slow|slower|small-caps|small-caption|smaller|soft|solid|' + r'spell-out|square|static|status-bar|super|sw-resize|' + r'table-caption|table-cell|table-column|table-column-group|' + r'table-footer-group|table-header-group|table-row|' + r'table-row-group|text|text-bottom|text-top|thick|thin|' + r'transparent|ultra-condensed|ultra-expanded|underline|' + r'upper-alpha|upper-latin|upper-roman|uppercase|url|' + r'visible|w-resize|wait|wider|x-fast|x-high|x-large|x-loud|' + r'x-low|x-small|x-soft|xx-large|xx-small|yes)\b', Name.Constant), + (r'(indigo|gold|firebrick|indianred|yellow|darkolivegreen|' + r'darkseagreen|mediumvioletred|mediumorchid|chartreuse|' + r'mediumslateblue|black|springgreen|crimson|lightsalmon|brown|' + r'turquoise|olivedrab|cyan|silver|skyblue|gray|darkturquoise|' + r'goldenrod|darkgreen|darkviolet|darkgray|lightpink|teal|' + r'darkmagenta|lightgoldenrodyellow|lavender|yellowgreen|thistle|' + r'violet|navy|orchid|blue|ghostwhite|honeydew|cornflowerblue|' + r'darkblue|darkkhaki|mediumpurple|cornsilk|red|bisque|slategray|' + r'darkcyan|khaki|wheat|deepskyblue|darkred|steelblue|aliceblue|' + r'gainsboro|mediumturquoise|floralwhite|coral|purple|lightgrey|' + r'lightcyan|darksalmon|beige|azure|lightsteelblue|oldlace|' + r'greenyellow|royalblue|lightseagreen|mistyrose|sienna|' + r'lightcoral|orangered|navajowhite|lime|palegreen|burlywood|' + r'seashell|mediumspringgreen|fuchsia|papayawhip|blanchedalmond|' + 
r'peru|aquamarine|white|darkslategray|ivory|dodgerblue|' + r'lemonchiffon|chocolate|orange|forestgreen|slateblue|olive|' + r'mintcream|antiquewhite|darkorange|cadetblue|moccasin|' + r'limegreen|saddlebrown|darkslateblue|lightskyblue|deeppink|' + r'plum|aqua|darkgoldenrod|maroon|sandybrown|magenta|tan|' + r'rosybrown|pink|lightblue|palevioletred|mediumseagreen|' + r'dimgray|powderblue|seagreen|snow|mediumblue|midnightblue|' + r'paleturquoise|palegoldenrod|whitesmoke|darkorchid|salmon|' + r'lightslategray|lawngreen|lightgreen|tomato|hotpink|' + r'lightyellow|lavenderblush|linen|mediumaquamarine|green|' + r'blueviolet|peachpuff)\b', Name.Entity), + (r'\!important', Name.Exception), + (r'/\*', Comment, 'inline-comment'), + (r'\#[a-z0-9]{1,6}', Number.Hex), + (r'(-?\d+)(\%|[a-z]+)?', bygroups(Number.Integer, Keyword.Type)), + (r'(-?\d*\.\d+)(\%|[a-z]+)?', bygroups(Number.Float, Keyword.Type)), + (r'#{', String.Interpol, 'interpolation'), + (r'[~\^\*!&%<>\|+=@:,./?-]+', Operator), + (r'[\[\]();]+', Punctuation), + (r'"', String.Double, 'string-double'), + (r"'", String.Single, 'string-single'), + (r'[a-z][\w-]*', Name), + (r'\n', Text, 'root'), + ], + + 'script': [ + (r'[ \t]+', Text), + (r'![\w_]+', Name.Variable), + (r'[+\-*/%=(),!><]', Operator), + (r'"', String.Double, 'string-double'), + (r"'", String.Single, 'string-single'), + (r'\#[a-z0-9]{1,6}', Number.Hex), + (r'(-?\d+)(\%|[a-z]+)?', bygroups(Number.Integer, Keyword.Type)), + (r'(-?\d*\.\d+)(\%|[a-z]+)?', bygroups(Number.Float, Keyword.Type)), + (r'(black|silver|gray|white|maroon|red|purple|fuchsia|green|' + r'lime|olive|yellow|navy|blue|teal|aqua)\b', Name.Builtin), + (r'(true|false)', Name.Pseudo), + (r'(and|or|not)', Operator.Word), + (r'(\\.|[^\s\\+*\/%(),=!])+(?=[ \t]*\()', Name.Function), + (r'(\\.|[^\s\\+*\/%(),=!])+', Name), + (r'\n', Text, 'root'), + ], + + 'interpolation': [ + (r'\}', String.Interpol, '#pop'), + include('script'), + ], + + 'selector': [ + (r'[ \t]+', Text), + (r'\:', Name.Decorator, 
'pseudo-class'), + (r'\.', Name.Class, 'class'), + (r'\#', Name.Namespace, 'id'), + (r'[a-zA-Z0-9_-]+', Name.Tag), + (r'#\{', String.Interpol, 'interpolation'), + (r'&', Keyword), + (r'[~\^\*!&\[\]\(\)<>\|+=@:;,./?-]', Operator), + (r'"', String.Double, 'string-double'), + (r"'", String.Single, 'string-single'), + (r'\n', Text, 'root'), + ], + + 'string-double': [ + (r'(\\.|#(?=[^\n{])|[^\n"#])+', String.Double), + (r'#\{', String.Interpol, 'interpolation'), + (r'"', String.Double, '#pop'), + ], + + 'string-single': [ + (r"(\\.|#(?=[^\n{])|[^\n'#])+", String.Double), + (r'#\{', String.Interpol, 'interpolation'), + (r"'", String.Double, '#pop'), + ], + + 'string-url': [ + (r'(\\#|#(?=[^\n{])|[^\n#)])+', String.Other), + (r'#\{', String.Interpol, 'interpolation'), + (r'\)', String.Other, '#pop'), + ], + + 'inline-comment': [ + (r"(\\#|#(?=[^\n{])|\*(?=[^\n/])|[^\n#*])+", Comment), + (r'#\{', String.Interpol, 'interpolation'), + (r"\*/", Comment, '#pop'), + ], + + 'pseudo-class': [ + (r'[\w-]+', Name.Decorator), + (r'#\{', String.Interpol, 'interpolation'), + (r'', Text, '#pop'), + ], + + 'class': [ + (r'[\w-]+', Name.Class), + (r'#\{', String.Interpol, 'interpolation'), + (r'', Text, '#pop'), + ], + + 'id': [ + (r'[\w-]+', Name.Namespace), + (r'#\{', String.Interpol, 'interpolation'), + (r'', Text, '#pop'), + ], + } + + +class CoffeeScriptLexer(RegexLexer): + """ + For `CoffeeScript`_ source code. + + .. 
_CoffeeScript: http://jashkenas.github.com/coffee-script/ + + *New in Pygments 1.3.* + """ + + name = 'CoffeeScript' + aliases = ['coffee-script', 'coffeescript'] + filenames = ['*.coffee'] + mimetypes = ['text/coffeescript'] + + flags = re.DOTALL + tokens = { + 'commentsandwhitespace': [ + (r'\s+', Text), + (r'#.*?\n', Comment.Single), + ], + 'slashstartsregex': [ + include('commentsandwhitespace'), + (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' + r'([gim]+\b|\B)', String.Regex, '#pop'), + (r'(?=/)', Text, ('#pop', 'badregex')), + (r'', Text, '#pop'), + ], + 'badregex': [ + ('\n', Text, '#pop'), + ], + 'root': [ + (r'^(?=\s|/|