Skip to content

Commit 89e08ef

Browse files
feat: ✨ Improve performance (#65)
1 parent 9de17a3 commit 89e08ef

File tree

23 files changed

+1155
-1168
lines changed

23 files changed

+1155
-1168
lines changed

codelimit/common/Scanner.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
from codelimit.common.Token import Token
2424
from codelimit.common.lexer_utils import lex
2525
from codelimit.common.report.Report import Report
26-
from codelimit.common.scope.scope_utils import build_scopes, unfold_scopes
26+
from codelimit.common.scope.scope_utils import build_scopes, unfold_scopes, count_lines
27+
from codelimit.common.source_utils import filter_tokens
2728
from codelimit.common.utils import (
2829
calculate_checksum,
2930
)
@@ -141,11 +142,12 @@ def scan_file(tokens: list[Token], language: Language) -> list[Measurement]:
141142
scopes = build_scopes(tokens, language)
142143
scopes = unfold_scopes(scopes)
143144
measurements: list[Measurement] = []
145+
code_tokens = filter_tokens(tokens)
144146
if scopes:
145147
for scope in scopes:
146-
length = len(scope)
147-
start_location = scope.header.token_range[0].location
148-
last_token = scope.block.tokens[-1]
148+
length = count_lines(scope, code_tokens)
149+
start_location = code_tokens[scope.header.token_range.start].location
150+
last_token = code_tokens[scope.block.end - 1]
149151
end_location = Location(
150152
last_token.location.line,
151153
last_token.location.column + len(last_token.value),

codelimit/common/TokenRange.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,37 @@
44

55

66
class TokenRange:
    """A half-open span of token indices [start, end) into a token list.

    The range stores indices only; the token list itself is passed in where
    token data is needed (see `token_string`).
    """

    def __init__(self, start: int, end: int):
        self.start = start  # index of the first token in the range
        self.end = end      # exclusive: index one past the last token

    def __str__(self):
        return f"({self.start}, {self.end})"

    def __repr__(self):
        return self.__str__()

    def token_string(self, tokens: list[Token]) -> str:
        """Return the space-joined values of the tokens covered by this range."""
        return " ".join([t.value for t in tokens[self.start:self.end]])

    def lt(self, other: TokenRange) -> bool:
        """True if this range starts before *other*."""
        return self.start < other.start

    def gt(self, other: TokenRange) -> bool:
        return other.lt(self)

    def contains(self, other: TokenRange) -> bool:
        """True if *other* lies strictly inside this range."""
        return self.start < other.start and self.end > other.end

    def overlaps(self, other: TokenRange) -> bool:
        """True if the two ranges share any part of their spans.

        BUG FIX: the previous check only tested whether *other*'s start or end
        fell inside *self*, so it returned False when *other* fully contained
        *self* — making overlaps() asymmetric. The interval test below covers
        that case while preserving the original (end-inclusive) boundary
        behavior for all previously-overlapping pairs.
        """
        return self.start <= other.end and other.start <= self.end
33+
34+
35+
def sort_token_ranges(token_ranges: list[TokenRange], tokens: list[Token], reverse=False) -> list[TokenRange]:
    """Order token ranges by the source location of each range's first token."""

    def source_position(token_range: TokenRange):
        location = tokens[token_range.start].location
        return location.line, location.column

    return sorted(token_ranges, key=source_position, reverse=reverse)

codelimit/common/scope/Header.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
11
from dataclasses import dataclass
22

3+
from codelimit.common.Token import Token
34
from codelimit.common.TokenRange import TokenRange
45

56

67
@dataclass
class Header:
    """A named scope header (e.g. a function signature) and its token span."""

    name: str
    token_range: TokenRange


def sort_headers(headers: list[Header], tokens: list[Token], reverse=False) -> list[Header]:
    """Order headers by the source location of each header's first token."""

    def source_position(header: Header):
        location = tokens[header.token_range.start].location
        return location.line, location.column

    return sorted(headers, key=source_position, reverse=reverse)

codelimit/common/scope/Scope.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
from codelimit.common.Token import Token
43
from codelimit.common.TokenRange import TokenRange
54
from codelimit.common.scope.Header import Header
65

@@ -12,31 +11,10 @@ def __init__(self, header: Header, block: TokenRange):
1211
self.children: list[Scope] = []
1312

1413
def __str__(self):
15-
return (
16-
f"[{self.header.token_range[0].location}, {self.block.tokens[-1].location}]"
17-
)
14+
return f"({self.header.token_range.start}, {self.block.end})"
1815

1916
def __repr__(self):
2017
return self.__str__()
2118

22-
def __len__(self):
23-
return count_lines(self.tokens())
24-
25-
def tokens(self):
26-
children_tokens = []
27-
for child in self.children:
28-
children_tokens.extend(child.tokens())
29-
return [
30-
t
31-
for t in self.header.token_range.tokens + self.block.tokens
32-
if t not in children_tokens
33-
]
34-
3519
def contains(self, other: Scope) -> bool:
36-
return self.header.token_range[0].location.lt(
37-
other.header.token_range[0].location
38-
) and self.block.tokens[-1].location.gt(other.block.tokens[-1].location)
39-
40-
41-
def count_lines(tokens: list[Token]):
42-
return len(set([t.location.line for t in tokens]))
20+
return self.header.token_range.start < other.header.token_range.start and self.block.end > other.block.end

codelimit/common/scope/scope_utils.py

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,13 @@
22

33
from codelimit.common.Language import Language
44
from codelimit.common.Token import Token
5-
from codelimit.common.TokenRange import TokenRange
5+
from codelimit.common.TokenRange import TokenRange, sort_token_ranges
66
from codelimit.common.gsm.Expression import Expression
77
from codelimit.common.gsm.matcher import find_all, starts_with
8-
from codelimit.common.scope.Header import Header
8+
from codelimit.common.scope.Header import Header, sort_headers
99
from codelimit.common.scope.Scope import Scope
1010
from codelimit.common.source_utils import filter_tokens, filter_nocl_comment_tokens
11-
from codelimit.common.token_utils import (
12-
sort_tokens,
13-
sort_token_ranges,
14-
get_balanced_symbol_token_indices,
15-
)
11+
from codelimit.common.token_utils import get_balanced_symbol_token_indices
1612
from codelimit.common.utils import delete_indices
1713

1814

@@ -21,8 +17,8 @@ def build_scopes(tokens: list[Token], language: Language) -> list[Scope]:
2117
nocl_comment_tokens = filter_nocl_comment_tokens(tokens)
2218
headers = language.extract_headers(code_tokens)
2319
blocks = language.extract_blocks(code_tokens, headers)
24-
scopes = _build_scopes_from_headers_and_blocks(headers, blocks)
25-
filtered_scopes = _filter_nocl_scopes(scopes, nocl_comment_tokens)
20+
scopes = _build_scopes_from_headers_and_blocks(headers, blocks, code_tokens)
21+
filtered_scopes = _filter_nocl_scopes(scopes, code_tokens, nocl_comment_tokens)
2622
if language.allow_nested_functions:
2723
return fold_scopes(filtered_scopes)
2824
else:
@@ -66,26 +62,23 @@ def filter_scopes_nested_functions(scopes: list[Scope]) -> list[Scope]:
6662

6763

6864
def _build_scopes_from_headers_and_blocks(
    headers: list[Header], blocks: list[TokenRange], tokens: list[Token]
) -> list[Scope]:
    """Pair each header with the blocks that form its scope body.

    Headers are visited in reverse source order; the blocks a header claims
    are removed from the pool, so headers processed later (earlier in the
    source) cannot reuse them. The returned scopes are in source order.
    """
    scopes: list[Scope] = []
    for header in sort_headers(headers, tokens, reverse=True):
        block_indices = _find_scope_blocks_indices(header.token_range, blocks)
        if block_indices:
            claimed = [blocks[i] for i in block_indices]
            # The scope body spans from the first claimed block to the last.
            body = TokenRange(
                min(block.start for block in claimed),
                max(block.end for block in claimed),
            )
            scopes.append(Scope(header, body))
        blocks = delete_indices(blocks, block_indices)
    scopes.reverse()
    return scopes
8578

8679

8780
def _find_scope_blocks_indices(
88-
header: TokenRange, blocks: list[TokenRange]
81+
header: TokenRange, blocks: list[TokenRange]
8982
) -> list[int]:
9083
body_block = _get_nearest_block(header, blocks)
9184
if body_block:
@@ -97,7 +90,7 @@ def _find_scope_blocks_indices(
9790

9891

9992
def _get_nearest_block(
100-
header: TokenRange, blocks: list[TokenRange]
93+
header: TokenRange, blocks: list[TokenRange]
10194
) -> Optional[TokenRange]:
10295
reverse_blocks = blocks[::-1]
10396
result = None
@@ -112,13 +105,15 @@ def _get_nearest_block(
112105

113106

114107
def _filter_nocl_scopes(
115-
scopes: list[Scope], nocl_comment_tokens: list[Token]
108+
scopes: list[Scope], tokens: list[Token], nocl_comment_tokens: list[Token]
116109
) -> list[Scope]:
117110
nocl_comment_lines = [t.location.line for t in nocl_comment_tokens]
118111

119112
def get_scope_header_lines(scope: Scope) -> set[int]:
120-
result = set([t.location.line for t in scope.header.token_range.tokens])
121-
first_line = scope.header.token_range.tokens[0].location.line
113+
header_token_range = scope.header.token_range
114+
header_tokens = tokens[header_token_range.start:header_token_range.end]
115+
result = set([t.location.line for t in header_tokens])
116+
first_line = header_tokens[0].location.line
122117
if first_line > 0:
123118
result.add(first_line - 1)
124119
return result
@@ -139,25 +134,43 @@ def has_curly_suffix(tokens: list[Token], index):
139134

140135

141136
def get_headers(
    tokens: list[Token], expression: Expression, followed_by: Expression = None
) -> list[Header]:
    """Find scope headers by matching *expression* against *tokens*.

    When *followed_by* is given, only matches that are immediately followed by
    that expression are kept. One Header is produced per match, named after
    the match's first name token; matches with no name token are skipped.
    """
    patterns = find_all(expression, tokens)
    if followed_by:
        patterns = [p for p in patterns if starts_with(followed_by, tokens[p.end:])]
    result = []
    for pattern in patterns:
        # BUG FIX: next() without a default raises StopIteration when a match
        # contains no name token; the None default makes the guard below
        # actually skip such matches instead of crashing.
        name_token = next((t for t in pattern.tokens if t.is_name()), None)
        if name_token is not None:
            result.append(Header(name_token.value, TokenRange(pattern.start, pattern.end)))
    return result
154149

155150

156151
def get_blocks(
    tokens: list[Token], open: str, close: str, extract_nested=True
) -> list[TokenRange]:
    """Return source-ordered token ranges for every balanced open/close pair."""
    ranges = []
    for first, last in get_balanced_symbol_token_indices(tokens, open, close, extract_nested):
        # The returned end index is inclusive; TokenRange ends are exclusive.
        ranges.append(TokenRange(first, last + 1))
    return sort_token_ranges(ranges, tokens)
159+
160+
161+
def count_lines(scope: Scope, tokens: list[Token]) -> int:
    """Count the distinct source lines covered by *scope*'s own tokens,
    excluding tokens that belong to any child scope."""
    return len({t.location.line for t in _scope_tokens(scope, tokens)})


def _scope_tokens(scope: Scope, tokens: list[Token]) -> list[Token]:
    """Return the scope's own tokens (header through block end), skipping
    tokens covered by a child scope."""
    # Each child covers the half-open index span [header start, block end).
    # Sorting by start index matches the previous source-location sort,
    # assuming `tokens` is in source order — TODO confirm against the lexer.
    child_spans = sorted(
        (child.header.token_range.start, child.block.end) for child in scope.children
    )
    result: list[Token] = []
    next_child = 0
    for index in range(scope.header.token_range.start, scope.block.end):
        # BUG FIX: the end bound is exclusive, so a child stops covering
        # `index` once index >= end. The previous `>` comparison wrongly
        # treated the first token after each child as still covered,
        # silently dropping it from the parent's line count.
        while next_child < len(child_spans) and index >= child_spans[next_child][1]:
            next_child += 1
        if next_child == len(child_spans) or index < child_spans[next_child][0]:
            result.append(tokens[index])
    return result
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from codelimit.common.Token import Token
2+
from codelimit.common.token_matching.predicate.TokenPredicate import TokenPredicate
3+
from codelimit.common.token_matching.predicate.TokenValue import TokenValue
4+
5+
6+
class Not(TokenPredicate):
    """Predicate that accepts exactly the tokens its wrapped predicate rejects."""

    def __init__(self, value: TokenPredicate | str):
        super().__init__()
        # A bare string is shorthand for matching that exact token value.
        if isinstance(value, str):
            self.predicate = TokenValue(value)
        else:
            self.predicate = value

    def accept(self, token: Token) -> bool:
        return not self.predicate.accept(token)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Not) and self.predicate == other.predicate

    def __hash__(self):
        return hash(self.predicate)

codelimit/common/token_utils.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
def get_balanced_symbol_token_indices(
8-
tokens: list[Token], start: str, end: str, extract_nested=False
8+
tokens: list[Token], start: str, end: str, extract_nested=False
99
) -> list[Tuple[int, int]]:
1010
result = []
1111
block_starts = []
@@ -21,32 +21,21 @@ def get_balanced_symbol_token_indices(
2121

2222

2323
def get_balanced_symbol_token_ranges(
24-
tokens: list[Token], start: str, end: str
24+
tokens: list[Token], start: str, end: str
2525
) -> list[TokenRange]:
2626
result = []
27-
token_lists = []
27+
start_indices: list[int] = []
2828
for index, t in enumerate(tokens):
2929
if t.is_symbol(start):
30-
token_lists.append([t])
30+
start_indices.append(index)
3131
elif t.is_symbol(end):
32-
if len(token_lists) > 0:
33-
token_lists[-1].append(t)
34-
tokens = token_lists.pop()
35-
result.append(TokenRange(tokens))
36-
else:
37-
if len(token_lists) > 0:
38-
token_lists[-1].append(t)
32+
if len(start_indices) > 0:
33+
start_index = start_indices.pop()
34+
result.append(TokenRange(start_index, index + 1))
3935
return result
4036

4137

4238
def sort_tokens(tokens: list[Token]) -> list[Token]:
4339
result = sorted(tokens, key=lambda t: t.location.column)
4440
result = sorted(result, key=lambda t: t.location.line)
4541
return result
46-
47-
48-
def sort_token_ranges(token_ranges: list[TokenRange]) -> list[TokenRange]:
49-
return sorted(
50-
token_ranges,
51-
key=lambda tr: (tr.tokens[0].location.line, tr.tokens[0].location.column),
52-
)

codelimit/languages/Java.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from codelimit.common.Language import Language
2+
from codelimit.common.Token import Token
23
from codelimit.common.gsm.operator.OneOrMore import OneOrMore
4+
from codelimit.common.scope.Header import Header
35
from codelimit.common.scope.scope_utils import (
46
get_blocks,
57
get_headers,
68
)
79
from codelimit.common.token_matching.predicate.Balanced import Balanced
810
from codelimit.common.token_matching.predicate.Choice import Choice
11+
from codelimit.common.token_matching.predicate.Keyword import Keyword
912
from codelimit.common.token_matching.predicate.Name import Name
1013

1114

@@ -14,9 +17,21 @@ def __init__(self):
1417
super().__init__("Java")
1518

1619
def extract_headers(self, tokens: list) -> list:
    """Match Java function headers — a name followed by one or more balanced
    parenthesis groups, then '{' or 'throws' — and drop false positives
    (see filter_headers)."""
    pattern = [Name(), OneOrMore(Balanced("(", ")"))]
    candidates = get_headers(tokens, pattern, Choice("{", "throws"))
    return filter_headers(candidates, tokens)
2024

2125
def extract_blocks(self, tokens: list, headers: list) -> list:
    """Return token ranges for all balanced curly-brace blocks.

    The `headers` argument is accepted for interface compatibility but is
    not used by the Java implementation.
    """
    return get_blocks(tokens, "{", "}")
27+
28+
29+
def filter_headers(headers: list[Header], tokens: list[Token]) -> list[Header]:
    """Drop header matches whose preceding token is 'record' or 'new' —
    presumably record declarations and anonymous-class instantiations,
    which are not function headers."""
    excluded = Choice(Keyword('record'), Keyword('new'))

    def is_function_header(header: Header) -> bool:
        previous_index = header.token_range.start - 1
        return previous_index < 0 or not excluded.accept(tokens[previous_index])

    return [header for header in headers if is_function_header(header)]

0 commit comments

Comments
 (0)