# Composite action: counts the tokens in a set of repository files with
# tiktoken, rewrites the text between two HTML comment markers in the README,
# and optionally writes an SVG badge.
name: Repo Tokens
description: Count codebase tokens with tiktoken and update a README badge

inputs:
  include:
    description: 'Glob patterns for files to count (space-separated)'
    required: true
  exclude:
    description: 'Glob patterns to exclude (space-separated)'
    required: false
    default: ''
  context-window:
    description: 'Context window size for percentage calculation'
    required: false
    default: '200000'
  readme:
    description: 'Path to README file'
    required: false
    default: 'README.md'
  encoding:
    description: 'Tiktoken encoding name'
    required: false
    default: 'cl100k_base'
  marker:
    description: 'HTML comment marker name'
    required: false
    default: 'token-count'
  badge-path:
    description: 'Path to write SVG badge (empty = no SVG)'
    required: false
    default: ''

outputs:
  tokens:
    description: 'Total token count'
    value: ${{ steps.count.outputs.tokens }}
  percentage:
    description: 'Percentage of context window'
    value: ${{ steps.count.outputs.percentage }}
  badge:
    description: 'Badge text that was inserted'
    value: ${{ steps.count.outputs.badge }}

runs:
  using: composite
  steps:
    - name: Install tiktoken
      shell: bash
      # Install via `python -m pip` so the package lands in the same
      # interpreter that the `shell: python` step below invokes.
      run: python -m pip install tiktoken

    - name: Count tokens and update README
      id: count
      shell: python
      # Inputs are passed through the environment rather than interpolated
      # into the script text, so their values are never parsed as Python.
      env:
        INPUT_INCLUDE: ${{ inputs.include }}
        INPUT_EXCLUDE: ${{ inputs.exclude }}
        INPUT_CONTEXT_WINDOW: ${{ inputs.context-window }}
        INPUT_README: ${{ inputs.readme }}
        INPUT_ENCODING: ${{ inputs.encoding }}
        INPUT_MARKER: ${{ inputs.marker }}
        INPUT_BADGE_PATH: ${{ inputs.badge-path }}
      run: |
        import glob
        import os
        import re
        import sys

        import tiktoken

        # Link target used by both the README badge text and the SVG badge.
        REPO_TOKENS_URL = "https://github.com/qwibitai/nanoclaw/tree/main/repo-tokens"

        include_patterns = os.environ["INPUT_INCLUDE"].split()
        exclude_patterns = os.environ["INPUT_EXCLUDE"].split()
        readme_path = os.environ["INPUT_README"]
        encoding_name = os.environ["INPUT_ENCODING"]
        marker = os.environ["INPUT_MARKER"]
        badge_path = os.environ.get("INPUT_BADGE_PATH", "").strip()

        # Validate up front: the percentage below divides by this value.
        raw_context_window = os.environ["INPUT_CONTEXT_WINDOW"]
        try:
            context_window = int(raw_context_window)
        except ValueError:
            sys.exit(f"context-window must be an integer, got {raw_context_window!r}")
        if context_window <= 0:
            sys.exit(f"context-window must be positive, got {context_window}")


        def expand(patterns):
            """Return the set of normalized paths matched by any glob pattern."""
            matches = set()
            for pattern in patterns:
                for match in glob.glob(pattern, recursive=True):
                    # Normalize so "./src/a.py" and "src/a.py" compare equal
                    # when exclude results are subtracted from include results.
                    matches.add(os.path.normpath(match))
            return matches


        # Expand globs; directories matched by a pattern are dropped.
        files = sorted(expand(include_patterns) - expand(exclude_patterns))
        files = [f for f in files if os.path.isfile(f)]

        # Count tokens
        enc = tiktoken.get_encoding(encoding_name)
        total = 0
        for path in files:
            try:
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    text = f.read()
            except OSError as e:
                # Best effort: an unreadable file is reported and skipped.
                print(f"Skipping {path}: {e}")
                continue
            # disallowed_special=() makes tiktoken encode special-token text
            # (e.g. "<|endoftext|>") as ordinary text instead of raising,
            # so files that mention such strings are still counted.
            total += len(enc.encode(text, disallowed_special=()))

        # Format: whole thousands from 100k up, one decimal from 1k up.
        if total >= 100000:
            display = f"{round(total / 1000)}k"
        elif total >= 1000:
            display = f"{total / 1000:.1f}k"
        else:
            display = str(total)

        pct = round(total / context_window * 100)
        badge = f"{display} tokens \u00b7 {pct}% of context window"

        print(f"Files: {len(files)}, Tokens: {total}, Badge: {badge}")

        # Update README (text between <!-- marker --> and <!-- /marker -->)
        marker_re = re.compile(
            rf"(<!--\s*{re.escape(marker)}\s*-->).*?(<!--\s*/{re.escape(marker)}\s*-->)",
            re.DOTALL,
        )

        with open(readme_path, "r", encoding="utf-8") as f:
            content = f.read()

        linked_badge = f'<a href="{REPO_TOKENS_URL}">{badge}</a>'
        # A function replacement inserts the badge verbatim; a string template
        # would interpret backslashes and group references inside it.
        new_content, replaced = marker_re.subn(
            lambda m: m.group(1) + linked_badge + m.group(2),
            content,
        )

        if replaced == 0:
            print(f"No '{marker}' markers found in {readme_path}")
        elif new_content != content:
            with open(readme_path, "w", encoding="utf-8") as f:
                f.write(new_content)
            print("README updated")
        else:
            print("No change to README")

        # Generate SVG badge
        if badge_path:
            label_text = "tokens"
            value_text = display
            full_desc = f"{display} tokens, {pct}% of context window"

            # Fixed per-character width estimate plus 10px padding per segment.
            cw = 7.0
            label_w = round(len(label_text) * cw) + 10
            value_w = round(len(value_text) * cw) + 10
            total_w = label_w + value_w

            # Value segment color by share of the context window.
            if pct < 30:
                color = "#4c1"
            elif pct < 50:
                color = "#97ca00"
            elif pct < 70:
                color = "#dfb317"
            else:
                color = "#e05d44"

            # Horizontal centers of the label and value segments.
            lx = label_w // 2
            vx = label_w + value_w // 2

            svg = f'''<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="{total_w}" height="20" role="img" aria-label="{full_desc}">
          <title>{full_desc}</title>
          <linearGradient id="s" x2="0" y2="100%">
            <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
            <stop offset="1" stop-opacity=".1"/>
          </linearGradient>
          <clipPath id="r">
            <rect width="{total_w}" height="20" rx="3" fill="#fff"/>
          </clipPath>
          <a xlink:href="{REPO_TOKENS_URL}">
            <g clip-path="url(#r)">
              <rect width="{label_w}" height="20" fill="#555"/>
              <rect x="{label_w}" width="{value_w}" height="20" fill="{color}"/>
              <rect width="{total_w}" height="20" fill="url(#s)"/>
              <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" font-size="11">
                <text aria-hidden="true" x="{lx}" y="15" fill="#010101" fill-opacity=".3">{label_text}</text>
                <text x="{lx}" y="14">{label_text}</text>
                <text aria-hidden="true" x="{vx}" y="15" fill="#010101" fill-opacity=".3">{value_text}</text>
                <text x="{vx}" y="14">{value_text}</text>
              </g>
            </g>
          </a>
        </svg>'''

            os.makedirs(os.path.dirname(badge_path) or ".", exist_ok=True)
            with open(badge_path, "w", encoding="utf-8") as f:
                f.write(svg)
            print(f"Badge SVG written to {badge_path}")

        # Set outputs. The badge contains a non-ASCII character, so the file
        # is written as UTF-8 explicitly instead of the locale default.
        with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
            f.write(f"tokens={total}\n")
            f.write(f"percentage={pct}\n")
            f.write(f"badge={badge}\n")
# 0 commit comments