@@ -101,6 +101,11 @@ release:
101101
102102 ### Installation
103103
104+ #### Homebrew (macOS and Linux)
105+ ```bash
106+ brew install agentstation/tap/tokenizer
107+ ```
108+
104109 #### Download Binary
105110 Download the appropriate binary for your platform from the assets below.
106111
@@ -115,13 +120,116 @@ release:
115120 sha256sum -c checksums.txt
116121 ```
117122
118- # Optional: Homebrew tap configuration
119- # brews:
120- # - name: tokenizer
121- # repository:
122- # owner: agentstation
123- # name: homebrew-tap
124- # directory: Formula
125- # homepage: https://github.com/agentstation/tokenizer
126- # description: "High-performance Llama 3 tokenizer in Go"
127- # license: "MIT"
# Homebrew tap configuration
brews:
  - repository:
      owner: agentstation
      name: homebrew-tap
      # No leading space inside the quotes — a " {{ ... }}" value would embed
      # a literal space in the token and break authentication.
      token: "{{ .Env.HOMEBREW_TAP_TOKEN }}"

    # Formula name
    name: tokenizer

    # Folder inside the repository ("directory" is the current GoReleaser key;
    # "folder" is deprecated)
    directory: Formula

    # Git commit information
    commit_author:
      name: "GitHub Actions"
      # NOTE(review): this line was garbled in the diff; it most likely carried
      # the committer email — confirm against the tap repo's commit history.
      email: "actions@github.com"

    commit_msg_template: "Update tokenizer to {{ .Tag }}"

    # Homepage
    homepage: "https://github.com/agentstation/tokenizer"

    # Description
    description: "High-performance tokenizer implementations in Go with unified CLI"

    # License
    license: "MIT"

    # Skip upload if version exists
    skip_upload: false

    # Dependencies
    dependencies:
      - name: go
        type: build
        # Quoted: an unquoted 1.24 would be parsed as a float
        version: "1.24"

    # Custom install block
    install: |
      if build.bottle?
        bin.install "tokenizer"
      else
        # Build from source with version information
        ldflags = %W[
          -s -w
          -X main.version=#{version}
          -X main.commit=#{Utils.git_short_head}
          -X main.buildDate=#{Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")}
          -X main.goVersion=#{Formula["go"].version}
        ]
        system "go", "build", *std_go_args(ldflags: ldflags), "./cmd/tokenizer"
      end

      # Install documentation
      doc.install "README.md", "LICENSE", "CLAUDE.md"
      doc.install "llama3/README.md" => "llama3-README.md"
      doc.install "llama3/IMPLEMENTATION.md" => "llama3-IMPLEMENTATION.md"

      # Install examples if they exist
      if Dir.exist?("examples")
        pkgshare.install "examples"
      end

    # Test block
    test: |
      # Test version command
      output = shell_output("#{bin}/tokenizer version")
      assert_match version.to_s, output
      assert_match "commit:", output
      assert_match "built:", output
      assert_match "go version:", output

      # Test help output
      assert_match "Usage:", shell_output("#{bin}/tokenizer --help")
      assert_match "Available Commands:", shell_output("#{bin}/tokenizer --help")

      # Test llama3 subcommand
      assert_match "llama3", shell_output("#{bin}/tokenizer --help")
      assert_match "encode", shell_output("#{bin}/tokenizer llama3 --help")

      # Test encoding
      output = shell_output("#{bin}/tokenizer llama3 encode 'Hello, world!'")
      assert_match "128000", output # begin_of_text token
      assert_match "9906", output # "Hello" token
      assert_match "128001", output # end_of_text token

      # Test decoding
      output = shell_output("#{bin}/tokenizer llama3 decode 128000 9906 11 1917 0 128001")
      assert_match "Hello", output
      assert_match "world", output

      # Test info command
      output = shell_output("#{bin}/tokenizer llama3 info")
      assert_match "Vocabulary Size: 128256", output
      assert_match "Regular Tokens: 128000", output
      assert_match "Special Tokens: 256", output

      # Test piping
      output = pipe_output("#{bin}/tokenizer llama3 encode", "Test input")
      assert_match "128000", output # begin_of_text token

    # Custom caveats (simplified from the manual formula)
    caveats: |
      Tokenizer has been installed! 🚀

      Quick start:
        tokenizer llama3 encode "Hello, world!"      # Encode text to tokens
        tokenizer llama3 decode 128000 9906 128001   # Decode tokens to text
        tokenizer llama3 info                        # Show tokenizer info
        tokenizer --help                             # Show all commands

      Documentation: https://github.com/agentstation/tokenizer
0 commit comments