Skip to content

Commit 2c8c6e5

Browse files
committed
Refactor train-umap/apply-umap into a ragnar umap train/ragnar umap apply Thor subcommand group with new specs and updated string references.
1 parent 32ca1d9 commit 2c8c6e5

File tree

9 files changed

+227
-84
lines changed

9 files changed

+227
-84
lines changed

Gemfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
source "https://rubygems.org"
44

55
gemspec
6+
#gem "thor-interactive", path: "../thor-interactive"

lib/ragnar/cli.rb

Lines changed: 3 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
require_relative "cli/umap"
12
require_relative "cli_visualization"
23
require_relative "config"
34
require "thor/interactive"
@@ -116,83 +117,8 @@ def index(path)
116117
end
117118
end
118119

119-
desc "train-umap", "Train UMAP model on existing embeddings"
120-
option :db_path, type: :string, desc: "Path to Lance database (default from config)"
121-
option :n_components, type: :numeric, default: 50, desc: "Number of dimensions for reduction"
122-
option :n_neighbors, type: :numeric, default: 15, desc: "Number of neighbors for UMAP"
123-
option :min_dist, type: :numeric, default: 0.1, desc: "Minimum distance for UMAP"
124-
option :model_path, type: :string, desc: "Path to save UMAP model"
125-
def train_umap
126-
say "Training UMAP model on embeddings...", :green
127-
128-
config = Config.instance
129-
# Use model_path from options if provided, otherwise use config models_dir
130-
model_path = if options[:model_path]
131-
options[:model_path]
132-
else
133-
File.join(config.models_dir, "umap_model.bin")
134-
end
135-
136-
processor = UmapProcessor.new(
137-
db_path: options[:db_path] || config.database_path,
138-
model_path: model_path
139-
)
140-
141-
begin
142-
stats = processor.train(
143-
n_components: options[:n_components] || 50,
144-
n_neighbors: options[:n_neighbors] || 15,
145-
min_dist: options[:min_dist] || 0.1
146-
)
147-
148-
say "\nUMAP training complete!", :green
149-
say "Embeddings processed: #{stats[:embeddings_count]}"
150-
say "Original dimensions: #{stats[:original_dims]}"
151-
say "Reduced dimensions: #{stats[:reduced_dims]}"
152-
say "Model saved to: #{processor.model_path}"
153-
rescue => e
154-
say "Error during UMAP training: #{e.message}", :red
155-
exit 1
156-
end
157-
end
158-
159-
desc "apply-umap", "Apply trained UMAP model to reduce embedding dimensions"
160-
option :db_path, type: :string, desc: "Path to Lance database (default from config)"
161-
option :model_path, type: :string, desc: "Path to UMAP model"
162-
option :batch_size, type: :numeric, default: 100, desc: "Batch size for processing"
163-
def apply_umap
164-
config = Config.instance
165-
model_path = if options[:model_path]
166-
options[:model_path]
167-
else
168-
File.join(config.models_dir, "umap_model.bin")
169-
end
170-
171-
unless File.exist?(model_path)
172-
say "Error: UMAP model not found at: #{model_path}", :red
173-
say "Please run 'train-umap' first to create a model.", :yellow
174-
exit 1
175-
end
176-
177-
say "Applying UMAP model to embeddings...", :green
178-
179-
processor = UmapProcessor.new(
180-
db_path: options[:db_path] || config.database_path,
181-
model_path: model_path
182-
)
183-
184-
begin
185-
stats = processor.apply(batch_size: options[:batch_size] || 100)
186-
187-
say "\nUMAP application complete!", :green
188-
say "Embeddings processed: #{stats[:processed]}"
189-
say "Already processed: #{stats[:skipped]}"
190-
say "Errors: #{stats[:errors]}" if stats[:errors] > 0
191-
rescue => e
192-
say "Error applying UMAP: #{e.message}", :red
193-
exit 1
194-
end
195-
end
120+
desc "umap SUBCOMMAND", "UMAP dimensionality reduction commands"
121+
subcommand "umap", CLI::Umap
196122

197123
desc "topics", "Extract and display topics from indexed documents"
198124
option :db_path, type: :string, desc: "Path to Lance database (default from config)"

lib/ragnar/cli/umap.rb

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# frozen_string_literal: true
2+
3+
module Ragnar
  class CLI < Thor
    # Thor subcommand group for UMAP dimensionality reduction.
    #
    # Commands:
    # * +train+ - calls UmapProcessor#train and prints the returned stats.
    # * +apply+ - calls UmapProcessor#apply using a previously saved model file.
    #
    # Both commands print a red error message and exit with status 1 when the
    # processor raises a StandardError.
    class Umap < Thor
      # File name looked up inside Config#models_dir when --model-path is omitted.
      DEFAULT_MODEL_FILENAME = "umap_model.bin"

      desc "train", "Train UMAP model on existing embeddings"
      option :db_path, type: :string, desc: "Path to Lance database (default from config)"
      option :n_components, type: :numeric, default: 50, desc: "Number of dimensions for reduction"
      option :n_neighbors, type: :numeric, default: 15, desc: "Number of neighbors for UMAP"
      option :min_dist, type: :numeric, default: 0.1, desc: "Minimum distance for UMAP"
      option :model_path, type: :string, desc: "Path to save UMAP model"
      # Trains a UMAP model and reports the stats hash returned by the processor
      # (:embeddings_count, :original_dims, :reduced_dims).
      def train
        say "Training UMAP model on embeddings...", :green

        config = Config.instance
        processor = build_processor(config, resolve_model_path(config))

        begin
          # The `||` fallbacks keep the command usable when it is invoked
          # without Thor having populated the option defaults.
          stats = processor.train(
            n_components: options[:n_components] || 50,
            n_neighbors: options[:n_neighbors] || 15,
            min_dist: options[:min_dist] || 0.1
          )

          say "\nUMAP training complete!", :green
          say "Embeddings processed: #{stats[:embeddings_count]}"
          say "Original dimensions: #{stats[:original_dims]}"
          say "Reduced dimensions: #{stats[:reduced_dims]}"
          say "Model saved to: #{processor.model_path}"
        rescue => e
          say "Error during UMAP training: #{e.message}", :red
          exit 1
        end
      end

      desc "apply", "Apply trained UMAP model to reduce embedding dimensions"
      option :db_path, type: :string, desc: "Path to Lance database (default from config)"
      option :model_path, type: :string, desc: "Path to UMAP model"
      option :batch_size, type: :numeric, default: 100, desc: "Batch size for processing"
      # Applies a saved UMAP model and reports the stats hash returned by the
      # processor (:processed, :skipped, :errors). Exits with status 1 before
      # building a processor when the model file does not exist.
      def apply
        config = Config.instance
        model_path = resolve_model_path(config)

        unless File.exist?(model_path)
          say "Error: UMAP model not found at: #{model_path}", :red
          say "Please run 'ragnar umap train' first to create a model.", :yellow
          exit 1
        end

        say "Applying UMAP model to embeddings...", :green

        processor = build_processor(config, model_path)

        begin
          stats = processor.apply(batch_size: options[:batch_size] || 100)

          say "\nUMAP application complete!", :green
          say "Embeddings processed: #{stats[:processed]}"
          say "Already processed: #{stats[:skipped]}"
          say "Errors: #{stats[:errors]}" if stats[:errors] > 0
        rescue => e
          say "Error applying UMAP: #{e.message}", :red
          exit 1
        end
      end

      # Private methods are not registered as Thor commands, so these helpers
      # do not show up in `ragnar umap help`.
      private

      # Returns the --model-path option when given, otherwise the default
      # model file inside the configured models directory.
      def resolve_model_path(config)
        options[:model_path] || File.join(config.models_dir, DEFAULT_MODEL_FILENAME)
      end

      # Builds the processor for the --db-path option (falling back to the
      # configured database path) and the given model path.
      def build_processor(config, model_path)
        UmapProcessor.new(
          db_path: options[:db_path] || config.database_path,
          model_path: model_path
        )
      end
    end
  end
end

lib/ragnar/indexer.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def suggest_umap_training
402402
puts "Training a UMAP model can significantly improve query performance."
403403
puts ""
404404
puts "To train UMAP, run:"
405-
puts " ragnar train-umap"
405+
puts " ragnar umap train"
406406
puts ""
407407
puts "This will:"
408408
puts " • Reduce embedding dimensions from #{stats[:embedding_dims]} to 50-64"
@@ -427,7 +427,7 @@ def check_umap_retraining_needed?
427427
if current_doc_count > model_doc_count * 2
428428
puts "\n⚠️ UMAP model may be outdated"
429429
puts "Model was trained on #{model_doc_count} documents, now have #{current_doc_count}"
430-
puts "Consider retraining with: ragnar train-umap"
430+
puts "Consider retraining with: ragnar umap train"
431431
return true
432432
end
433433
rescue => e

lib/ragnar/umap_processor.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def train(n_components: Ragnar::DEFAULT_REDUCED_DIMENSIONS, n_neighbors: 15, min
160160
error_msg += " • There are duplicate or corrupted embeddings\n\n"
161161
error_msg += "Suggested solutions:\n"
162162
error_msg += " 1. Try with more conservative parameters:\n"
163-
error_msg += " ragnar train-umap --n-components 10 --n-neighbors 5\n\n"
163+
error_msg += " ragnar umap train --n-components 10 --n-neighbors 5\n\n"
164164
error_msg += " 2. Re-index your documents to regenerate embeddings:\n"
165165
error_msg += " ragnar index <path> --force\n\n"
166166
error_msg += " 3. Check your embedding model configuration\n\n"
@@ -173,7 +173,7 @@ def train(n_components: Ragnar::DEFAULT_REDUCED_DIMENSIONS, n_neighbors: 15, min
173173
error_msg += "Error: #{e.message}\n\n"
174174
error_msg += "This may be due to incompatible parameters or data issues.\n"
175175
error_msg += "Try using more conservative parameters:\n"
176-
error_msg += " ragnar train-umap --n-components 10 --n-neighbors 5\n"
176+
error_msg += " ragnar umap train --n-components 10 --n-neighbors 5\n"
177177
end
178178

179179
raise RuntimeError, error_msg

lib/ragnar/umap_transform_service.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def load_model!
160160
return if @umap_model
161161

162162
unless File.exist?(@model_path)
163-
raise "UMAP model not found at #{@model_path}. Please train a model first using 'ragnar train-umap'."
163+
raise "UMAP model not found at #{@model_path}. Please train a model first using 'ragnar umap train'."
164164
end
165165

166166
@umap_model = ClusterKit::Dimensionality::UMAP.load_model(@model_path)

ragnar.gemspec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Gem::Specification.new do |spec|
3636
spec.add_dependency "baran", "~> 0.2"
3737
spec.add_dependency "parsekit", "~> 0.1", ">= 0.1.2"
3838
spec.add_dependency "tty-progressbar", "~> 0.18"
39-
spec.add_dependency "thor-interactive", "~> 0.1.0.pre.4"
39+
spec.add_dependency "thor-interactive", "~> 0.1.0.pre.5"
4040

4141
# Development dependencies
4242
spec.add_development_dependency "rake", "~> 13.0"

spec/unit/cli_umap_spec.rb

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# frozen_string_literal: true
2+
3+
require "spec_helper"
4+
5+
# Exercises the `umap train` and `umap apply` subcommands through
# Ragnar::CLI.start with Ragnar::Config and Ragnar::UmapProcessor replaced by
# verifying doubles, so no database or model file is touched.
RSpec.describe Ragnar::CLI::Umap do
  let(:config) do
    instance_double(
      Ragnar::Config,
      database_path: "./ragnar_database",
      models_dir: "/tmp/ragnar_test_models"
    )
  end
  # Default location the commands fall back to when --model-path is omitted.
  let(:model_path) { File.join(config.models_dir, "umap_model.bin") }

  before do
    allow(Ragnar::Config).to receive(:instance).and_return(config)
  end

  describe "train" do
    let(:processor) { instance_double(Ragnar::UmapProcessor, model_path: model_path) }
    let(:stats) { { embeddings_count: 100, original_dims: 768, reduced_dims: 50 } }

    before do
      allow(Ragnar::UmapProcessor).to receive(:new).and_return(processor)
    end

    it "trains UMAP model with default options" do
      allow(processor).to receive(:train).and_return(stats)

      expect {
        Ragnar::CLI.start(["umap", "train"])
      }.not_to raise_error
    end

    it "passes custom options to the processor" do
      custom_stats = { embeddings_count: 100, original_dims: 768, reduced_dims: 10 }
      expect(Ragnar::UmapProcessor).to receive(:new).with(
        db_path: "./ragnar_database",
        model_path: model_path
      ).and_return(processor)

      expect(processor).to receive(:train).with(
        n_components: 10,
        n_neighbors: 5,
        min_dist: 0.1
      ).and_return(custom_stats)

      Ragnar::CLI.start(["umap", "train", "--n-components", "10", "--n-neighbors", "5"])
    end

    it "uses custom model path when provided" do
      custom_path = "/custom/path/model.bin"

      expect(Ragnar::UmapProcessor).to receive(:new).with(
        db_path: "./ragnar_database",
        model_path: custom_path
      ).and_return(processor)

      allow(processor).to receive(:train).and_return(stats)

      Ragnar::CLI.start(["umap", "train", "--model-path", custom_path])
    end

    it "uses custom db path when provided" do
      custom_db = "/custom/db/path"

      expect(Ragnar::UmapProcessor).to receive(:new).with(
        db_path: custom_db,
        model_path: model_path
      ).and_return(processor)

      allow(processor).to receive(:train).and_return(stats)

      Ragnar::CLI.start(["umap", "train", "--db-path", custom_db])
    end

    it "handles training errors gracefully" do
      allow(processor).to receive(:train).and_raise("No embeddings found")

      # Pin the status so a silent `exit 0` would fail this example.
      expect {
        Ragnar::CLI.start(["umap", "train"])
      }.to raise_error(SystemExit) { |error| expect(error.status).to eq(1) }
    end
  end

  describe "apply" do
    let(:processor) { instance_double(Ragnar::UmapProcessor) }
    let(:stats) { { processed: 50, skipped: 0, errors: 0 } }

    before do
      allow(Ragnar::UmapProcessor).to receive(:new).and_return(processor)
      # Only the model path is stubbed per example; every other File.exist?
      # call keeps its real behavior.
      allow(File).to receive(:exist?).and_call_original
    end

    it "applies UMAP model when model exists" do
      allow(File).to receive(:exist?).with(model_path).and_return(true)
      allow(processor).to receive(:apply).and_return(stats)

      expect {
        Ragnar::CLI.start(["umap", "apply"])
      }.not_to raise_error
    end

    it "passes batch size option to the processor" do
      allow(File).to receive(:exist?).with(model_path).and_return(true)

      expect(processor).to receive(:apply).with(batch_size: 200).and_return(stats)

      Ragnar::CLI.start(["umap", "apply", "--batch-size", "200"])
    end

    it "exits with error when model does not exist" do
      allow(File).to receive(:exist?).with(model_path).and_return(false)
      # The command must bail out before doing any work on the embeddings.
      expect(processor).not_to receive(:apply)

      expect {
        Ragnar::CLI.start(["umap", "apply"])
      }.to raise_error(SystemExit) { |error| expect(error.status).to eq(1) }
    end

    it "handles apply errors gracefully" do
      allow(File).to receive(:exist?).with(model_path).and_return(true)
      allow(processor).to receive(:apply).and_raise("Database error")

      expect {
        Ragnar::CLI.start(["umap", "apply"])
      }.to raise_error(SystemExit) { |error| expect(error.status).to eq(1) }
    end
  end
end

spec/unit/umap_transform_service_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@
336336
it "raises error with helpful message" do
337337
expect {
338338
service.send(:load_model!)
339-
}.to raise_error(/UMAP model not found.*train-umap/)
339+
}.to raise_error(/UMAP model not found.*umap train/)
340340
end
341341
end
342342
end

0 commit comments

Comments
 (0)