Skip to content

Commit 9a7ac44

Browse files
committed
Removed numo and simplified the implementation
1 parent cea12a3 commit 9a7ac44

File tree

8 files changed

+91
-224
lines changed

8 files changed

+91
-224
lines changed

README.md

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,10 +35,9 @@ Or install it yourself as:
3535

3636
```ruby
3737
require 'annembed'
38-
require 'numo/narray'
3938

40-
# Generate some sample data
41-
data = Numo::DFloat.new(1000, 50).rand_norm
39+
# Generate some sample data (2D array)
40+
data = Array.new(1000) { Array.new(50) { rand } }
4241

4342
# Perform UMAP embedding
4443
embedding = AnnEmbed.umap(data, n_components: 2, n_neighbors: 15)

annembed-ruby.gemspec

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,8 @@ Gem::Specification.new do |spec|
2828
spec.extensions = ["ext/annembed_ruby/extconf.rb"]
2929

3030
# Runtime dependencies
31-
spec.add_dependency "numo-narray", "~> 0.9"
31+
# Numo is optional: the embedder and preprocessing code now operate on plain Ruby arrays
32+
# spec.add_dependency "numo-narray", "~> 0.9"
3233

3334
# Development dependencies
3435
spec.add_development_dependency "rake", "~> 13.0"

examples/basic_usage.rb

Lines changed: 0 additions & 105 deletions
This file was deleted.

lib/annembed.rb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,9 @@ class InvalidParameterError < Error; end
2626

2727
class << self
2828
# Quick UMAP embedding
29-
# @param data [Array, Numo::NArray] Input data
29+
# @param data [Array, String] Input data: a 2D array of numbers, or a path to a CSV file
3030
# @param n_components [Integer] Number of dimensions in output
31-
# @return [Numo::NArray] Embedded data
31+
# @return [Array] Embedded data, as returned by RustUMAP#fit_transform (no Numo conversion is applied)
3232
def umap(data, n_components: 2, **options)
3333
embedder = Embedder.new(method: :umap, n_components: n_components, **options)
3434
embedder.fit_transform(data)

lib/annembed/embedder.rb

Lines changed: 9 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
# frozen_string_literal: true
22

3-
require "numo/narray"
4-
53
module AnnEmbed
64
# Main class for performing dimensionality reduction
75
class Embedder
@@ -29,11 +27,11 @@ def initialize(method: :umap, n_components: 2, **options)
2927
def fit_transform(data)
3028
data_array = prepare_data(data)
3129

32-
@rust_embedder = RustEmbedder.new(@config.to_h)
30+
@rust_embedder = RustUMAP.new(@config.to_h)
3331
result = @rust_embedder.fit_transform(data_array)
3432
@fitted = true
3533

36-
convert_result(result)
34+
result
3735
end
3836

3937
# Fit the embedder to data
@@ -42,7 +40,7 @@ def fit_transform(data)
4240
def fit(data)
4341
data_array = prepare_data(data)
4442

45-
@rust_embedder = RustEmbedder.new(@config.to_h)
43+
@rust_embedder = RustUMAP.new(@config.to_h)
4644
@rust_embedder.fit(data_array)
4745
@fitted = true
4846

@@ -56,9 +54,7 @@ def transform(data)
5654
raise Error, "Embedder must be fitted before transform" unless fitted?
5755

5856
data_array = prepare_data(data)
59-
result = @rust_embedder.transform(data_array)
60-
61-
convert_result(result)
57+
@rust_embedder.transform(data_array)
6258
end
6359

6460
# Check if embedder has been fitted
@@ -79,7 +75,7 @@ def save(path)
7975
# @param path [String] File path
8076
# @return [Embedder] Loaded embedder
8177
def self.load(path)
82-
rust_embedder = RustEmbedder.load(path)
78+
rust_embedder = RustUMAP.load(path)
8379
embedder = allocate
8480
embedder.instance_variable_set(:@rust_embedder, rust_embedder)
8581
embedder.instance_variable_set(:@fitted, true)
@@ -91,34 +87,20 @@ def self.load(path)
9187

9288
def prepare_data(data)
9389
case data
94-
when Numo::NArray
95-
data
9690
when Array
97-
Numo::DFloat.cast(data)
91+
# Keep as array for RustUMAP
92+
data
9893
when String
9994
# Assume it's a file path
10095
load_csv_data(data)
10196
else
102-
raise ArgumentError, "Unsupported data type: #{data.class}"
103-
end
104-
end
105-
106-
def convert_result(result)
107-
# Ensure result is a Numo::NArray
108-
case result
109-
when Numo::NArray
110-
result
111-
when Array
112-
Numo::DFloat.cast(result)
113-
else
114-
result
97+
raise ArgumentError, "Unsupported data type: #{data.class}. Expected Array or String (CSV path)"
11598
end
11699
end
117100

118101
def load_csv_data(path)
119102
require "csv"
120-
data = CSV.read(path, converters: :numeric)
121-
Numo::DFloat.cast(data)
103+
CSV.read(path, converters: :numeric)
122104
end
123105
end
124106
end

lib/annembed/preprocessing.rb

Lines changed: 66 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,80 +1,105 @@
11
# frozen_string_literal: true
22

3-
require "numo/narray"
3+
# Pure Ruby implementation of preprocessing functions
44

55
module AnnEmbed
66
# Data preprocessing utilities
77
module Preprocessing
88
class << self
99
# Normalize data using specified method
10-
# @param data [Array, Numo::NArray] Input data
10+
# @param data [Array] Input data (2D array)
1111
# @param method [Symbol] Normalization method (:standard, :minmax, :l2)
12-
# @return [Numo::NArray] Normalized data
12+
# @return [Array] Normalized data
1313
def normalize(data, method: :standard)
14-
data_array = prepare_data(data)
14+
raise ArgumentError, "Unsupported data type: #{data.class}" unless data.is_a?(Array)
1515

1616
case method
1717
when :standard
18-
standard_normalize(data_array)
18+
standard_normalize(data)
1919
when :minmax
20-
minmax_normalize(data_array)
20+
minmax_normalize(data)
2121
when :l2
22-
l2_normalize(data_array)
22+
l2_normalize(data)
2323
else
2424
raise ArgumentError, "Unknown normalization method: #{method}"
2525
end
2626
end
2727

2828
# Reduce dimensionality using PCA before embedding
29-
# @param data [Array, Numo::NArray] Input data
29+
# @param data [Array] Input data
3030
# @param n_components [Integer] Number of PCA components
31-
# @return [Numo::NArray] Reduced data
31+
# @return [Array] Reduced data
3232
def pca_reduce(data, n_components)
33-
data_array = prepare_data(data)
34-
35-
# Use SVD for PCA
36-
mean = data_array.mean(axis: 0)
37-
centered = data_array - mean
38-
39-
u, s, vt = SVD.randomized_svd(centered, n_components)
40-
u * s
33+
# Note: This would require an SVD implementation in pure Ruby
34+
# For now, raise an error suggesting to use the Rust-based SVD module
35+
raise NotImplementedError, "PCA reduction requires the SVD module which needs to be called directly"
4136
end
4237

4338
private
4439

45-
def prepare_data(data)
46-
case data
47-
when Numo::NArray
48-
data
49-
when Array
50-
Numo::DFloat.cast(data)
51-
else
52-
raise ArgumentError, "Unsupported data type: #{data.class}"
53-
end
54-
end
55-
5640
def standard_normalize(data)
57-
mean = data.mean(axis: 0)
58-
std = data.stddev(axis: 0)
59-
std[std.eq(0)] = 1.0 # Avoid division by zero
41+
# Pure Ruby implementation of standard normalization
42+
return data if data.empty?
43+
44+
# Calculate mean and std for each column
45+
n_rows = data.size
46+
n_cols = data.first.size
47+
48+
means = Array.new(n_cols, 0.0)
49+
stds = Array.new(n_cols, 0.0)
50+
51+
# Calculate means
52+
data.each do |row|
53+
row.each_with_index { |val, j| means[j] += val }
54+
end
55+
means.map! { |m| m / n_rows }
6056

61-
(data - mean) / std
57+
# Calculate standard deviations
58+
data.each do |row|
59+
row.each_with_index { |val, j| stds[j] += (val - means[j]) ** 2 }
60+
end
61+
stds.map! { |s| Math.sqrt(s / n_rows) }
62+
stds.map! { |s| s == 0 ? 1.0 : s } # Avoid division by zero
63+
64+
# Normalize
65+
data.map do |row|
66+
row.map.with_index { |val, j| (val - means[j]) / stds[j] }
67+
end
6268
end
6369

6470
# Rescale every column of a 2D array to the [0, 1] range.
#
# Each value becomes (value - column_min) / (column_max - column_min).
# A constant column (range of zero) is divided by 1.0 instead, so its
# values map to 0.0 rather than dividing by zero.
#
# @param data [Array<Array<Numeric>>] rows of numeric values; the number
#   of columns is taken from the first row
# @return [Array<Array<Float>>] a new array of rescaled rows, or +data+
#   itself when it is empty
def minmax_normalize(data)
  return data if data.empty?

  n_cols = data.first.size
  mins = Array.new(n_cols) { Float::INFINITY }
  maxs = Array.new(n_cols) { -Float::INFINITY }

  # Find min and max for each column
  data.each do |row|
    row.each_with_index do |val, j|
      mins[j] = val if val < mins[j]
      maxs[j] = val if val > maxs[j]
    end
  end

  # Ranges are forced to Float: with Integer input (e.g. [[0], [5], [10]])
  # an Integer range would make `/` perform floor division and collapse
  # every result to 0 or 1.
  ranges = mins.zip(maxs).map do |min, max|
    range = (max - min).to_f
    range.zero? ? 1.0 : range # Avoid division by zero
  end

  # Normalize
  data.map do |row|
    row.map.with_index { |val, j| (val - mins[j]) / ranges[j] }
  end
end
7295

7396
def l2_normalize(data)
74-
norms = Numo::NMath.sqrt((data**2).sum(axis: 1))
75-
norms[norms.eq(0)] = 1.0 # Avoid division by zero
76-
77-
data / norms.expand_dims(1)
97+
# Pure Ruby implementation of L2 normalization
98+
data.map do |row|
99+
norm = Math.sqrt(row.sum { |val| val ** 2 })
100+
norm = 1.0 if norm == 0 # Avoid division by zero
101+
row.map { |val| val / norm }
102+
end
78103
end
79104
end
80105
end

0 commit comments

Comments
 (0)