@@ -8,6 +8,8 @@ defmodule ExMaxsimCpu do
88
99 ## Usage with Nx Tensors
1010
11+ ExMaxsimCpu exposes an Nx-only public API.
12+
1113 # Query: [q_len, dim] tensor
1214 query = Nx.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], type: :f32)
1315
@@ -20,15 +22,6 @@ defmodule ExMaxsimCpu do
2022 scores = ExMaxsimCpu.maxsim_scores(query, docs)
2123 # => #Nx.Tensor<f32[2]>
2224
23- ## Usage with Raw Binaries
24-
25- For advanced use cases, you can use raw binaries directly:
26-
27- query_bin = <<0.1::float-32-native, 0.2::float-32-native, ...>>
28- docs_bin = <<...>>
29-
30- scores = ExMaxsimCpu.maxsim_scores_raw(query_bin, q_len, dim, docs_bin, n_docs, d_len)
31-
3225 ## Performance Notes
3326
3427 - Uses Dirty CPU schedulers for compute-intensive operations
@@ -108,7 +101,7 @@ defmodule ExMaxsimCpu do
108101 @ spec maxsim_scores_variable ( Nx.Tensor . t ( ) , [ Nx.Tensor . t ( ) ] ) :: Nx.Tensor . t ( )
109102 def maxsim_scores_variable ( _query , [ ] ) do
110103 raise ArgumentError ,
111- "Empty document list not supported (Nx cannot create empty tensors). Use maxsim_scores_variable_raw/5 for empty lists. "
104+ "Empty document list not supported (Nx cannot create empty tensors)."
112105 end
113106
114107 def maxsim_scores_variable ( query , docs ) when is_list ( docs ) do
@@ -145,103 +138,6 @@ defmodule ExMaxsimCpu do
145138 Nx . from_binary ( scores_bin , :f32 )
146139 end
147140
148- @ doc """
149- Compute MaxSim scores using raw binaries (advanced API).
150-
151- This is a lower-level API for users who want to avoid Nx tensor overhead.
152-
153- ## Parameters
154-
155- - `query_bin`: Binary containing query vectors as f32 values (native endian)
156- - `q_len`: Number of query tokens
157- - `dim`: Embedding dimension
158- - `docs_bin`: Binary containing document vectors as f32 values
159- - `n_docs`: Number of documents
160- - `d_len`: Number of tokens per document (must be uniform)
161-
162- ## Returns
163-
164- Binary containing n_docs f32 scores.
165- """
166- @ spec maxsim_scores_raw (
167- binary ( ) ,
168- pos_integer ( ) ,
169- pos_integer ( ) ,
170- binary ( ) ,
171- pos_integer ( ) ,
172- pos_integer ( )
173- ) ::
174- binary ( )
175- def maxsim_scores_raw ( query_bin , q_len , dim , docs_bin , n_docs , d_len )
176- when is_binary ( query_bin ) and is_binary ( docs_bin ) and
177- is_integer ( q_len ) and q_len > 0 and
178- is_integer ( dim ) and dim > 0 and
179- is_integer ( n_docs ) and n_docs > 0 and
180- is_integer ( d_len ) and d_len > 0 do
181- expected_query_size = q_len * dim * 4
182- expected_docs_size = n_docs * d_len * dim * 4
183-
184- if byte_size ( query_bin ) != expected_query_size do
185- raise ArgumentError ,
186- "Query binary size mismatch: expected #{ expected_query_size } , got #{ byte_size ( query_bin ) } "
187- end
188-
189- if byte_size ( docs_bin ) != expected_docs_size do
190- raise ArgumentError ,
191- "Docs binary size mismatch: expected #{ expected_docs_size } , got #{ byte_size ( docs_bin ) } "
192- end
193-
194- Nif . maxsim_scores_nif ( query_bin , q_len , dim , docs_bin , n_docs , d_len )
195- end
196-
197- @ doc """
198- Compute MaxSim scores for variable-length documents using raw binaries (advanced API).
199-
200- ## Parameters
201-
202- - `query_bin`: Binary containing query vectors as f32 values
203- - `q_len`: Number of query tokens
204- - `dim`: Embedding dimension
205- - `doc_bins`: List of binaries, each containing a document's vectors
206- - `doc_lens`: List of token counts for each document
207-
208- ## Returns
209-
210- Binary containing n_docs f32 scores.
211- """
212- @ spec maxsim_scores_variable_raw ( binary ( ) , pos_integer ( ) , pos_integer ( ) , [ binary ( ) ] , [
213- pos_integer ( )
214- ] ) ::
215- binary ( )
216- def maxsim_scores_variable_raw ( _query_bin , _q_len , _dim , [ ] , [ ] ) , do: << >>
217-
218- def maxsim_scores_variable_raw ( query_bin , q_len , dim , doc_bins , doc_lens )
219- when is_binary ( query_bin ) and is_list ( doc_bins ) and is_list ( doc_lens ) do
220- expected_query_size = q_len * dim * 4
221-
222- if byte_size ( query_bin ) != expected_query_size do
223- raise ArgumentError ,
224- "Query binary size mismatch: expected #{ expected_query_size } , got #{ byte_size ( query_bin ) } "
225- end
226-
227- if length ( doc_bins ) != length ( doc_lens ) do
228- raise ArgumentError , "doc_bins and doc_lens must have the same length"
229- end
230-
231- # Validate each document binary size
232- Enum . zip ( doc_bins , doc_lens )
233- |> Enum . with_index ( )
234- |> Enum . each ( fn { { bin , len } , idx } ->
235- expected = len * dim * 4
236-
237- if byte_size ( bin ) != expected do
238- raise ArgumentError ,
239- "Doc #{ idx } binary size mismatch: expected #{ expected } , got #{ byte_size ( bin ) } "
240- end
241- end )
242-
243- Nif . maxsim_scores_variable_nif ( query_bin , q_len , dim , doc_bins , doc_lens )
244- end
245141
246142 # Private helpers
247143
0 commit comments