Skip to content

Commit 6d2b8de

Browse files
committed
release v1.1.0
1 parent 47ef7d0 commit 6d2b8de

File tree

4 files changed

+751
-886
lines changed

4 files changed

+751
-886
lines changed

examples/cosine_query_example.rs

Lines changed: 63 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
use serde::Deserialize;
2-
use serde_json::Value;
32
use std::{fs::File, path::Path};
43
use valentinus::embeddings::*;
4+
use std::sync::Arc;
55

66
/// Let's extract reviews and ratings
77
#[derive(Default, Deserialize)]
@@ -12,83 +12,66 @@ struct Review {
1212
}
1313

1414
fn main() -> Result<(), ValentinusError> {
15-
let mut documents: Vec<String> = Vec::new();
16-
let mut metadata: Vec<Vec<String>> = Vec::new();
17-
// https://www.kaggle.com/datasets/ankkur13/edmundsconsumer-car-ratings-and-reviews?resource=download&select=Scraped_Car_Review_tesla.csv
18-
let file_path = Path::new(env!("CARGO_MANIFEST_DIR"))
19-
.join("data")
20-
.join("Scraped_Car_Review_tesla.csv");
21-
let file = File::open(file_path).expect("csv file not found");
22-
let mut rdr = csv::Reader::from_reader(file);
23-
for result in rdr.deserialize() {
24-
let record: Review = result.unwrap_or_default();
25-
documents.push(record.review.unwrap_or_default());
26-
let rating: u64 = record
27-
.rating
28-
.unwrap_or_default()
29-
.parse::<u64>()
30-
.unwrap_or_default();
31-
let mut year: String = record.vehicle_title.unwrap_or_default();
32-
if !year.is_empty() {
33-
year = year[0..5].to_string();
34-
}
35-
metadata.push(vec![
36-
format!(r#"{{"Year": {}}}"#, year),
37-
format!(r#"{{"Rating": {}}}"#, rating),
38-
]);
39-
}
40-
let mut ids: Vec<String> = Vec::new();
41-
for i in 0..documents.len() {
42-
ids.push(format!("id{}", i));
43-
}
44-
let model_path = String::from("all-Mini-LM-L6-v2_onnx");
45-
let model_type = ModelType::AllMiniLmL6V2;
46-
let name = String::from("test_collection");
47-
let expected: Vec<String> = documents.clone();
48-
let mut ec: EmbeddingCollection =
49-
EmbeddingCollection::new(documents, metadata, ids, name, model_type, model_path)?;
50-
let created_docs: &Vec<String> = ec.get_documents();
51-
assert_eq!(expected, created_docs.to_vec());
52-
// save collection to db
53-
ec.save()?;
54-
// query the collection
55-
let query_string: &String = &String::from("Find the best reviews.");
56-
let result: CosineQueryResult = EmbeddingCollection::cosine_query(
57-
String::from(query_string),
58-
String::from(ec.get_view()),
59-
10,
60-
Some(vec![
61-
String::from(r#"{ "Year": {"eq": 2017} }"#),
62-
String::from(r#"{ "Rating": {"gt": 3} }"#),
63-
]),
64-
)?;
65-
assert_eq!(result.get_docs().len(), 10);
66-
let v_year: Result<Value, serde_json::Error> =
67-
serde_json::from_str(&result.get_metadata()[0][0]);
68-
let v_rating: Result<Value, serde_json::Error> =
69-
serde_json::from_str(&result.get_metadata()[0][1]);
70-
let rating_filter: u64 = 3;
71-
let year_filter: u64 = 2017;
72-
assert!(
73-
v_rating.map_err(|_| ValentinusError::TestError)?["Rating"]
74-
.as_u64()
75-
.unwrap_or(0)
76-
> rating_filter
77-
);
78-
assert_eq!(
79-
v_year.map_err(|_| ValentinusError::TestError)?["Year"]
80-
.as_u64()
81-
.unwrap_or(0),
82-
year_filter
83-
);
84-
let no_filter_result: CosineQueryResult = EmbeddingCollection::cosine_query(
85-
String::from(query_string),
86-
String::from(ec.get_view()),
87-
5,
88-
None,
89-
)?;
90-
assert_eq!(no_filter_result.get_docs().len(), 5);
91-
// remove collection from db
92-
EmbeddingCollection::delete(String::from(ec.get_view()))?;
93-
Ok(())
15+
// 1. Create a single, shared Valentinus instance.
16+
let valentinus = Arc::new(Valentinus::new("test_env")?);
17+
18+
// --- Data Loading ---
19+
let mut documents: Vec<String> = Vec::new();
20+
let mut metadata: Vec<Vec<String>> = Vec::new();
21+
let file_path = Path::new(env!("CARGO_MANIFEST_DIR"))
22+
.join("data")
23+
.join("Scraped_Car_Review_tesla.csv");
24+
let file = File::open(file_path).expect("csv file not found");
25+
let mut rdr = csv::Reader::from_reader(file);
26+
for result in rdr.deserialize() {
27+
let record: Review = result.unwrap_or_default();
28+
documents.push(record.review.unwrap_or_default());
29+
let rating: u64 = record.rating.unwrap_or_default().parse::<u64>().unwrap_or_default();
30+
let mut year: String = record.vehicle_title.unwrap_or_default();
31+
if !year.is_empty() {
32+
year = year[0..5].to_string();
33+
}
34+
metadata.push(vec![
35+
format!(r#"{{"Year": {}}}"#, year),
36+
format!(r#"{{"Rating": {}}}"#, rating),
37+
]);
38+
}
39+
let mut ids: Vec<String> = Vec::new();
40+
for i in 0..documents.len() {
41+
ids.push(format!("id{}", i));
42+
}
43+
44+
// 2. Define collection parameters
45+
let model_path = String::from("all-MiniLM-L6-v2_onnx");
46+
let model_type = ModelType::AllMiniLmL6V2;
47+
let collection_name = String::from("test_collection");
48+
49+
// 3. Create the collection using the new API
50+
valentinus.create_collection(
51+
collection_name.clone(),
52+
documents,
53+
metadata,
54+
ids,
55+
model_type,
56+
model_path,
57+
)?;
58+
59+
// 4. Query the collection
60+
let query_string = String::from("Find the best reviews.");
61+
let result = valentinus.cosine_query(
62+
query_string.clone(),
63+
collection_name.clone(),
64+
10,
65+
Some(vec![
66+
String::from(r#"{ "Year": {"eq": 2017} }"#),
67+
String::from(r#"{ "Rating": {"gt": 3} }"#),
68+
]),
69+
)?;
70+
71+
assert_eq!(result.get_docs().len(), 10);
72+
73+
// 5. Delete the collection
74+
valentinus.delete_collection(&collection_name)?;
75+
76+
Ok(())
9477
}

examples/nearest_query_example.rs

Lines changed: 0 additions & 50 deletions
This file was deleted.

0 commit comments

Comments
 (0)