11use serde:: Deserialize ;
2- use serde_json:: Value ;
32use std:: { fs:: File , path:: Path } ;
43use valentinus:: embeddings:: * ;
4+ use std:: sync:: Arc ;
55
66/// Let's extract reviews and ratings
77#[ derive( Default , Deserialize ) ]
@@ -12,83 +12,66 @@ struct Review {
1212}
1313
1414fn main ( ) -> Result < ( ) , ValentinusError > {
15- let mut documents: Vec < String > = Vec :: new ( ) ;
16- let mut metadata: Vec < Vec < String > > = Vec :: new ( ) ;
17- // https://www.kaggle.com/datasets/ankkur13/edmundsconsumer-car-ratings-and-reviews?resource=download&select=Scraped_Car_Review_tesla.csv
18- let file_path = Path :: new ( env ! ( "CARGO_MANIFEST_DIR" ) )
19- . join ( "data" )
20- . join ( "Scraped_Car_Review_tesla.csv" ) ;
21- let file = File :: open ( file_path) . expect ( "csv file not found" ) ;
22- let mut rdr = csv:: Reader :: from_reader ( file) ;
23- for result in rdr. deserialize ( ) {
24- let record: Review = result. unwrap_or_default ( ) ;
25- documents. push ( record. review . unwrap_or_default ( ) ) ;
26- let rating: u64 = record
27- . rating
28- . unwrap_or_default ( )
29- . parse :: < u64 > ( )
30- . unwrap_or_default ( ) ;
31- let mut year: String = record. vehicle_title . unwrap_or_default ( ) ;
32- if !year. is_empty ( ) {
33- year = year[ 0 ..5 ] . to_string ( ) ;
34- }
35- metadata. push ( vec ! [
36- format!( r#"{{"Year": {}}}"# , year) ,
37- format!( r#"{{"Rating": {}}}"# , rating) ,
38- ] ) ;
39- }
40- let mut ids: Vec < String > = Vec :: new ( ) ;
41- for i in 0 ..documents. len ( ) {
42- ids. push ( format ! ( "id{}" , i) ) ;
43- }
44- let model_path = String :: from ( "all-Mini-LM-L6-v2_onnx" ) ;
45- let model_type = ModelType :: AllMiniLmL6V2 ;
46- let name = String :: from ( "test_collection" ) ;
47- let expected: Vec < String > = documents. clone ( ) ;
48- let mut ec: EmbeddingCollection =
49- EmbeddingCollection :: new ( documents, metadata, ids, name, model_type, model_path) ?;
50- let created_docs: & Vec < String > = ec. get_documents ( ) ;
51- assert_eq ! ( expected, created_docs. to_vec( ) ) ;
52- // save collection to db
53- ec. save ( ) ?;
54- // query the collection
55- let query_string: & String = & String :: from ( "Find the best reviews." ) ;
56- let result: CosineQueryResult = EmbeddingCollection :: cosine_query (
57- String :: from ( query_string) ,
58- String :: from ( ec. get_view ( ) ) ,
59- 10 ,
60- Some ( vec ! [
61- String :: from( r#"{ "Year": {"eq": 2017} }"# ) ,
62- String :: from( r#"{ "Rating": {"gt": 3} }"# ) ,
63- ] ) ,
64- ) ?;
65- assert_eq ! ( result. get_docs( ) . len( ) , 10 ) ;
66- let v_year: Result < Value , serde_json:: Error > =
67- serde_json:: from_str ( & result. get_metadata ( ) [ 0 ] [ 0 ] ) ;
68- let v_rating: Result < Value , serde_json:: Error > =
69- serde_json:: from_str ( & result. get_metadata ( ) [ 0 ] [ 1 ] ) ;
70- let rating_filter: u64 = 3 ;
71- let year_filter: u64 = 2017 ;
72- assert ! (
73- v_rating. map_err( |_| ValentinusError :: TestError ) ?[ "Rating" ]
74- . as_u64( )
75- . unwrap_or( 0 )
76- > rating_filter
77- ) ;
78- assert_eq ! (
79- v_year. map_err( |_| ValentinusError :: TestError ) ?[ "Year" ]
80- . as_u64( )
81- . unwrap_or( 0 ) ,
82- year_filter
83- ) ;
84- let no_filter_result: CosineQueryResult = EmbeddingCollection :: cosine_query (
85- String :: from ( query_string) ,
86- String :: from ( ec. get_view ( ) ) ,
87- 5 ,
88- None ,
89- ) ?;
90- assert_eq ! ( no_filter_result. get_docs( ) . len( ) , 5 ) ;
91- // remove collection from db
92- EmbeddingCollection :: delete ( String :: from ( ec. get_view ( ) ) ) ?;
93- Ok ( ( ) )
15+ // 1. Create a single, shared Valentinus instance.
16+ let valentinus = Arc :: new ( Valentinus :: new ( "test_env" ) ?) ;
17+
18+ // --- Data Loading ---
19+ let mut documents: Vec < String > = Vec :: new ( ) ;
20+ let mut metadata: Vec < Vec < String > > = Vec :: new ( ) ;
21+ let file_path = Path :: new ( env ! ( "CARGO_MANIFEST_DIR" ) )
22+ . join ( "data" )
23+ . join ( "Scraped_Car_Review_tesla.csv" ) ;
24+ let file = File :: open ( file_path) . expect ( "csv file not found" ) ;
25+ let mut rdr = csv:: Reader :: from_reader ( file) ;
26+ for result in rdr. deserialize ( ) {
27+ let record: Review = result. unwrap_or_default ( ) ;
28+ documents. push ( record. review . unwrap_or_default ( ) ) ;
29+ let rating: u64 = record. rating . unwrap_or_default ( ) . parse :: < u64 > ( ) . unwrap_or_default ( ) ;
30+ let mut year: String = record. vehicle_title . unwrap_or_default ( ) ;
31+ if !year. is_empty ( ) {
32+ year = year[ 0 ..5 ] . to_string ( ) ;
33+ }
34+ metadata. push ( vec ! [
35+ format!( r#"{{"Year": {}}}"# , year) ,
36+ format!( r#"{{"Rating": {}}}"# , rating) ,
37+ ] ) ;
38+ }
39+ let mut ids: Vec < String > = Vec :: new ( ) ;
40+ for i in 0 ..documents. len ( ) {
41+ ids. push ( format ! ( "id{}" , i) ) ;
42+ }
43+
44+ // 2. Define collection parameters
45+ let model_path = String :: from ( "all-MiniLM-L6-v2_onnx" ) ;
46+ let model_type = ModelType :: AllMiniLmL6V2 ;
47+ let collection_name = String :: from ( "test_collection" ) ;
48+
49+ // 3. Create the collection using the new API
50+ valentinus. create_collection (
51+ collection_name. clone ( ) ,
52+ documents,
53+ metadata,
54+ ids,
55+ model_type,
56+ model_path,
57+ ) ?;
58+
59+ // 4. Query the collection
60+ let query_string = String :: from ( "Find the best reviews." ) ;
61+ let result = valentinus. cosine_query (
62+ query_string. clone ( ) ,
63+ collection_name. clone ( ) ,
64+ 10 ,
65+ Some ( vec ! [
66+ String :: from( r#"{ "Year": {"eq": 2017} }"# ) ,
67+ String :: from( r#"{ "Rating": {"gt": 3} }"# ) ,
68+ ] ) ,
69+ ) ?;
70+
71+ assert_eq ! ( result. get_docs( ) . len( ) , 10 ) ;
72+
73+ // 5. Delete the collection
74+ valentinus. delete_collection ( & collection_name) ?;
75+
76+ Ok ( ( ) )
9477}
0 commit comments