fix(router): fix panics on partial_cmp and empty req.texts (#138)

OlivierDehaene · web-flow · commit 41312fb2af34 · 2024-01-24T18:46:50.000+01:00
diff --git a/backends/candle/build.rs b/backends/candle/build.rs
@@ -1,4 +1,4 @@
-use anyhow::{Context, Result, bail};
+use anyhow::{bail, Context, Result};
 
 fn main() {
     println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");
diff --git a/router/src/grpc/server.rs b/router/src/grpc/server.rs
@@ -143,18 +143,21 @@ impl TextEmbeddingsService {
             response.inference,
         );
 
-        let mut predictions: Vec<Prediction> = {
+        let mut predictions = Vec::with_capacity(response.results.len());
+        for (i, s) in response.results.into_iter().enumerate() {
+            // Check that s is not NaN or the partial_cmp below will panic
+            if s.is_nan() {
+                Err(ErrorResponse {
+                    error: "score is NaN".to_string(),
+                    error_type: ErrorType::Backend,
+                })?;
+            }
             // Map score to label
-            response
-                .results
-                .into_iter()
-                .enumerate()
-                .map(|(i, s)| Prediction {
-                    score: s,
-                    label: id2label.get(&i.to_string()).unwrap().clone(),
-                })
-                .collect()
-        };
+            predictions.push(Prediction {
+                score: s,
+                label: id2label.get(&i.to_string()).unwrap().clone(),
+            });
+        }
         // Reverse sort
         predictions.sort_by(|x, y| x.score.partial_cmp(&y.score).unwrap());
         predictions.reverse();
@@ -455,6 +458,17 @@ impl grpc::rerank_server::Rerank for TextEmbeddingsService {
 
         let request = request.into_inner();
 
+        if request.texts.is_empty() {
+            let message = "`texts` cannot be empty".to_string();
+            tracing::error!("{message}");
+            let err = ErrorResponse {
+                error: message,
+                error_type: ErrorType::Validation,
+            };
+            metrics::increment_counter!("te_request_failure", "err" => "validation");
+            Err(err)?;
+        }
+
         match &self.info.model_type {
             ModelType::Classifier(_) => {
                 metrics::increment_counter!("te_request_failure", "err" => "model_type");
@@ -549,10 +563,19 @@ impl grpc::rerank_server::Rerank for TextEmbeddingsService {
                 None
             };
 
+            let score = r.4;
+            // Check that s is not NaN or the partial_cmp below will panic
+            if score.is_nan() {
+                Err(ErrorResponse {
+                    error: "score is NaN".to_string(),
+                    error_type: ErrorType::Backend,
+                })?;
+            }
+
             ranks.push(Rank {
                 index: index as u32,
                 text,
-                score: r.4,
+                score,
             })
         }
 
@@ -766,10 +789,19 @@ impl grpc::rerank_server::Rerank for TextEmbeddingsService {
                 None
             };
 
+            let score = r.5;
+            // Check that s is not NaN or the partial_cmp below will panic
+            if score.is_nan() {
+                Err(ErrorResponse {
+                    error: "score is NaN".to_string(),
+                    error_type: ErrorType::Backend,
+                })?;
+            }
+
             ranks.push(Rank {
                 index: r.0 as u32,
                 text,
-                score: r.5,
+                score,
             })
         }
 
diff --git a/router/src/http/server.rs b/router/src/http/server.rs
@@ -116,18 +116,21 @@ async fn predict(
             _ => panic!(),
         };
 
-        let mut predictions: Vec<Prediction> = {
+        let mut predictions = Vec::with_capacity(response.results.len());
+        for (i, s) in response.results.into_iter().enumerate() {
+            // Check that s is not NaN or the partial_cmp below will panic
+            if s.is_nan() {
+                return Err(ErrorResponse {
+                    error: "score is NaN".to_string(),
+                    error_type: ErrorType::Backend,
+                });
+            }
             // Map score to label
-            response
-                .results
-                .into_iter()
-                .enumerate()
-                .map(|(i, s)| Prediction {
-                    score: s,
-                    label: id2label.get(&i.to_string()).unwrap().clone(),
-                })
-                .collect()
-        };
+            predictions.push(Prediction {
+                score: s,
+                label: id2label.get(&i.to_string()).unwrap().clone(),
+            });
+        }
         // Reverse sort
         predictions.sort_by(|x, y| x.score.partial_cmp(&y.score).unwrap());
         predictions.reverse();
@@ -282,6 +285,17 @@ async fn rerank(
     let span = tracing::Span::current();
     let start_time = Instant::now();
 
+    if req.texts.is_empty() {
+        let message = "`texts` cannot be empty".to_string();
+        tracing::error!("{message}");
+        let err = ErrorResponse {
+            error: message,
+            error_type: ErrorType::Validation,
+        };
+        metrics::increment_counter!("te_request_failure", "err" => "validation");
+        Err(err)?;
+    }
+
     match &info.model_type {
         ModelType::Classifier(_) => {
             metrics::increment_counter!("te_request_failure", "err" => "model_type");
@@ -383,11 +397,16 @@ async fn rerank(
                 None
             };
 
-            ranks.push(Rank {
-                index,
-                text,
-                score: r.4,
-            })
+            let score = r.4;
+            // Check that s is not NaN or the partial_cmp below will panic
+            if score.is_nan() {
+                Err(ErrorResponse {
+                    error: "score is NaN".to_string(),
+                    error_type: ErrorType::Backend,
+                })?;
+            }
+
+            ranks.push(Rank { index, text, score })
         }
 
         // Reverse sort

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-use anyhow::{Context, Result, bail};`
	`1`	`+use anyhow::{bail, Context, Result};`
`2`	`2`
`3`	`3`	`fn main() {`
`4`	`4`	`println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");`