Task delegate 10: refactor BertQuestionAnswerer

lu-wang-g · tflite-support-robot · commit fac2a1755feb · 2021-10-01T21:30:38.000-07:00
This is a preparation for integrating BaseOptions. Major changes include:
(1) Switched to BertQuestionAnswerer::CreateFromOptions (the new API) in the JNI layer.
(2) Properly returned error codes and messages from JNI.
(3) Updated the Javadoc.

PiperOrigin-RevId: 400354463
diff --git a/tensorflow_lite_support/java/src/java/org/tensorflow/lite/task/text/qa/BertQuestionAnswerer.java b/tensorflow_lite_support/java/src/java/org/tensorflow/lite/task/text/qa/BertQuestionAnswerer.java
@@ -26,36 +26,39 @@
 import org.tensorflow.lite.task.core.TaskJniUtils.EmptyHandleProvider;
 import org.tensorflow.lite.task.core.TaskJniUtils.MultipleBuffersHandleProvider;
 
-/** Task API for BertQA models. */
+/**
+ * Returns the most likely answers to a given question for QA models (BERT, Albert, etc.).
+ *
+ * <p>The API expects a Bert based TFLite model with metadata containing the following information:
+ *
+ * <ul>
+ *   <li>input_process_units for Wordpiece/Sentencepiece Tokenizer - Wordpiece Tokenizer can be used
+ *       for a <a
+ *       href="https://tfhub.dev/tensorflow/lite-model/mobilebert/1/default/1">MobileBert</a> model,
+ *       Sentencepiece Tokenizer can be used for an <a
+ *       href="https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1">Albert</a> model.
+ *   <li>3 input tensors with names "ids", "mask" and "segment_ids".
+ *   <li>2 output tensors with names "end_logits" and "start_logits".
+ * </ul>
+ */
 public class BertQuestionAnswerer extends BaseTaskApi implements QuestionAnswerer {
   private static final String BERT_QUESTION_ANSWERER_NATIVE_LIBNAME = "task_text_jni";
-
-  private BertQuestionAnswerer(long nativeHandle) {
-    super(nativeHandle);
-  }
+  private static final int OPTIONAL_FD_LENGTH = -1;
+  private static final int OPTIONAL_FD_OFFSET = -1;
 
   /**
-   * Generic API to create the QuestionAnswerer for bert models with metadata populated. The API
-   * expects a Bert based TFLite model with metadata containing the following information:
-   *
-   * <ul>
-   *   <li>input_process_units for Wordpiece/Sentencepiece Tokenizer - Wordpiece Tokenizer can be
-   *       used for a <a
-   *       href="https://tfhub.dev/tensorflow/lite-model/mobilebert/1/default/1">MobileBert</a>
-   *       model, Sentencepiece Tokenizer Tokenizer can be used for an <a
-   *       href="https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1">Albert</a>
-   *       model.
-   *   <li>3 input tensors with names "ids", "mask" and "segment_ids".
-   *   <li>2 output tensors with names "end_logits" and "start_logits".
-   * </ul>
+   * Creates a {@link BertQuestionAnswerer} instance from the default {@link
+   * BertQuestionAnswererOptions}.
    *
    * @param context android context
-   * @param pathToModel file path to the model with metadata. Note: The model should not be
-   *     compressed
-   * @return {@link BertQuestionAnswerer} instance
-   * @throws IOException If model file fails to load.
+   * @param modelPath file path to the model with metadata. Note: The model should not be compressed
+   * @return a {@link BertQuestionAnswerer} instance
+   * @throws IOException if model file fails to load
+   * @throws IllegalArgumentException if an argument is invalid
+   * @throws IllegalStateException if there is an internal error
+   * @throws RuntimeException if there is an otherwise unspecified error
    */
-  public static BertQuestionAnswerer createFromFile(Context context, String pathToModel)
+  public static BertQuestionAnswerer createFromFile(Context context, String modelPath)
       throws IOException {
     return new BertQuestionAnswerer(
         TaskJniUtils.createHandleWithMultipleAssetFilesFromLibrary(
@@ -67,97 +70,98 @@ public long createHandle(ByteBuffer... buffers) {
               }
             },
             BERT_QUESTION_ANSWERER_NATIVE_LIBNAME,
-            pathToModel));
+            modelPath));
   }
 
   /**
-   * Generic API to create the QuestionAnswerer for bert models with metadata populated. The API
-   * expects a Bert based TFLite model with metadata containing the following information:
-   *
-   * <ul>
-   *   <li>input_process_units for Wordpiece/Sentencepiece Tokenizer - Wordpiece Tokenizer can be
-   *       used for a <a
-   *       href="https://tfhub.dev/tensorflow/lite-model/mobilebert/1/default/1">MobileBert</a>
-   *       model, Sentencepiece Tokenizer Tokenizer can be used for an <a
-   *       href="https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1">Albert</a>
-   *       model.
-   *   <li>3 input tensors with names "ids", "mask" and "segment_ids".
-   *   <li>2 output tensors with names "end_logits" and "start_logits".
-   * </ul>
+   * Creates a {@link BertQuestionAnswerer} instance from the default {@link
+   * BertQuestionAnswererOptions}.
    *
-   * @param modelFile {@link File} object of the model
-   * @return {@link BertQuestionAnswerer} instance
-   * @throws IOException If model file fails to load.
+   * @param modelFile a {@link File} object of the model
+   * @return a {@link BertQuestionAnswerer} instance
+   * @throws IOException if model file fails to load
+   * @throws IllegalArgumentException if an argument is invalid
+   * @throws IllegalStateException if there is an internal error
+   * @throws RuntimeException if there is an otherwise unspecified error
    */
-  public static BertQuestionAnswerer createFromFile(File modelFile)
-      throws IOException {
+  public static BertQuestionAnswerer createFromFile(File modelFile) throws IOException {
     try (ParcelFileDescriptor descriptor =
         ParcelFileDescriptor.open(modelFile, ParcelFileDescriptor.MODE_READ_ONLY)) {
       return new BertQuestionAnswerer(
           TaskJniUtils.createHandleFromLibrary(
               new EmptyHandleProvider() {
                 @Override
                 public long createHandle() {
-                  return initJniWithFileDescriptor(descriptor.getFd());
+                  return initJniWithFileDescriptor(
+                      /*fileDescriptor=*/ descriptor.getFd(),
+                      /*fileDescriptorLength=*/ OPTIONAL_FD_LENGTH,
+                      /*fileDescriptorOffset=*/ OPTIONAL_FD_OFFSET);
                 }
               },
               BERT_QUESTION_ANSWERER_NATIVE_LIBNAME));
     }
   }
 
   /**
-   * Creates the API instance with a bert model and vocabulary file.
+   * Creates a {@link BertQuestionAnswerer} instance with a Bert model and a vocabulary file.
    *
    * <p>One suitable model is: https://tfhub.dev/tensorflow/lite-model/mobilebert/1/default/1
    *
    * @param context android context
-   * @param pathToModel file path to the bert model. Note: The model should not be compressed
-   * @param pathToVocab file path to the vocabulary file. Note: The file should not be compressed
-   * @return {@link BertQuestionAnswerer} instance
-   * @throws IOException If model file fails to load.
+   * @param modelPath file path to the Bert model. Note: The model should not be compressed
+   * @param vocabPath file path to the vocabulary file. Note: The file should not be compressed
+   * @return a {@link BertQuestionAnswerer} instance
+   * @throws IOException if model file fails to load
+   * @throws IllegalArgumentException if an argument is invalid
+   * @throws IllegalStateException if there is an internal error
+   * @throws RuntimeException if there is an otherwise unspecified error
    */
   public static BertQuestionAnswerer createBertQuestionAnswererFromFile(
-      Context context, String pathToModel, String pathToVocab) throws IOException {
+      Context context, String modelPath, String vocabPath) throws IOException {
     return new BertQuestionAnswerer(
         TaskJniUtils.createHandleWithMultipleAssetFilesFromLibrary(
             context,
             new MultipleBuffersHandleProvider() {
               @Override
               public long createHandle(ByteBuffer... buffers) {
-                return BertQuestionAnswerer.initJniWithBertByteBuffers(buffers);
+                return initJniWithBertByteBuffers(buffers);
               }
             },
             BERT_QUESTION_ANSWERER_NATIVE_LIBNAME,
-            pathToModel,
-            pathToVocab));
+            modelPath,
+            vocabPath));
   }
 
   /**
-   * Creates the API instance with an albert model and sentence piece model file.
+   * Creates a {@link BertQuestionAnswerer} instance with an Albert model and a sentence piece model
+   * file.
    *
    * <p>One suitable model is: https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1
    *
    * @param context android context
-   * @param pathToModel file path to the albert model. Note: The model should not be compressed
-   * @param pathToSentencePieceModel file path to the sentence piece model file. Note: The model
+   * @param modelPath file path to the Albert model. Note: The model should not be compressed
+   * @param sentencePieceModelPath file path to the sentence piece model file. Note: The model
    *     should not be compressed
-   * @return {@link BertQuestionAnswerer} instance
-   * @throws IOException If model file fails to load.
+   * @return a {@link BertQuestionAnswerer} instance
+   * @throws IOException if model file fails to load
+   * @throws IllegalArgumentException if an argument is invalid
+   * @throws IllegalStateException if there is an internal error
+   * @throws RuntimeException if there is an otherwise unspecified error
    */
   public static BertQuestionAnswerer createAlbertQuestionAnswererFromFile(
-      Context context, String pathToModel, String pathToSentencePieceModel) throws IOException {
+      Context context, String modelPath, String sentencePieceModelPath) throws IOException {
     return new BertQuestionAnswerer(
         TaskJniUtils.createHandleWithMultipleAssetFilesFromLibrary(
             context,
             new MultipleBuffersHandleProvider() {
               @Override
               public long createHandle(ByteBuffer... buffers) {
-                return BertQuestionAnswerer.initJniWithAlbertByteBuffers(buffers);
+                return initJniWithAlbertByteBuffers(buffers);
               }
             },
             BERT_QUESTION_ANSWERER_NATIVE_LIBNAME,
-            pathToModel,
-            pathToSentencePieceModel));
+            modelPath,
+            sentencePieceModelPath));
   }
 
   @Override
@@ -166,6 +170,10 @@ public List<QaAnswer> answer(String context, String question) {
     return answerNative(getNativeHandle(), context, question);
   }
 
+  private BertQuestionAnswerer(long nativeHandle) {
+    super(nativeHandle);
+  }
+
   // modelBuffers[0] is tflite model file buffer, and modelBuffers[1] is vocab file buffer.
   private static native long initJniWithBertByteBuffers(ByteBuffer... modelBuffers);
 
@@ -176,7 +184,8 @@ public List<QaAnswer> answer(String context, String question) {
   // modelBuffers[0] is tflite model file buffer with metadata to specify which tokenizer to use.
   private static native long initJniWithModelWithMetadataByteBuffers(ByteBuffer... modelBuffers);
 
-  private static native long initJniWithFileDescriptor(int fd);
+  private static native long initJniWithFileDescriptor(
+      int fileDescriptor, long fileDescriptorLength, long fileDescriptorOffset);
 
   private static native List<QaAnswer> answerNative(
       long nativeHandle, String context, String question);
diff --git a/tensorflow_lite_support/java/src/native/task/text/qa/bert_question_answerer_jni.cc b/tensorflow_lite_support/java/src/native/task/text/qa/bert_question_answerer_jni.cc
@@ -20,15 +20,21 @@ limitations under the License.
 
 namespace {
 
+using ::tflite::support::StatusOr;
 using ::tflite::support::utils::ConvertVectorToArrayList;
+using ::tflite::support::utils::GetExceptionClassNameForStatusCode;
 using ::tflite::support::utils::GetMappedFileBuffer;
 using ::tflite::support::utils::JStringToString;
+using ::tflite::support::utils::ThrowException;
 using ::tflite::task::text::BertQuestionAnswerer;
+using ::tflite::task::text::BertQuestionAnswererOptions;
 using ::tflite::task::text::QaAnswer;
 using ::tflite::task::text::QuestionAnswerer;
 
 constexpr int kInvalidPointer = 0;
 
+}  // namespace
+
 extern "C" JNIEXPORT void JNICALL
 Java_org_tensorflow_lite_task_text_qa_BertQuestionAnswerer_deinitJni(
     JNIEnv* env, jobject thiz, jlong native_handle) {
@@ -41,24 +47,44 @@ Java_org_tensorflow_lite_task_text_qa_BertQuestionAnswerer_initJniWithModelWithM
   absl::string_view model_with_metadata =
       GetMappedFileBuffer(env, env->GetObjectArrayElement(model_buffers, 0));
 
-  tflite::support::StatusOr<std::unique_ptr<QuestionAnswerer>> status =
-      BertQuestionAnswerer::CreateFromBuffer(
-          model_with_metadata.data(), model_with_metadata.size());
-  if (status.ok()) {
-    return reinterpret_cast<jlong>(status->release());
+  tflite::support::StatusOr<std::unique_ptr<QuestionAnswerer>> qa_status =
+      BertQuestionAnswerer::CreateFromBuffer(model_with_metadata.data(),
+                                             model_with_metadata.size());
+  if (qa_status.ok()) {
+    return reinterpret_cast<jlong>(qa_status->release());
   } else {
+    ThrowException(
+        env, GetExceptionClassNameForStatusCode(qa_status.status().code()),
+        "Error occurred when initializing BertQuestionAnswerer: %s",
+        qa_status.status().message().data());
     return kInvalidPointer;
   }
 }
-
 extern "C" JNIEXPORT jlong JNICALL
 Java_org_tensorflow_lite_task_text_qa_BertQuestionAnswerer_initJniWithFileDescriptor(
-    JNIEnv* env, jclass thiz, jint fd) {
-  tflite::support::StatusOr<std::unique_ptr<QuestionAnswerer>> status =
-      BertQuestionAnswerer::CreateFromFd(fd);
-  if (status.ok()) {
-    return reinterpret_cast<jlong>(status->release());
+    JNIEnv* env, jclass thiz, jint file_descriptor,
+    jlong file_descriptor_length, jlong file_descriptor_offset) {
+  BertQuestionAnswererOptions proto_options;
+  auto file_descriptor_meta = proto_options.mutable_base_options()
+                                  ->mutable_model_file()
+                                  ->mutable_file_descriptor_meta();
+  file_descriptor_meta->set_fd(file_descriptor);
+  if (file_descriptor_length > 0) {
+    file_descriptor_meta->set_length(file_descriptor_length);
+  }
+  if (file_descriptor_offset > 0) {
+    file_descriptor_meta->set_offset(file_descriptor_offset);
+  }
+
+  StatusOr<std::unique_ptr<QuestionAnswerer>> qa_status =
+      BertQuestionAnswerer::CreateFromOptions(proto_options);
+  if (qa_status.ok()) {
+    return reinterpret_cast<jlong>(qa_status->release());
   } else {
+    ThrowException(
+        env, GetExceptionClassNameForStatusCode(qa_status.status().code()),
+        "Error occurred when initializing BertQuestionAnswerer: %s",
+        qa_status.status().message().data());
     return kInvalidPointer;
   }
 }
@@ -71,12 +97,16 @@ Java_org_tensorflow_lite_task_text_qa_BertQuestionAnswerer_initJniWithBertByteBu
   absl::string_view vocab =
       GetMappedFileBuffer(env, env->GetObjectArrayElement(model_buffers, 1));
 
-  tflite::support::StatusOr<std::unique_ptr<QuestionAnswerer>> status =
+  StatusOr<std::unique_ptr<QuestionAnswerer>> qa_status =
       BertQuestionAnswerer::CreateBertQuestionAnswererFromBuffer(
           model.data(), model.size(), vocab.data(), vocab.size());
-  if (status.ok()) {
-    return reinterpret_cast<jlong>(status->release());
+  if (qa_status.ok()) {
+    return reinterpret_cast<jlong>(qa_status->release());
   } else {
+    ThrowException(
+        env, GetExceptionClassNameForStatusCode(qa_status.status().code()),
+        "Error occurred when initializing BertQuestionAnswerer: %s",
+        qa_status.status().message().data());
     return kInvalidPointer;
   }
 }
@@ -89,12 +119,16 @@ Java_org_tensorflow_lite_task_text_qa_BertQuestionAnswerer_initJniWithAlbertByte
   absl::string_view sp_model =
       GetMappedFileBuffer(env, env->GetObjectArrayElement(model_buffers, 1));
 
-  tflite::support::StatusOr<std::unique_ptr<QuestionAnswerer>> status =
+  StatusOr<std::unique_ptr<QuestionAnswerer>> qa_status =
       BertQuestionAnswerer::CreateAlbertQuestionAnswererFromBuffer(
           model.data(), model.size(), sp_model.data(), sp_model.size());
-  if (status.ok()) {
-    return reinterpret_cast<jlong>(status->release());
+  if (qa_status.ok()) {
+    return reinterpret_cast<jlong>(qa_status->release());
   } else {
+    ThrowException(
+        env, GetExceptionClassNameForStatusCode(qa_status.status().code()),
+        "Error occurred when initializing BertQuestionAnswerer: %s",
+        qa_status.status().message().data());
     return kInvalidPointer;
   }
 }
@@ -123,5 +157,3 @@ Java_org_tensorflow_lite_task_text_qa_BertQuestionAnswerer_answerNative(
         return qa_answer;
       });
 }
-
-}  // namespace