More updates for OpenAI Embeddings model (#7)

mattjohnsonpint · web-flow · commit f4013171e861 · 2024-06-24T09:55:37.000-07:00
* Fix encoding type

* Use an f32[] for output embeddings

* Add JS docs

* Update CHANGELOG.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,7 +2,8 @@
 
 ## UNRELEASED
 
-- Update OpenAI Embeddings model to support all allowed types of input
+- Update OpenAI Embeddings model to support all allowed types of input [#6](https://github.com/hypermodeAI/models-as/pull/6)
+- More updates for OpenAI Embeddings model [#7](https://github.com/hypermodeAI/models-as/pull/7)
 
 ## 2024-06-28 - Version 0.1.6
 
diff --git a/src/models/openai/embeddings.ts b/src/models/openai/embeddings.ts
@@ -1,8 +1,25 @@
 import { Model } from "../..";
 
-// Reference: https://platform.openai.com/docs/api-reference/embeddings
-
+/**
+ * Provides input and output types that conform to the OpenAI Embeddings API.
+ *
+ * Reference: https://platform.openai.com/docs/api-reference/embeddings
+ */
 export class EmbeddingsModel extends Model<EmbeddingsInput, EmbeddingsOutput> {
+  /**
+   * Creates an input object for the OpenAI Embeddings API.
+   *
+   * @param content The input content to vectorize.  Can be any of:
+   * - A string representing the text to vectorize.
+   * - An array of strings representing multiple texts to vectorize.
+   * - An array of integers representing pre-tokenized text to vectorize.
+   * - An array of arrays of integers representing multiple pre-tokenized texts to vectorize.
+   *
+   * @returns An input object that can be passed to the `invoke` method.
+   *
+   * @remarks
+   * The input content must not exceed the maximum token limit of the model.
+   */
   createInput<T>(content: T): EmbeddingsInput {
     const model = this.info.fullName;
 
@@ -32,64 +49,143 @@ export class EmbeddingsModel extends Model<EmbeddingsInput, EmbeddingsOutput> {
   }
 }
 
-
+/**
+ * The input object for the OpenAI Embeddings API.
+ */
 @json
 class EmbeddingsInput {
+  /**
+   * The name of the model to use for the embeddings.
+   * Must be the exact string expected by the model provider.
+   * For example, "text-embedding-3-small".
+   *
+   * @remarks
+   * This field is automatically set by the `createInput` method when creating this object.
+   * It does not need to be set manually.
+   */
   model!: string;
 
-
-  @omitif("this.encodingFormat.type == 'float'")
-  encodingFormat: EncodingFormat = EncodingFormat.Float;
-
-
+  /**
+   * The encoding format for the output embeddings.
+   *
+   * @default EncodingFormat.Float
+   *
+   * @remarks
+   * Currently only `EncodingFormat.Float` is supported.
+   */
+  @alias("encoding_format")
+  @omitif("this.encodingFormat == 'float'")
+  encodingFormat: string = EncodingFormat.Float;
+
+  /**
+   * The number of dimensions the output embeddings should have.
+   * If not specified, the model's default number of dimensions will be used.
+   */
   @omitif("this.dimensions == -1")
   dimensions: i32 = -1; // TODO: make this an `i32 | null` when supported
 
-
+  /**
+   * The user ID to associate with the request.
+   * If not specified, the request will be anonymous.
+   * See https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids
+   */
   @omitnull()
   user: string | null = null;
 }
 
-
+/**
+ * The input object for the OpenAI Embeddings API, extended with the typed `input` content to vectorize.
+ */
 @json
 class TypedEmbeddingsInput<T> extends EmbeddingsInput {
+  /**
+   * The input content to vectorize.
+   */
   input!: T;
 }
 
-
+/**
+ * The output object for the OpenAI Embeddings API.
+ */
 @json
 class EmbeddingsOutput {
+  /**
+   * The name of the output object type returned by the API.
+   * Always `"list"`.
+   */
   object!: string;
+
+  /**
+   * The name of the model used to generate the embeddings.
+   * In most cases, this will match the requested `model` field in the input.
+   */
   model!: string;
+
+  /**
+   * The usage statistics for the request.
+   */
   usage!: Usage;
+
+  /**
+   * The output vector embeddings data.
+   */
   data!: Embedding[];
 }
 
-
-@json
-export class EncodingFormat {
-  type: string = "float";
-
-  static Float: EncodingFormat = { type: "float" };
-  static Base64: EncodingFormat = { type: "base64" };
+/**
+ * The encoding format for the output embeddings.
+ */
+// eslint-disable-next-line @typescript-eslint/no-namespace
+export namespace EncodingFormat {
+  /**
+   * The output embeddings are encoded as an array of floating-point numbers.
+   */
+  export const Float = "float";
+
+  /**
+   * The output embeddings are encoded as a base64-encoded string,
+   * containing a binary representation of an array of floating-point numbers.
+   *
+   * @remarks
+   * This format is currently not supported through this interface.
+   */
+  export const Base64 = "base64";
 }
+export type EncodingFormat = string;
 
-
+/**
+ * The output vector embeddings data.
+ */
 @json
 class Embedding {
+  /**
+   * The name of the output object type returned by the API.
+   * Always `"embedding"`.
+   */
   object!: string;
+
+  /**
+   * The index of the input text that corresponds to this embedding.
+   * Used when requesting embeddings for multiple texts.
+   */
   index!: i32;
-  embedding!: f64[];
+  embedding!: f32[]; // TODO: support `f32[] | string` based on input encoding format
 }
 
-
+/**
+ * The usage statistics for the request.
+ */
 @json
 class Usage {
-
+  /**
+   * The number of prompt tokens used in the request.
+   */
   @alias("prompt_tokens")
   promptTokens!: i32;
 
-
+  /**
+   * The total number of tokens used in the request.
+   */
   @alias("total_tokens")
   totalTokens!: i32;
 }