Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
* @author Christian Tzolov
* @author Josh Long
* @author Soby Chacko
* @author Jihoon Kim
* @since 1.0.0
*
*/
Expand Down Expand Up @@ -59,7 +60,7 @@ default float[] embed(String text) {
/**
* Embeds a batch of texts into vectors.
* @param texts list of texts to embed.
* @return list of list of embedded vectors.
* @return list of embedded vectors.
*/
default List<float[]> embed(List<String> texts) {
Assert.notNull(texts, "Texts must not be null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@

/**
* @author Christian Tzolov
* @author Jihoon Kim
*/
public class EmbeddingResultMetadata implements ResultMetadata {

public static EmbeddingResultMetadata EMPTY = new EmbeddingResultMetadata();

/**
* The {@link MimeType} of the source data used to generate the embedding.
* The {@link ModalityType} of the source data used to generate the embedding.
*/
private final ModalityType modalityType;

Expand Down Expand Up @@ -80,7 +81,7 @@ public static class ModalityUtils {

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tzolov It looks like ModalityUtils isn't used anywhere, so it can safely be deleted. What do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merging this now; we can decide whether to remove it once we hear back from Christian.

private static MimeType TEXT_MIME_TYPE = MimeTypeUtils.parseMimeType("text/*");

private static MimeType IMAGE_MIME_TYPE = MimeTypeUtils.parseMimeType("text/*");
private static MimeType IMAGE_MIME_TYPE = MimeTypeUtils.parseMimeType("image/*");

private static MimeType VIDEO_MIME_TYPE = MimeTypeUtils.parseMimeType("video/*");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
* @author Soby Chacko
* @author Mark Pollack
* @author Laura Trotta
* @author Jihoon Kim
* @since 1.0.0
*/
public class TokenCountBatchingStrategy implements BatchingStrategy {
Expand All @@ -68,7 +69,7 @@ public class TokenCountBatchingStrategy implements BatchingStrategy {

private final int maxInputTokenCount;

private final ContentFormatter contentFormater;
private final ContentFormatter contentFormatter;

private final MetadataMode metadataMode;

Expand All @@ -78,9 +79,9 @@ public TokenCountBatchingStrategy() {

/**
* @param encodingType {@link EncodingType}
* @param maxInputTokenCount upper limit for input tokens
* @param thresholdFactor the threshold factor to use on top of the max input token
* count
* @param maxInputTokenCount upper limit for input tokens
*/
public TokenCountBatchingStrategy(EncodingType encodingType, int maxInputTokenCount, double thresholdFactor) {
this(encodingType, maxInputTokenCount, thresholdFactor, Document.DEFAULT_CONTENT_FORMATTER, MetadataMode.NONE);
Expand All @@ -103,7 +104,7 @@ public TokenCountBatchingStrategy(EncodingType encodingType, int maxInputTokenCo
Assert.notNull(metadataMode, "MetadataMode must not be null");
this.tokenCountEstimator = new JTokkitTokenCountEstimator(encodingType);
this.maxInputTokenCount = (int) Math.round(maxInputTokenCount * (1 - reservePercentage));
this.contentFormater = contentFormatter;
this.contentFormatter = contentFormatter;
this.metadataMode = metadataMode;
}

Expand All @@ -122,7 +123,7 @@ public TokenCountBatchingStrategy(TokenCountEstimator tokenCountEstimator, int m
Assert.notNull(tokenCountEstimator, "TokenCountEstimator must not be null");
this.tokenCountEstimator = tokenCountEstimator;
this.maxInputTokenCount = (int) Math.round(maxInputTokenCount * (1 - reservePercentage));
this.contentFormater = contentFormatter;
this.contentFormatter = contentFormatter;
this.metadataMode = metadataMode;
}

Expand All @@ -135,7 +136,7 @@ public List<List<Document>> batch(List<Document> documents) {

for (Document document : documents) {
int tokenCount = this.tokenCountEstimator
.estimate(document.getFormattedContent(this.contentFormater, this.metadataMode));
.estimate(document.getFormattedContent(this.contentFormatter, this.metadataMode));
if (tokenCount > this.maxInputTokenCount) {
throw new IllegalArgumentException(
"Tokens in a single document exceeds the maximum number of allowed input tokens");
Expand Down