Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2024 AWS Gopher
Copyright (c) 2025 AWS Gopher

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
33 changes: 16 additions & 17 deletions chunker_character.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,17 @@ import (

// ChunkerCharacter is a node that chunks text by character.
type ChunkerCharacter struct {
ID string `json:"-"`
Name string `json:"-"`
APIURL string `json:"unstructured_api_url,omitempty"`
APIKey string `json:"unstructured_api_key,omitempty"`
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
MaxCharacters int `json:"max_characters,omitempty"`
Overlap int `json:"overlap,omitempty"`
OverlapAll bool `json:"overlap_all"`
ContextualChunkingStrategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"`
ID string `json:"-"`
Name string `json:"-"`
APIURL string `json:"unstructured_api_url,omitempty"`
APIKey string `json:"unstructured_api_key,omitempty"`
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
MaxCharacters int `json:"max_characters,omitempty"`
Overlap int `json:"overlap,omitempty"`
OverlapAll bool `json:"overlap_all"`
}

// ChunkingStrategy is a strategy for contextual chunking.
type ChunkingStrategy string

// ChunkingStrategyV1 is a strategy for contextual chunking.
const ChunkingStrategyV1 = "v1"

var _ WorkflowNode = new(ChunkerCharacter)

// isNode implements the WorkflowNode interface.
Expand All @@ -34,7 +27,13 @@ func (c ChunkerCharacter) isNode() {}
func (c ChunkerCharacter) MarshalJSON() ([]byte, error) {
type alias ChunkerCharacter

data, err := json.Marshal(alias(c))
data, err := json.Marshal(struct {
alias
ContextualChunkingStrategy string `json:"contextual_chunking_strategy"`
}{
alias: alias(c),
ContextualChunkingStrategy: "v1",
})
if err != nil {
return nil, fmt.Errorf("failed to marshal chunker character: %w", err)
}
Expand Down
27 changes: 16 additions & 11 deletions chunker_page.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,15 @@ import (

// ChunkerPage is a node that chunks text by page.
type ChunkerPage struct {
ID string `json:"-"`
Name string `json:"-"`
APIURL string `json:"unstructured_api_url,omitempty"`
APIKey string `json:"unstructured_api_key,omitempty"`
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
MaxCharacters int `json:"max_characters,omitempty"`
Overlap int `json:"overlap,omitempty"`
OverlapAll bool `json:"overlap_all"`
Strategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"`
ID string `json:"-"`
Name string `json:"-"`
APIURL string `json:"unstructured_api_url,omitempty"`
APIKey string `json:"unstructured_api_key,omitempty"`
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
MaxCharacters int `json:"max_characters,omitempty"`
Overlap int `json:"overlap,omitempty"`
OverlapAll bool `json:"overlap_all"`
}

var _ WorkflowNode = new(ChunkerPage)
Expand All @@ -28,7 +27,13 @@ func (c ChunkerPage) isNode() {}
func (c ChunkerPage) MarshalJSON() ([]byte, error) {
type alias ChunkerPage

data, err := json.Marshal(alias(c))
data, err := json.Marshal(struct {
alias
ContextualChunkingStrategy string `json:"contextual_chunking_strategy"`
}{
alias: alias(c),
ContextualChunkingStrategy: "v1",
})
if err != nil {
return nil, fmt.Errorf("failed to marshal chunker page: %w", err)
}
Expand Down
27 changes: 16 additions & 11 deletions chunker_similarity.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,15 @@ import (

// ChunkerSimilarity is a node that chunks text by similarity.
type ChunkerSimilarity struct {
ID string `json:"-"`
Name string `json:"-"`
APIURL string `json:"unstructured_api_url,omitempty"`
APIKey string `json:"unstructured_api_key,omitempty"`
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
MaxCharacters int `json:"max_characters,omitempty"`
Overlap int `json:"overlap,omitempty"`
OverlapAll bool `json:"overlap_all"`
Strategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"`
ID string `json:"-"`
Name string `json:"-"`
APIURL string `json:"unstructured_api_url,omitempty"`
APIKey string `json:"unstructured_api_key,omitempty"`
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
MaxCharacters int `json:"max_characters,omitempty"`
Overlap int `json:"overlap,omitempty"`
OverlapAll bool `json:"overlap_all"`
}

var _ WorkflowNode = new(ChunkerSimilarity)
Expand All @@ -28,7 +27,13 @@ func (c ChunkerSimilarity) isNode() {}
func (c ChunkerSimilarity) MarshalJSON() ([]byte, error) {
type alias ChunkerSimilarity

data, err := json.Marshal(alias(c))
data, err := json.Marshal(struct {
alias
ContextualChunkingStrategy string `json:"contextual_chunking_strategy"`
}{
alias: alias(c),
ContextualChunkingStrategy: "v1",
})
if err != nil {
return nil, fmt.Errorf("failed to marshal chunker similarity: %w", err)
}
Expand Down
29 changes: 17 additions & 12 deletions chunker_title.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,16 @@ import (

// ChunkerTitle is a node that chunks text by title.
type ChunkerTitle struct {
ID string `json:"-"`
Name string `json:"-"`
APIURL string `json:"unstructured_api_url,omitempty"`
APIKey string `json:"unstructured_api_key,omitempty"`
CombineTextUnderN int `json:"combine_text_under_n_chars,omitempty"`
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
MaxCharacters int `json:"max_characters,omitempty"`
Overlap int `json:"overlap,omitempty"`
OverlapAll bool `json:"overlap_all"`
ContextualChunkingStrategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"`
ID string `json:"-"`
Name string `json:"-"`
APIURL string `json:"unstructured_api_url,omitempty"`
APIKey string `json:"unstructured_api_key,omitempty"`
CombineTextUnderN int `json:"combine_text_under_n_chars,omitempty"`
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
MaxCharacters int `json:"max_characters,omitempty"`
Overlap int `json:"overlap,omitempty"`
OverlapAll bool `json:"overlap_all"`
}

var _ WorkflowNode = new(ChunkerTitle)
Expand All @@ -29,7 +28,13 @@ func (c ChunkerTitle) isNode() {}
func (c ChunkerTitle) MarshalJSON() ([]byte, error) {
type alias ChunkerTitle

data, err := json.Marshal(alias(c))
data, err := json.Marshal(struct {
alias
ContextualChunkingStrategy string `json:"contextual_chunking_strategy"`
}{
alias: alias(c),
ContextualChunkingStrategy: "v1",
})
if err != nil {
return nil, fmt.Errorf("failed to marshal chunker title: %w", err)
}
Expand Down
Loading