Skip to content

Commit 0158799

Browse files
authored
Merge pull request #3 from aws-gopher/refactor
Refactor connector configurations for type safety and consistency
2 parents 8b5690d + 9a2071a commit 0158799

31 files changed

+979
-998
lines changed

LICENSE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2024 AWS Gopher
3+
Copyright (c) 2025 AWS Gopher
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

chunker_character.go

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,17 @@ import (
77

88
// ChunkerCharacter is a node that chunks text by character.
99
type ChunkerCharacter struct {
10-
ID string `json:"-"`
11-
Name string `json:"-"`
12-
APIURL string `json:"unstructured_api_url,omitempty"`
13-
APIKey string `json:"unstructured_api_key,omitempty"`
14-
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
15-
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
16-
MaxCharacters int `json:"max_characters,omitempty"`
17-
Overlap int `json:"overlap,omitempty"`
18-
OverlapAll bool `json:"overlap_all"`
19-
ContextualChunkingStrategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"`
10+
ID string `json:"-"`
11+
Name string `json:"-"`
12+
APIURL string `json:"unstructured_api_url,omitempty"`
13+
APIKey string `json:"unstructured_api_key,omitempty"`
14+
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
15+
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
16+
MaxCharacters int `json:"max_characters,omitempty"`
17+
Overlap int `json:"overlap,omitempty"`
18+
OverlapAll bool `json:"overlap_all"`
2019
}
2120

22-
// ChunkingStrategy is a strategy for contextual chunking.
23-
type ChunkingStrategy string
24-
25-
// ChunkingStrategyV1 is a strategy for contextual chunking.
26-
const ChunkingStrategyV1 = "v1"
27-
2821
var _ WorkflowNode = new(ChunkerCharacter)
2922

3023
// isNode implements the WorkflowNode interface.
@@ -34,7 +27,13 @@ func (c ChunkerCharacter) isNode() {}
3427
func (c ChunkerCharacter) MarshalJSON() ([]byte, error) {
3528
type alias ChunkerCharacter
3629

37-
data, err := json.Marshal(alias(c))
30+
data, err := json.Marshal(struct {
31+
alias
32+
ContextualChunkingStrategy string `json:"contextual_chunking_strategy"`
33+
}{
34+
alias: alias(c),
35+
ContextualChunkingStrategy: "v1",
36+
})
3837
if err != nil {
3938
return nil, fmt.Errorf("failed to marshal chunker character: %w", err)
4039
}

chunker_page.go

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,15 @@ import (
77

88
// ChunkerPage is a node that chunks text by page.
99
type ChunkerPage struct {
10-
ID string `json:"-"`
11-
Name string `json:"-"`
12-
APIURL string `json:"unstructured_api_url,omitempty"`
13-
APIKey string `json:"unstructured_api_key,omitempty"`
14-
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
15-
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
16-
MaxCharacters int `json:"max_characters,omitempty"`
17-
Overlap int `json:"overlap,omitempty"`
18-
OverlapAll bool `json:"overlap_all"`
19-
Strategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"`
10+
ID string `json:"-"`
11+
Name string `json:"-"`
12+
APIURL string `json:"unstructured_api_url,omitempty"`
13+
APIKey string `json:"unstructured_api_key,omitempty"`
14+
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
15+
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
16+
MaxCharacters int `json:"max_characters,omitempty"`
17+
Overlap int `json:"overlap,omitempty"`
18+
OverlapAll bool `json:"overlap_all"`
2019
}
2120

2221
var _ WorkflowNode = new(ChunkerPage)
@@ -28,7 +27,13 @@ func (c ChunkerPage) isNode() {}
2827
func (c ChunkerPage) MarshalJSON() ([]byte, error) {
2928
type alias ChunkerPage
3029

31-
data, err := json.Marshal(alias(c))
30+
data, err := json.Marshal(struct {
31+
alias
32+
ContextualChunkingStrategy string `json:"contextual_chunking_strategy"`
33+
}{
34+
alias: alias(c),
35+
ContextualChunkingStrategy: "v1",
36+
})
3237
if err != nil {
3338
return nil, fmt.Errorf("failed to marshal chunker page: %w", err)
3439
}

chunker_similarity.go

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,15 @@ import (
77

88
// ChunkerSimilarity is a node that chunks text by similarity.
99
type ChunkerSimilarity struct {
10-
ID string `json:"-"`
11-
Name string `json:"-"`
12-
APIURL string `json:"unstructured_api_url,omitempty"`
13-
APIKey string `json:"unstructured_api_key,omitempty"`
14-
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
15-
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
16-
MaxCharacters int `json:"max_characters,omitempty"`
17-
Overlap int `json:"overlap,omitempty"`
18-
OverlapAll bool `json:"overlap_all"`
19-
Strategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"`
10+
ID string `json:"-"`
11+
Name string `json:"-"`
12+
APIURL string `json:"unstructured_api_url,omitempty"`
13+
APIKey string `json:"unstructured_api_key,omitempty"`
14+
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
15+
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
16+
MaxCharacters int `json:"max_characters,omitempty"`
17+
Overlap int `json:"overlap,omitempty"`
18+
OverlapAll bool `json:"overlap_all"`
2019
}
2120

2221
var _ WorkflowNode = new(ChunkerSimilarity)
@@ -28,7 +27,13 @@ func (c ChunkerSimilarity) isNode() {}
2827
func (c ChunkerSimilarity) MarshalJSON() ([]byte, error) {
2928
type alias ChunkerSimilarity
3029

31-
data, err := json.Marshal(alias(c))
30+
data, err := json.Marshal(struct {
31+
alias
32+
ContextualChunkingStrategy string `json:"contextual_chunking_strategy"`
33+
}{
34+
alias: alias(c),
35+
ContextualChunkingStrategy: "v1",
36+
})
3237
if err != nil {
3338
return nil, fmt.Errorf("failed to marshal chunker similarity: %w", err)
3439
}

chunker_title.go

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,16 @@ import (
77

88
// ChunkerTitle is a node that chunks text by title.
99
type ChunkerTitle struct {
10-
ID string `json:"-"`
11-
Name string `json:"-"`
12-
APIURL string `json:"unstructured_api_url,omitempty"`
13-
APIKey string `json:"unstructured_api_key,omitempty"`
14-
CombineTextUnderN int `json:"combine_text_under_n_chars,omitempty"`
15-
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
16-
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
17-
MaxCharacters int `json:"max_characters,omitempty"`
18-
Overlap int `json:"overlap,omitempty"`
19-
OverlapAll bool `json:"overlap_all"`
20-
ContextualChunkingStrategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"`
10+
ID string `json:"-"`
11+
Name string `json:"-"`
12+
APIURL string `json:"unstructured_api_url,omitempty"`
13+
APIKey string `json:"unstructured_api_key,omitempty"`
14+
CombineTextUnderN int `json:"combine_text_under_n_chars,omitempty"`
15+
IncludeOrigElements bool `json:"include_orig_elements,omitempty"`
16+
NewAfterNChars int `json:"new_after_n_chars,omitempty"`
17+
MaxCharacters int `json:"max_characters,omitempty"`
18+
Overlap int `json:"overlap,omitempty"`
19+
OverlapAll bool `json:"overlap_all"`
2120
}
2221

2322
var _ WorkflowNode = new(ChunkerTitle)
@@ -29,7 +28,13 @@ func (c ChunkerTitle) isNode() {}
2928
func (c ChunkerTitle) MarshalJSON() ([]byte, error) {
3029
type alias ChunkerTitle
3130

32-
data, err := json.Marshal(alias(c))
31+
data, err := json.Marshal(struct {
32+
alias
33+
ContextualChunkingStrategy string `json:"contextual_chunking_strategy"`
34+
}{
35+
alias: alias(c),
36+
ContextualChunkingStrategy: "v1",
37+
})
3338
if err != nil {
3439
return nil, fmt.Errorf("failed to marshal chunker title: %w", err)
3540
}

0 commit comments

Comments
 (0)