|
| 1 | +package unstructured |
| 2 | + |
| 3 | +import ( |
| 4 | + "encoding/json" |
| 5 | + "fmt" |
| 6 | +) |
| 7 | + |
| 8 | +// ChunkerCharacter is a node that chunks text by character. |
| 9 | +type ChunkerCharacter struct { |
| 10 | + ID string `json:"-"` |
| 11 | + Name string `json:"-"` |
| 12 | + APIURL string `json:"unstructured_api_url,omitempty"` |
| 13 | + APIKey string `json:"unstructured_api_key,omitempty"` |
| 14 | + IncludeOrigElements bool `json:"include_orig_elements,omitempty"` |
| 15 | + NewAfterNChars int `json:"new_after_n_chars,omitempty"` |
| 16 | + MaxCharacters int `json:"max_characters,omitempty"` |
| 17 | + Overlap int `json:"overlap,omitempty"` |
| 18 | + OverlapAll bool `json:"overlap_all"` |
| 19 | + ContextualChunkingStrategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"` |
| 20 | +} |
| 21 | + |
// ChunkingStrategy names a contextual chunking strategy. It is the type of
// the ChunkerCharacter.ContextualChunkingStrategy field.
type ChunkingStrategy string

const (
	// ChunkingStrategyV1 is the "v1" contextual chunking strategy. It is an
	// untyped string constant, so it is assignable both to a
	// ChunkingStrategy and to a plain string.
	ChunkingStrategyV1 = "v1"
)
| 27 | + |
| 28 | +var _ WorkflowNode = new(ChunkerCharacter) |
| 29 | + |
| 30 | +// isNode implements the WorkflowNode interface. |
| 31 | +func (c ChunkerCharacter) isNode() {} |
| 32 | + |
| 33 | +// MarshalJSON implements the json.Marshaler interface. |
| 34 | +func (c ChunkerCharacter) MarshalJSON() ([]byte, error) { |
| 35 | + type alias ChunkerCharacter |
| 36 | + |
| 37 | + data, err := json.Marshal(alias(c)) |
| 38 | + if err != nil { |
| 39 | + return nil, fmt.Errorf("failed to marshal chunker character: %w", err) |
| 40 | + } |
| 41 | + |
| 42 | + headerData, err := json.Marshal(header{ |
| 43 | + ID: c.ID, |
| 44 | + Name: c.Name, |
| 45 | + Type: nodeTypeChunk, |
| 46 | + Subtype: string(ChunkerSubtypeCharacter), |
| 47 | + Settings: json.RawMessage(data), |
| 48 | + }) |
| 49 | + if err != nil { |
| 50 | + return nil, fmt.Errorf("failed to marshal chunker character header: %w", err) |
| 51 | + } |
| 52 | + |
| 53 | + return headerData, nil |
| 54 | +} |
0 commit comments