diff --git a/.golangci.yml b/.golangci.yml index fd775f7..3801335 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -58,9 +58,14 @@ linters: check-type-assertions: false exclude-functions: - io/ioutil.ReadFile - - io.Copy(*bytes.Buffer) - - io.Copy(os.Stdout) + - io.Copy - (io.Closer).Close + - (net/http.ResponseWriter).Write + - (io.Writer).Write + gosec: + excludes: + - G101 + - G104 # Issues configuration issues: diff --git a/bearer.go b/bearer.go index e48b43a..a68c5d5 100644 --- a/bearer.go +++ b/bearer.go @@ -9,9 +9,12 @@ type bearer struct { rt http.RoundTripper } +// HeaderKey is "Unstructured-API-Key", which is the header where Unstructured expects to find the API key. +const HeaderKey = "Unstructured-API-Key" + // RoundTrip implements the http.RoundTripper interface. func (b *bearer) RoundTrip(req *http.Request) (*http.Response, error) { - req.Header.Set("Unstructured-API-Key", b.key) + req.Header.Set(HeaderKey, b.key) // This is implementing the http.RoundTripper interface, errors should be passed through as-is return b.rt.RoundTrip(req) //nolint:wrapcheck diff --git a/block_types.go b/block_types.go new file mode 100644 index 0000000..9cc8f47 --- /dev/null +++ b/block_types.go @@ -0,0 +1,10 @@ +package unstructured + +// BlockType is a type that represents a block type. +type BlockType string + +// BlockType constants. +const ( + BlockTypeImage BlockType = "Image" + BlockTypeTable BlockType = "Table" +) diff --git a/chunker_character.go b/chunker_character.go new file mode 100644 index 0000000..b04bc3a --- /dev/null +++ b/chunker_character.go @@ -0,0 +1,54 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// ChunkerCharacter is a node that chunks text by character.
+type ChunkerCharacter struct { + ID string `json:"-"` + Name string `json:"-"` + APIURL string `json:"unstructured_api_url,omitempty"` + APIKey string `json:"unstructured_api_key,omitempty"` + IncludeOrigElements bool `json:"include_orig_elements,omitempty"` + NewAfterNChars int `json:"new_after_n_chars,omitempty"` + MaxCharacters int `json:"max_characters,omitempty"` + Overlap int `json:"overlap,omitempty"` + OverlapAll bool `json:"overlap_all"` + ContextualChunkingStrategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"` +} + +// ChunkingStrategy is a strategy for contextual chunking. +type ChunkingStrategy string + +// ChunkingStrategyV1 is the "v1" contextual chunking strategy value. +const ChunkingStrategyV1 = "v1" + +var _ WorkflowNode = new(ChunkerCharacter) + +// isNode implements the WorkflowNode interface. +func (c ChunkerCharacter) isNode() {} + +// MarshalJSON implements the json.Marshaler interface. +func (c ChunkerCharacter) MarshalJSON() ([]byte, error) { + type alias ChunkerCharacter + + data, err := json.Marshal(alias(c)) + if err != nil { + return nil, fmt.Errorf("failed to marshal chunker character: %w", err) + } + + headerData, err := json.Marshal(header{ + ID: c.ID, + Name: c.Name, + Type: nodeTypeChunk, + Subtype: string(ChunkerSubtypeCharacter), + Settings: json.RawMessage(data), + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal chunker character header: %w", err) + } + + return headerData, nil +} diff --git a/chunker_page.go b/chunker_page.go new file mode 100644 index 0000000..47e283f --- /dev/null +++ b/chunker_page.go @@ -0,0 +1,48 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// ChunkerPage is a node that chunks text by page.
+type ChunkerPage struct { + ID string `json:"-"` + Name string `json:"-"` + APIURL string `json:"unstructured_api_url,omitempty"` + APIKey string `json:"unstructured_api_key,omitempty"` + IncludeOrigElements bool `json:"include_orig_elements,omitempty"` + NewAfterNChars int `json:"new_after_n_chars,omitempty"` + MaxCharacters int `json:"max_characters,omitempty"` + Overlap int `json:"overlap,omitempty"` + OverlapAll bool `json:"overlap_all"` + Strategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"` +} + +var _ WorkflowNode = new(ChunkerPage) + +// isNode implements the WorkflowNode interface. +func (c ChunkerPage) isNode() {} + +// MarshalJSON implements the json.Marshaler interface. +func (c ChunkerPage) MarshalJSON() ([]byte, error) { + type alias ChunkerPage + + data, err := json.Marshal(alias(c)) + if err != nil { + return nil, fmt.Errorf("failed to marshal chunker page: %w", err) + } + + headerData, err := json.Marshal(header{ + ID: c.ID, + Name: c.Name, + Type: nodeTypeChunk, + Subtype: string(ChunkerSubtypePage), + Settings: json.RawMessage(data), + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal chunker page header: %w", err) + } + + return headerData, nil +} diff --git a/chunker_similarity.go b/chunker_similarity.go new file mode 100644 index 0000000..610761e --- /dev/null +++ b/chunker_similarity.go @@ -0,0 +1,48 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// ChunkerSimilarity is a node that chunks text by similarity.
+type ChunkerSimilarity struct { + ID string `json:"-"` + Name string `json:"-"` + APIURL string `json:"unstructured_api_url,omitempty"` + APIKey string `json:"unstructured_api_key,omitempty"` + IncludeOrigElements bool `json:"include_orig_elements,omitempty"` + NewAfterNChars int `json:"new_after_n_chars,omitempty"` + MaxCharacters int `json:"max_characters,omitempty"` + Overlap int `json:"overlap,omitempty"` + OverlapAll bool `json:"overlap_all"` + Strategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"` +} + +var _ WorkflowNode = new(ChunkerSimilarity) + +// isNode implements the WorkflowNode interface. +func (c ChunkerSimilarity) isNode() {} + +// MarshalJSON implements the json.Marshaler interface. +func (c ChunkerSimilarity) MarshalJSON() ([]byte, error) { + type alias ChunkerSimilarity + + data, err := json.Marshal(alias(c)) + if err != nil { + return nil, fmt.Errorf("failed to marshal chunker similarity: %w", err) + } + + headerData, err := json.Marshal(header{ + ID: c.ID, + Name: c.Name, + Type: nodeTypeChunk, + Subtype: string(ChunkerSubtypeSimilarity), + Settings: json.RawMessage(data), + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal chunker similarity header: %w", err) + } + + return headerData, nil +} diff --git a/chunker_title.go b/chunker_title.go new file mode 100644 index 0000000..5f81c17 --- /dev/null +++ b/chunker_title.go @@ -0,0 +1,49 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// ChunkerTitle is a node that chunks text by title.
+type ChunkerTitle struct { + ID string `json:"-"` + Name string `json:"-"` + APIURL string `json:"unstructured_api_url,omitempty"` + APIKey string `json:"unstructured_api_key,omitempty"` + CombineTextUnderN int `json:"combine_text_under_n_chars,omitempty"` + IncludeOrigElements bool `json:"include_orig_elements,omitempty"` + NewAfterNChars int `json:"new_after_n_chars,omitempty"` + MaxCharacters int `json:"max_characters,omitempty"` + Overlap int `json:"overlap,omitempty"` + OverlapAll bool `json:"overlap_all"` + ContextualChunkingStrategy ChunkingStrategy `json:"contextual_chunking_strategy,omitempty"` +} + +var _ WorkflowNode = new(ChunkerTitle) + +// isNode implements the WorkflowNode interface. +func (c ChunkerTitle) isNode() {} + +// MarshalJSON implements the json.Marshaler interface. +func (c ChunkerTitle) MarshalJSON() ([]byte, error) { + type alias ChunkerTitle + + data, err := json.Marshal(alias(c)) + if err != nil { + return nil, fmt.Errorf("failed to marshal chunker title: %w", err) + } + + headerData, err := json.Marshal(header{ + ID: c.ID, + Name: c.Name, + Type: nodeTypeChunk, + Subtype: string(ChunkerSubtypeTitle), + Settings: json.RawMessage(data), + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal chunker title header: %w", err) + } + + return headerData, nil +} diff --git a/chunker_type.go b/chunker_type.go new file mode 100644 index 0000000..d76ca4b --- /dev/null +++ b/chunker_type.go @@ -0,0 +1,56 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// ChunkerSubtype is a type that represents a chunker subtype. +type ChunkerSubtype string + +// ChunkerSubtype constants. 
+const ( + ChunkerSubtypeCharacter ChunkerSubtype = "chunk_by_character" + ChunkerSubtypeTitle ChunkerSubtype = "chunk_by_title" + ChunkerSubtypePage ChunkerSubtype = "chunk_by_page" + ChunkerSubtypeSimilarity ChunkerSubtype = "chunk_by_similarity" +) + +func unmarshalChunker(header header) (WorkflowNode, error) { + var chunker WorkflowNode + + switch ChunkerSubtype(header.Subtype) { + case ChunkerSubtypeCharacter: + chunker = &ChunkerCharacter{ + ID: header.ID, + Name: header.Name, + } + + case ChunkerSubtypeTitle: + chunker = &ChunkerTitle{ + ID: header.ID, + Name: header.Name, + } + + case ChunkerSubtypePage: + chunker = &ChunkerPage{ + ID: header.ID, + Name: header.Name, + } + + case ChunkerSubtypeSimilarity: + chunker = &ChunkerSimilarity{ + ID: header.ID, + Name: header.Name, + } + + default: + return nil, fmt.Errorf("unknown Chunker strategy: %s", header.Subtype) + } + + if err := json.Unmarshal(header.Settings, chunker); err != nil { + return nil, fmt.Errorf("failed to unmarshal Chunker node: %w", err) + } + + return chunker, nil +} diff --git a/client.go b/client.go index 6bd8f86..e1530ff 100644 --- a/client.go +++ b/client.go @@ -3,7 +3,9 @@ package unstructured import ( "cmp" "encoding/json" + "errors" "fmt" + "io" "net/http" "net/url" "os" @@ -55,6 +57,15 @@ func WithKey(key string) Option { } } +// WithClient returns an Option that sets the HTTP client to use for requests. +// If no client is provided, the client will default to [http.DefaultClient]. +func WithClient(hc *http.Client) Option { + return func(c *Client) error { + c.hc = hc + return nil + } +} + // New creates a new Client instance with the provided options. // If the `UNSTRUCTURED_API_KEY` environment variable is set, it will be used as the API key for authentication. // If the `UNSTRUCTURED_API_URL` environment variable is set to a valid URL, it will be used as the base URL for the Unstructured.io API. 
@@ -104,17 +115,26 @@ func (c *Client) do(req *http.Request, out any) error { defer func() { _ = resp.Body.Close() }() if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices { + body, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read response body: %w", err) + } + // Handle 422 validation errors specifically if resp.StatusCode == http.StatusUnprocessableEntity { var validationErr HTTPValidationError - if err := json.NewDecoder(resp.Body).Decode(&validationErr); err != nil { - return fmt.Errorf("failed to decode validation error response: %w", err) + if err := json.Unmarshal(body, &validationErr); err == nil { + return &APIError{ + Code: resp.StatusCode, + Err: &validationErr, + } } - - return &validationErr } - return fmt.Errorf("unsuccessful response: %s", resp.Status) + return &APIError{ + Code: resp.StatusCode, + Err: errors.New(string(body)), + } } if out != nil { diff --git a/client_test.go b/client_test.go new file mode 100644 index 0000000..61b505f --- /dev/null +++ b/client_test.go @@ -0,0 +1,40 @@ +package unstructured + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/aws-gopher/unstructured-sdk-go/test" +) + +func testclient(t *testing.T) (*Client, *test.Mux) { + mux := test.NewMux() + + server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + val := r.Header.Get("unstructured-api-key") + if val == "" { + http.Error(w, "Unauthorized: missing header", http.StatusUnauthorized) + return + } + + if val != test.FakeAPIKey { + http.Error(w, "Unauthorized: invalid key", http.StatusUnauthorized) + return + } + + mux.ServeHTTP(w, r) + })) + t.Cleanup(server.Close) + + c, err := New( + WithClient(server.Client()), + WithEndpoint(server.URL), + WithKey(test.FakeAPIKey), + ) + if err != nil { + t.Fatalf("failed to create client: %v", err) + } + + return c, mux +} diff --git a/destination_connection_check.go b/destination_connection_check.go 
index 6219592..7b1e705 100644 --- a/destination_connection_check.go +++ b/destination_connection_check.go @@ -11,7 +11,7 @@ import ( func (c *Client) CreateDestinationConnectionCheck(ctx context.Context, id string) (*DagNodeConnectionCheck, error) { req, err := http.NewRequestWithContext(ctx, http.MethodPost, - c.endpoint.JoinPath("/destinations", id, "connection-check").String(), + c.endpoint.JoinPath("destinations", id, "connection-check").String(), nil, ) if err != nil { @@ -31,7 +31,7 @@ func (c *Client) CreateDestinationConnectionCheck(ctx context.Context, id string func (c *Client) GetDestinationConnectionCheck(ctx context.Context, id string) (*DagNodeConnectionCheck, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/destinations", id, "connection-check").String(), + c.endpoint.JoinPath("destinations", id, "connection-check").String(), nil, ) if err != nil { diff --git a/destination_create.go b/destination_create.go index ce05f89..6b6cbbe 100644 --- a/destination_create.go +++ b/destination_create.go @@ -33,7 +33,7 @@ func (c *Client) CreateDestination(ctx context.Context, in CreateDestinationRequ req, err := http.NewRequestWithContext(ctx, http.MethodPost, - c.endpoint.JoinPath("/destinations").String(), + c.endpoint.JoinPath("destinations/").String(), bytes.NewReader(body), ) if err != nil { @@ -54,7 +54,6 @@ func (c *Client) CreateDestination(ctx context.Context, in CreateDestinationRequ // It contains the name, type, and configuration for the destination. 
type CreateDestinationRequest struct { Name string - Type string Config DestinationConfigInput } diff --git a/destination_create_test.go b/destination_create_test.go new file mode 100644 index 0000000..622e7a9 --- /dev/null +++ b/destination_create_test.go @@ -0,0 +1,60 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "testing" + "time" +) + +func TestCreateDestination(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.CreateDestination = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`{` + + ` "config": {` + + ` "remote_url": "s3://mock-s3-connector",` + + ` "key": "blah",` + + ` "secret": "blah",` + + ` "anonymous": false` + + ` },` + + ` "created_at": "2023-09-15T01:06:53.146Z",` + + ` "id": "b25d4161-77a0-4e08-b65e-86f398ce15ad",` + + ` "name": "test_destination_name",` + + ` "type": "s3"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + destination, err := client.CreateDestination(t.Context(), CreateDestinationRequest{ + Name: "test_destination_name", + + Config: &S3DestinationConnectorConfigInput{ + RemoteURL: "s3://mock-s3-connector", + Key: String("blah"), + Secret: String("blah"), + }, + }) + if err != nil { + t.Fatalf("failed to create destination: %v", err) + } + + if err := errors.Join( + eq("destination.id", destination.ID, "b25d4161-77a0-4e08-b65e-86f398ce15ad"), + eq("destination.name", destination.Name, "test_destination_name"), + equal("destination.created_at", destination.CreatedAt, time.Date(2023, 9, 15, 1, 6, 53, 146000000, time.UTC)), + ); err != nil { + t.Error(err) + } + + cfg, ok := destination.Config.(*S3DestinationConnectorConfig) + if !ok { + t.Errorf("expected destination config to be %T, got %T", cfg, destination.Config) + } +} diff --git a/destination_delete.go b/destination_delete.go index 65a7b64..6fed7bb 100644 --- a/destination_delete.go +++ 
b/destination_delete.go @@ -10,7 +10,7 @@ import ( func (c *Client) DeleteDestination(ctx context.Context, id string) error { req, err := http.NewRequestWithContext(ctx, http.MethodDelete, - c.endpoint.JoinPath("/destinations", id).String(), + c.endpoint.JoinPath("destinations", id).String(), nil, ) if err != nil { diff --git a/destination_delete_test.go b/destination_delete_test.go new file mode 100644 index 0000000..6f58835 --- /dev/null +++ b/destination_delete_test.go @@ -0,0 +1,34 @@ +package unstructured + +import ( + "net/http" + "strconv" + "testing" +) + +func TestDeleteDestination(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "b25d4161-77a0-4e08-b65e-86f398ce15ad" + + mux.DeleteDestination = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "destination ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{"detail": "Destination with id ` + id + ` successfully deleted."}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.WriteHeader(http.StatusOK) + w.Write(response) + } + + err := client.DeleteDestination(t.Context(), "b25d4161-77a0-4e08-b65e-86f398ce15ad") + if err != nil { + t.Fatalf("failed to delete destination: %v", err) + } +} diff --git a/destination_get.go b/destination_get.go index d3c0f22..849a34d 100644 --- a/destination_get.go +++ b/destination_get.go @@ -10,7 +10,7 @@ import ( func (c *Client) GetDestination(ctx context.Context, id string) (*Destination, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/destinations", id).String(), + c.endpoint.JoinPath("destinations", id).String(), nil, ) if err != nil { diff --git a/destination_get_test.go b/destination_get_test.go new file mode 100644 index 0000000..28ebc00 --- /dev/null +++ b/destination_get_test.go @@ -0,0 +1,86 @@ +package unstructured + +import ( + "errors" + 
"net/http" + "strconv" + "testing" + "time" +) + +func TestGetDestination(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "0c363dec-3c70-45ee-8041-481044a6e1cc" + mux.GetDestination = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "destination ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{` + + ` "config": {` + + ` "remote_url": "s3://mock-s3-connector",` + + ` "anonymous": false,` + + ` "key": "**********",` + + ` "secret": "**********",` + + ` "token": null,` + + ` "endpoint_url": null` + + ` },` + + ` "created_at": "2025-08-22T08:47:29.802Z",` + + ` "id": "` + id + `",` + + ` "name": "test_destination_name",` + + ` "type": "s3"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + destination, err := client.GetDestination(t.Context(), id) + if err != nil { + t.Fatalf("failed to get destination: %v", err) + } + + if err := errors.Join( + eq("destination.id", destination.ID, id), + eq("destination.name", destination.Name, "test_destination_name"), + eq("destination.type", destination.Type, "s3"), + equal("destination.created_at", destination.CreatedAt, time.Date(2025, 8, 22, 8, 47, 29, 802000000, time.UTC)), + ); err != nil { + t.Error(err) + } + + cfg, ok := destination.Config.(*S3DestinationConnectorConfig) + if !ok { + t.Errorf("expected destination config to be %T, got %T", cfg, destination.Config) + } +} + +func TestGetDestinationNotFound(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "0c363dec-3c70-45ee-8041-481044a6e1cc" + mux.GetDestination = func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "destination ID "+r.PathValue("id")+" not found", http.StatusNotFound) + } + + _, err := client.GetDestination(t.Context(), id) + if err == nil { + t.Fatalf("expected error, got nil") + } + + var apierr 
*APIError + if !errors.As(err, &apierr) { + t.Fatalf("expected error to be an %T, got %T", apierr, err) + } + + if apierr.Code != http.StatusNotFound { + t.Fatalf("expected error code to be %d, got %d", http.StatusNotFound, apierr.Code) + } +} diff --git a/destination_list.go b/destination_list.go index e80388b..f7b813f 100644 --- a/destination_list.go +++ b/destination_list.go @@ -10,7 +10,7 @@ import ( func (c *Client) ListDestinations(ctx context.Context, typ string) ([]Destination, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/destinations").String(), + c.endpoint.JoinPath("destinations").String(), nil, ) if err != nil { diff --git a/destination_list_test.go b/destination_list_test.go new file mode 100644 index 0000000..487b49a --- /dev/null +++ b/destination_list_test.go @@ -0,0 +1,129 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "strings" + "testing" + "time" +) + +func TestListDestinations(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListDestinations = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`[` + + ` {` + + ` "config": {` + + ` "remote_url": "s3://mock-s3-connector",` + + ` "anonymous": false,` + + ` "key": "**********",` + + ` "secret": "**********",` + + ` "token": null,` + + ` "endpoint_url": null` + + ` },` + + ` "created_at": "2025-08-22T08:47:29.802Z",` + + ` "id": "0c363dec-3c70-45ee-8041-481044a6e1cc",` + + ` "name": "test_destination_name",` + + ` "type": "s3"` + + ` }` + + `]`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + destinations, err := client.ListDestinations(t.Context(), "") + if err != nil { + t.Fatalf("failed to list destinations: %v", err) + } + + if len(destinations) != 1 { + t.Fatalf("expected 1 destination, got %d", len(destinations)) + } + + destination := destinations[0] + if err := errors.Join( + 
eq("destination.id", destination.ID, "0c363dec-3c70-45ee-8041-481044a6e1cc"), + eq("destination.name", destination.Name, "test_destination_name"), + eq("destination.type", destination.Type, "s3"), + equal("destination.created_at", destination.CreatedAt, time.Date(2025, 8, 22, 8, 47, 29, 802000000, time.UTC)), + ); err != nil { + t.Error(err) + } + + cfg, ok := destination.Config.(*S3DestinationConnectorConfig) + if !ok { + t.Errorf("expected destination config to be %T, got %T", cfg, destination.Config) + } +} + +func TestListDestinationsEmpty(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListDestinations = func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", "2") + w.WriteHeader(http.StatusOK) + w.Write([]byte("[]")) + } + + destinations, err := client.ListDestinations(t.Context(), "") + if err != nil { + t.Fatalf("failed to list destinations: %v", err) + } + + if len(destinations) != 0 { + t.Fatalf("expected 0 destinations, got %d", len(destinations)) + } +} + +func TestListDestinationsErrorCode(t *testing.T) { + t.Parallel() + + for _, code := range []int{ + http.StatusBadRequest, // 400 + http.StatusUnauthorized, // 401 + http.StatusForbidden, // 403 + http.StatusNotFound, // 404 + http.StatusInternalServerError, // 500 + http.StatusBadGateway, // 502 + http.StatusServiceUnavailable, // 503 + http.StatusGatewayTimeout, // 504 + } { + t.Run(strconv.Itoa(code), func(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListDestinations = func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + } + + _, err := client.ListDestinations(t.Context(), "") + if err == nil { + t.Fatalf("expected error, got nil") + } + + if !strings.Contains(err.Error(), "API error occurred") { + t.Fatalf("expected error to contain 'API error occurred', got %v", err) + } + + var apierr *APIError + 
if !errors.As(err, &apierr) { + t.Fatalf("expected error to be an %T, got %T", apierr, err) + } + + if apierr.Code != code { + t.Fatalf("expected error code to be %d, got %d", code, apierr.Code) + } + }) + } +} diff --git a/destination_update.go b/destination_update.go index b6240ab..77b3415 100644 --- a/destination_update.go +++ b/destination_update.go @@ -35,7 +35,7 @@ func (c *Client) UpdateDestination(ctx context.Context, in UpdateDestinationRequ req, err := http.NewRequestWithContext(ctx, http.MethodPut, - c.endpoint.JoinPath("/destinations", in.ID).String(), + c.endpoint.JoinPath("destinations", in.ID).String(), bytes.NewReader(body), ) if err != nil { diff --git a/destination_update_test.go b/destination_update_test.go new file mode 100644 index 0000000..3ab9979 --- /dev/null +++ b/destination_update_test.go @@ -0,0 +1,67 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "testing" + "time" +) + +func TestUpdateDestination(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "b25d4161-77a0-4e08-b65e-86f398ce15ad" + + mux.UpdateDestination = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "destination ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{` + + ` "config": {` + + ` "remote_url": "s3://mock-s3-connector",` + + ` "key": "blah",` + + ` "secret": "blah",` + + ` "anonymous": false` + + ` },` + + ` "created_at": "2023-09-15T01:06:53.146Z",` + + ` "id": "` + id + `",` + + ` "name": "test_destination_name",` + + ` "type": "s3"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.WriteHeader(http.StatusOK) + w.Write(response) + } + + updated, err := client.UpdateDestination(t.Context(), UpdateDestinationRequest{ + ID: id, + Config: &S3DestinationConnectorConfigInput{ + RemoteURL: "s3://mock-s3-connector", + Key: String("blah"), + Secret: 
String("blah"), + }, + }) + if err != nil { + t.Fatalf("failed to update destination: %v", err) + } + + if err := errors.Join( + eq("updated_destination.id", updated.ID, id), + eq("updated_destination.name", updated.Name, "test_destination_name"), + equal("updated_destination.created_at", updated.CreatedAt, time.Date(2023, 9, 15, 1, 6, 53, 146000000, time.UTC)), + ); err != nil { + t.Error(err) + } + + cfg, ok := updated.Config.(*S3DestinationConnectorConfig) + if !ok { + t.Errorf("expected destination config to be %T, got %T", cfg, updated.Config) + } +} diff --git a/encoding.go b/encoding.go new file mode 100644 index 0000000..5ed5817 --- /dev/null +++ b/encoding.go @@ -0,0 +1,45 @@ +package unstructured + +import "strings" + +// Encoding is a type that represents an encoding. +type Encoding string + +// Encoding constants. +const ( + EncodingUTF8 Encoding = "utf_8" + EncodingISO88591 Encoding = "iso_8859_1" + EncodingISO88596 Encoding = "iso_8859_6" + EncodingISO88598 Encoding = "iso_8859_8" + EncodingASCII Encoding = "ascii" + EncodingBig5 Encoding = "big5" + EncodingUTF16 Encoding = "utf_16" + EncodingUTF16Be Encoding = "utf_16_be" + EncodingUTF16Le Encoding = "utf_16_le" + EncodingUTF32 Encoding = "utf_32" + EncodingUTF32Be Encoding = "utf_32_be" + EncodingUTF32Le Encoding = "utf_32_le" + EncodingEUCJIS2004 Encoding = "euc_jis_2004" + EncodingEUCJISX0213 Encoding = "euc_jisx0213" + EncodingEUCJP Encoding = "euc_jp" + EncodingEUCKR Encoding = "euc_kr" + EncodingGb18030 Encoding = "gb18030" + EncodingSHIFTJIS Encoding = "shift_jis" + EncodingSHIFTJIS2004 Encoding = "shift_jis_2004" + EncodingSHIFTJISX0213 Encoding = "shift_jisx0213" +) + +// String implements the fmt.Stringer interface, canonicalizing the encoding name. 
+func (e Encoding) String() string { + s := strings.TrimSpace(string(e)) + s = strings.ToLower(s) + s = strings.ReplaceAll(s, "_", "-") + // Underscores are hyphens by now, so the -i/-e variants must be matched in hyphenated form. + switch s { + case "iso-8859-6-i", "iso-8859-8-i", + "iso-8859-6-e", "iso-8859-8-e": + s = s[:len(s)-2] + } + + return s +} diff --git a/enricher.go b/enricher.go new file mode 100644 index 0000000..6ba2806 --- /dev/null +++ b/enricher.go @@ -0,0 +1,103 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// Enricher is a node that enriches text. +type Enricher struct { + ID string `json:"-"` + Name string `json:"-"` + Subtype EnrichmentType `json:"-"` + NERPromptOverride string `json:"prompt_interface_overrides,omitempty"` +} + +// EnrichmentType is a type that represents an enrichment type. +type EnrichmentType string + +// EnrichmentType constants. +const ( + EnrichmentTypeImageOpenAI EnrichmentType = "openai_image_description" + EnrichmentTypeTableOpenAI EnrichmentType = "openai_table_description" + EnrichmentTypeTable2HTMLOpenAI EnrichmentType = "openai_table2html" + EnrichmentTypeNEROpenAI EnrichmentType = "openai_ner" + + EnrichmentTypeImageAnthropic EnrichmentType = "anthropic_image_description" + EnrichmentTypeTableAnthropic EnrichmentType = "anthropic_table_description" + EnrichmentTypeNERAnthropic EnrichmentType = "anthropic_ner" + + EnrichmentTypeImageBedrock EnrichmentType = "bedrock_image_description" + EnrichmentTypeTableBedrock EnrichmentType = "bedrock_table_description" +) + +var _ WorkflowNode = new(Enricher) + +func (e Enricher) isNode() {} + +// MarshalJSON implements the json.Marshaler interface.
+func (e Enricher) MarshalJSON() ([]byte, error) { + var settings json.RawMessage + + if e.NERPromptOverride != "" && (e.Subtype == EnrichmentTypeNERAnthropic || e.Subtype == EnrichmentTypeNEROpenAI) { + nested := struct { + PromptOverride struct { + Prompt struct { + User string `json:"user"` + } `json:"prompt"` + } `json:"prompt_interface_overrides"` + }{} + nested.PromptOverride.Prompt.User = e.NERPromptOverride + + data, err := json.Marshal(nested) + if err != nil { + return nil, fmt.Errorf("failed to marshal enricher nested settings: %w", err) + } + + settings = json.RawMessage(data) + } + + headerData, err := json.Marshal(header{ + ID: e.ID, + Name: e.Name, + Type: nodeTypeEnrich, + Subtype: string(e.Subtype), + Settings: settings, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal enricher header: %w", err) + } + + return headerData, nil +} + +// unmarshalEnricher rebuilds an Enricher from a decoded node header, restoring the subtype and any NER prompt override. +func unmarshalEnricher(header header) (WorkflowNode, error) { + enricher := &Enricher{ + ID: header.ID, + Name: header.Name, + Subtype: EnrichmentType(header.Subtype), + } + + // MarshalJSON only emits settings for NER prompt overrides, so they may be absent; json.Unmarshal rejects empty input. + if len(header.Settings) == 0 { + return enricher, nil + } + + // Mirror the nested object MarshalJSON writes; the flat string field cannot decode it. + var settings struct { + PromptOverride struct { + Prompt struct { + User string `json:"user"` + } `json:"prompt"` + } `json:"prompt_interface_overrides"` + } + + if err := json.Unmarshal(header.Settings, &settings); err != nil { + return nil, fmt.Errorf("failed to unmarshal enricher: %w", err) + } + + enricher.NERPromptOverride = settings.PromptOverride.Prompt.User + + return enricher, nil +} diff --git a/errors.go b/errors.go index 29dc6bd..d130ed2 100644 --- a/errors.go +++ b/errors.go @@ -34,3 +34,16 @@ type ValidationError struct { func (e *ValidationError) Error() string { return fmt.Sprintf("%s at %v: %s", e.Type, e.Location, e.Message) } + +// APIError represents an error returned by the API when a non-2xx status code is returned. +type APIError struct { + Code int + Err error +} + +// Error returns a string representation of the API error.
+func (e *APIError) Error() string { + return fmt.Sprintf("an API error occurred: [%d] %v", e.Code, e.Err) +} + +func (e *APIError) Unwrap() error { return e.Err } diff --git a/excludable_elements.go b/excludable_elements.go new file mode 100644 index 0000000..3f3498e --- /dev/null +++ b/excludable_elements.go @@ -0,0 +1,21 @@ +package unstructured + +// ExcludeableElement represents elements that can be excluded during document processing. +type ExcludeableElement string + +// Excludeable element constants for document processing. +const ( + ExcludableElementFigureCaption ExcludeableElement = "FigureCaption" + ExcludableElementNarrativeText ExcludeableElement = "NarrativeText" + ExcludableElementListItem ExcludeableElement = "ListItem" + ExcludableElementTitle ExcludeableElement = "Title" + ExcludableElementAddress ExcludeableElement = "Address" + ExcludableElementTable ExcludeableElement = "Table" + ExcludableElementPageBreak ExcludeableElement = "PageBreak" + ExcludableElementHeader ExcludeableElement = "Header" + ExcludableElementFooter ExcludeableElement = "Footer" + ExcludableElementUncategorizedText ExcludeableElement = "UncategorizedText" + ExcludableElementImage ExcludeableElement = "Image" + ExcludableElementFormula ExcludeableElement = "Formula" + ExcludableElementEmailAddress ExcludeableElement = "EmailAddress" +) diff --git a/go.mod b/go.mod index f780117..7a1ca3a 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/aws-gopher/unstructured-sdk-go -go 1.24.5 +go 1.25.0 diff --git a/job.go b/job.go index 528d693..9fd5872 100644 --- a/job.go +++ b/job.go @@ -1,6 +1,11 @@ package unstructured -import "time" +import ( + "encoding/json" + "fmt" + "strings" + "time" +) // Job represents a job, which is an execution of a workflow in Unstructured.io. type Job struct { @@ -15,6 +20,32 @@ type Job struct { JobType WorkflowJobType `json:"job_type"` } +// UnmarshalJSON implements the json.Unmarshaler interface.
+func (j *Job) UnmarshalJSON(data []byte) error { + type mask Job + + shadowed := struct { + *mask + CreatedAt string `json:"created_at,omitempty"` + }{ + mask: (*mask)(j), + } + if err := json.Unmarshal(data, &shadowed); err != nil { + return fmt.Errorf("failed to unmarshal job: %w", err) + } + + if shadowed.CreatedAt != "" { + t, err := time.Parse("2006-01-02T15:04:05", strings.TrimSuffix(shadowed.CreatedAt, "Z")) + if err != nil { + return fmt.Errorf("failed to parse job creation time: %w", err) + } + + j.CreatedAt = t + } + + return nil +} + // JobStatus represents the status of a job (e.g., scheduled, in progress, completed, stopped, failed). type JobStatus string diff --git a/job_cancel.go b/job_cancel.go index 2b0a667..2e3fad5 100644 --- a/job_cancel.go +++ b/job_cancel.go @@ -10,7 +10,7 @@ import ( func (c *Client) CancelJob(ctx context.Context, id string) error { req, err := http.NewRequestWithContext(ctx, http.MethodPost, - c.endpoint.JoinPath("/jobs", id, "cancel").String(), + c.endpoint.JoinPath("jobs", id, "cancel").String(), nil, ) if err != nil { diff --git a/job_cancel_test.go b/job_cancel_test.go new file mode 100644 index 0000000..e358793 --- /dev/null +++ b/job_cancel_test.go @@ -0,0 +1,37 @@ +package unstructured + +import ( + "net/http" + "strconv" + "testing" +) + +func TestCancelJob(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "fcdc4994-eea5-425c-91fa-e03f2bd8030d" + + mux.CancelJob = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "job ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{` + + ` "id": "` + id + `",` + + ` "status": "cancelled",` + + ` "message": "Job successfully cancelled."` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + err := client.CancelJob(t.Context(), id) + if err != nil { + t.Fatalf("failed to 
cancel job: %v", err) + } +} diff --git a/job_details.go b/job_details.go index 6d7aaf4..9040d6c 100644 --- a/job_details.go +++ b/job_details.go @@ -11,7 +11,7 @@ import ( func (c *Client) GetJobDetails(ctx context.Context, id string) (*JobDetails, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/jobs", id, "details").String(), + c.endpoint.JoinPath("jobs", id, "details").String(), nil, ) if err != nil { diff --git a/job_download.go b/job_download.go index 49c3649..437dd45 100644 --- a/job_download.go +++ b/job_download.go @@ -7,24 +7,34 @@ import ( "net/http" ) +// DownloadJobRequest represents a request to download a job output file. +type DownloadJobRequest struct { + JobID string + NodeID string + FileID string +} + // DownloadJob downloads the output files from a completed job -func (c *Client) DownloadJob(ctx context.Context, id string) (io.ReadCloser, error) { +func (c *Client) DownloadJob(ctx context.Context, in DownloadJobRequest) (io.ReadCloser, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/jobs", id, "download").String(), + c.endpoint.JoinPath("jobs", in.JobID, "download").String(), nil, ) if err != nil { return nil, fmt.Errorf("failed to create HTTP request: %w", err) } + q := req.URL.Query() + q.Add("node_id", in.NodeID) + q.Add("file_id", in.FileID) + req.URL.RawQuery = q.Encode() + resp, err := c.hc.Do(req) if err != nil { return nil, fmt.Errorf("failed to execute HTTP request: %w", err) } - defer func() { _ = resp.Body.Close() }() - if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("failed to download job: %s", resp.Status) } diff --git a/job_failed_files.go b/job_failed_files.go index 790af71..d607959 100644 --- a/job_failed_files.go +++ b/job_failed_files.go @@ -11,7 +11,7 @@ import ( func (c *Client) GetJobFailedFiles(ctx context.Context, id string) (*JobFailedFiles, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - 
c.endpoint.JoinPath("/jobs", id, "failed-files").String(), + c.endpoint.JoinPath("jobs", id, "failed-files").String(), nil, ) if err != nil { diff --git a/job_get.go b/job_get.go index 48cd203..233fff1 100644 --- a/job_get.go +++ b/job_get.go @@ -10,7 +10,7 @@ import ( func (c *Client) GetJob(ctx context.Context, id string) (*Job, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/jobs", id).String(), + c.endpoint.JoinPath("jobs", id).String(), nil, ) if err != nil { diff --git a/job_get_test.go b/job_get_test.go new file mode 100644 index 0000000..17ff29b --- /dev/null +++ b/job_get_test.go @@ -0,0 +1,124 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "strings" + "testing" + "time" +) + +func TestGetJob(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "fcdc4994-eea5-425c-91fa-e03f2bd8030d" + mux.GetJob = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "job ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{` + + ` "created_at": "2025-06-22T11:37:21.648Z",` + + ` "id": "` + id + `",` + + ` "status": "SCHEDULED",` + + ` "runtime": null,` + + ` "workflow_id": "16b80fee-64dc-472d-8f26-1d7729b6423d",` + + ` "workflow_name": "test_workflow"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.WriteHeader(http.StatusOK) + w.Write(response) + } + + job, err := client.GetJob(t.Context(), id) + if err != nil { + t.Fatalf("failed to get job: %v", err) + } + + if err := errors.Join( + eq("job.id", job.ID, id), + eq("job.workflow_id", job.WorkflowID, "16b80fee-64dc-472d-8f26-1d7729b6423d"), + eq("job.workflow_name", job.WorkflowName, "test_workflow"), + eq("job.status", job.Status, JobStatusScheduled), + equal("job.created_at", job.CreatedAt, time.Date(2025, 6, 22, 11, 37, 21, 648000000, time.UTC)), + ); err != nil { 
+ t.Error(err) + } +} + +func TestGetJobNotFound(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "fcdc4994-eea5-425c-91fa-e03f2bd8030d" + mux.GetJob = func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "job ID "+r.PathValue("id")+" not found", http.StatusNotFound) + } + + _, err := client.GetJob(t.Context(), id) + if err == nil { + t.Fatalf("expected error, got nil") + } + + var apierr *APIError + if !errors.As(err, &apierr) { + t.Fatalf("expected error to be an %T, got %T", apierr, err) + } + + if apierr.Code != http.StatusNotFound { + t.Fatalf("expected error code to be %d, got %d", http.StatusNotFound, apierr.Code) + } +} + +func TestGetJobError(t *testing.T) { + t.Parallel() + + id := "fcdc4994-eea5-425c-91fa-e03f2bd8030d" + + for _, code := range []int{ + http.StatusBadRequest, // 400 + http.StatusUnauthorized, // 401 + http.StatusForbidden, // 403 + http.StatusNotFound, // 404 + http.StatusInternalServerError, // 500 + http.StatusBadGateway, // 502 + http.StatusServiceUnavailable, // 503 + http.StatusGatewayTimeout, // 504 + } { + t.Run(strconv.Itoa(code), func(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.GetJob = func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + } + + _, err := client.GetJob(t.Context(), id) + if err == nil { + t.Fatalf("expected error, got nil") + } + + if !strings.Contains(err.Error(), "API error occurred") { + t.Fatalf("expected error to contain 'API error occurred', got %v", err) + } + + var apierr *APIError + if !errors.As(err, &apierr) { + t.Fatalf("expected error to be an %T, got %T", apierr, err) + } + + if apierr.Code != code { + t.Fatalf("expected error code to be %d, got %d", code, apierr.Code) + } + }) + } +} diff --git a/job_list.go b/job_list.go index a5bbb90..6347af9 100644 --- a/job_list.go +++ b/job_list.go @@ -16,7 +16,7 @@ type ListJobsRequest struct { func (c *Client) ListJobs(ctx 
context.Context, in *ListJobsRequest) ([]Job, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/jobs").String(), + c.endpoint.JoinPath("jobs", "").String(), nil, ) if err != nil { diff --git a/jobs_list_test.go b/jobs_list_test.go new file mode 100644 index 0000000..012476d --- /dev/null +++ b/jobs_list_test.go @@ -0,0 +1,53 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "testing" + "time" +) + +func TestListJobs(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListJobs = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`[` + + ` {` + + ` "created_at": "2025-06-22T11:37:21.648Z",` + + ` "id": "fcdc4994-eea5-425c-91fa-e03f2bd8030d",` + + ` "status": "IN_PROGRESS",` + + ` "runtime": null,` + + ` "workflow_id": "16b80fee-64dc-472d-8f26-1d7729b6423d",` + + ` "workflow_name": "test_workflow"` + + ` }` + + `]`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + jobs, err := client.ListJobs(t.Context(), &ListJobsRequest{}) + if err != nil { + t.Fatalf("failed to list jobs: %v", err) + } + + if len(jobs) != 1 { + t.Fatalf("expected 1 job, got %d", len(jobs)) + } + + job := jobs[0] + + if err := errors.Join( + eq("job.id", job.ID, "fcdc4994-eea5-425c-91fa-e03f2bd8030d"), + eq("job.workflow_id", job.WorkflowID, "16b80fee-64dc-472d-8f26-1d7729b6423d"), + eq("job.workflow_name", job.WorkflowName, "test_workflow"), + eq("job.status", job.Status, JobStatusInProgress), + equal("job.created_at", job.CreatedAt, time.Date(2025, 6, 22, 11, 37, 21, 648000000, time.UTC)), + ); err != nil { + t.Error(err) + } +} diff --git a/languages.go b/languages.go new file mode 100644 index 0000000..ce1edc1 --- /dev/null +++ b/languages.go @@ -0,0 +1,135 @@ +package unstructured + +// Language is a three-letter language code in the ISO 639-2 style (not the two-letter ISO 639-1), optionally suffixed (e.g. "chi_sim"), representing a language supported by Unstructured for OCR. 
+type Language string + +// Supported language constants. +const ( + LanguageEnglish Language = "eng" + + LanguageAfrikaans Language = "afr" + LanguageAmharic Language = "amh" + LanguageArabic Language = "ara" + LanguageAssamese Language = "asm" + LanguageAzerbaijani Language = "aze" + LanguageAzerbaijaniCyrillic Language = "aze_cyrl" + LanguageBelarusian Language = "bel" + LanguageBengali Language = "ben" + LanguageTibetan Language = "bod" + LanguageBosnian Language = "bos" + LanguageBreton Language = "bre" + LanguageBulgarian Language = "bul" + LanguageCatalan Language = "cat" + LanguageCebuano Language = "ceb" + LanguageCzech Language = "ces" + LanguageSimplifiedChinese Language = "chi_sim" + LanguageSimplifiedChineseVertical Language = "chi_sim_vert" + LanguageTraditionalChinese Language = "chi_tra" + LanguageTraditionalChineseVertical Language = "chi_tra_vert" + LanguageCherokee Language = "chr" + LanguageCorsican Language = "cos" + LanguageWelsh Language = "cym" + LanguageDanish Language = "dan" + LanguageGerman Language = "deu" + LanguageDivehi Language = "div" + LanguageDzongkha Language = "dzo" + LanguageGreek Language = "ell" + LanguageMiddleEnglish Language = "enm" + LanguageEsperanto Language = "epo" + LanguageEquationDetection Language = "equ" + LanguageEstonian Language = "est" + LanguageBasque Language = "eus" + LanguageFaroese Language = "fao" + LanguagePersian Language = "fas" + LanguageFilipino Language = "fil" + LanguageFinnish Language = "fin" + LanguageFrench Language = "fra" + LanguageGermanFraktur Language = "frk" + LanguageFrenchMiddle Language = "frm" + LanguageFrisian Language = "fry" + LanguageScottishGaelic Language = "gla" + LanguageIrish Language = "gle" + LanguageGalician Language = "glg" + LanguageAncientGreek Language = "grc" + LanguageGujarati Language = "guj" + LanguageHaitian Language = "hat" + LanguageHebrew Language = "heb" + LanguageHindi Language = "hin" + LanguageCroatian Language = "hrv" + LanguageHungarian Language = "hun" 
+ LanguageArmenian Language = "hye" + LanguageInuktitut Language = "iku" + LanguageIndonesian Language = "ind" + LanguageIcelandic Language = "isl" + LanguageItalian Language = "ita" + LanguageItalianOld Language = "ita_old" + LanguageJavanese Language = "jav" + LanguageJapanese Language = "jpn" + LanguageJapaneseVertical Language = "jpn_vert" + LanguageKannada Language = "kan" + LanguageGeorgian Language = "kat" + LanguageGeorgianOld Language = "kat_old" + LanguageKazakh Language = "kaz" + LanguageKhmer Language = "khm" + LanguageKyrgyz Language = "kir" + LanguageKurdish Language = "kmr" + LanguageKorean Language = "kor" + LanguageKoreanVertical Language = "kor_vert" + LanguageLao Language = "lao" + LanguageLatin Language = "lat" + LanguageLatvian Language = "lav" + LanguageLithuanian Language = "lit" + LanguageLuxembourgish Language = "ltz" + LanguageMalayalam Language = "mal" + LanguageMarathi Language = "mar" + LanguageMacedonian Language = "mkd" + LanguageMaltese Language = "mlt" + LanguageMongolian Language = "mon" + LanguageMaori Language = "mri" + LanguageMalay Language = "msa" + LanguageBurmese Language = "mya" + LanguageNepali Language = "nep" + LanguageDutch Language = "nld" + LanguageNorwegian Language = "nor" + LanguageOccitan Language = "oci" + LanguageOriya Language = "ori" + LanguageOrientationDetection Language = "osd" + LanguagePanjabi Language = "pan" + LanguagePolish Language = "pol" + LanguagePortuguese Language = "por" + LanguagePunjabi Language = "pus" + LanguageQuechua Language = "que" + LanguageRomanian Language = "ron" + LanguageRussian Language = "rus" + LanguageSanskrit Language = "san" + LanguageSinhala Language = "sin" + LanguageSlovak Language = "slk" + LanguageSlovenian Language = "slv" + LanguageSindhi Language = "snd" + LanguageSNUM Language = "snum" + LanguageSpanish Language = "spa" + LanguageSpanishOld Language = "spa_old" + LanguageAlbanian Language = "sqi" + LanguageSerbian Language = "srp" + LanguageSerbianLatin Language = 
"srp_latn" + LanguageSundanese Language = "sun" + LanguageSwahili Language = "swa" + LanguageSwedish Language = "swe" + LanguageSyriac Language = "syr" + LanguageTamil Language = "tam" + LanguageTatar Language = "tat" + LanguageTelugu Language = "tel" + LanguageTajik Language = "tgk" + LanguageThai Language = "tha" + LanguageTigrinya Language = "tir" + LanguageTonga Language = "ton" + LanguageTurkish Language = "tur" + LanguageUyghur Language = "uig" + LanguageUkrainian Language = "ukr" + LanguageUrdu Language = "urd" + LanguageUzbek Language = "uzb" + LanguageUzbekCyrillic Language = "uzb_cyrl" + LanguageVietnamese Language = "vie" + LanguageYiddish Language = "yid" + LanguageYoruba Language = "yor" +) diff --git a/openapi.json b/openapi.json new file mode 100644 index 0000000..1789333 --- /dev/null +++ b/openapi.json @@ -0,0 +1 @@ +{"openapi":"3.1.0","info":{"title":"Platform API","version":"3.0.2"},"servers":[{"url":"https://platform.unstructuredapp.io/","description":"Unstructured Platform API","x-speakeasy-server-id":"platform-api"}],"paths":{"/api/v1/destinations/":{"get":{"tags":["destinations"],"summary":"List destination connectors","description":"Retrieve a list of available destination connectors.","operationId":"list_destinations","parameters":[{"name":"destination_type","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/DestinationConnectorType"},{"type":"null"}],"title":"Destination Type"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/DestinationConnectorInformation"},"title":"Response List Destinations"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["destinations"],"summary":"Create destination connector","description":"Create a new destination connector using the provided configuration and name.","operationId":"create_destination","parameters":[{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateDestinationConnector"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DestinationConnectorInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/destinations/{destination_id}":{"get":{"tags":["destinations"],"summary":"Get destination connector","description":"Retrieve detailed information for a specific destination connector by its ID.","operationId":"get_destination","parameters":[{"name":"destination_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Destination Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DestinationConnectorInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["destinations"],"summary":"Update destination connector","description":"Update the configuration of an existing destination 
connector.","operationId":"update_destination","parameters":[{"name":"destination_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Destination Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateDestinationConnector"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DestinationConnectorInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["destinations"],"summary":"Delete destination connector","description":"Delete a specific destination connector by its ID.","operationId":"delete_destination","parameters":[{"name":"destination_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Destination Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/destinations/{destination_id}/connection-check":{"post":{"tags":["destinations"],"summary":"Create destination connection check","description":"Initiate a connection check for the destination connector","operationId":"create_connection_check_destinations","parameters":[{"name":"destination_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Destination 
Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"202":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DagNodeConnectionCheck"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-speakeasy-name-override":"create_connection_check_destinations"},"get":{"tags":["destinations"],"summary":"Get the latest destination connector connection check","description":"Retrieves the most recent connection check for the specified destination connector.","operationId":"get_connection_check_destinations","parameters":[{"name":"destination_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Destination Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DagNodeConnectionCheck"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-speakeasy-name-override":"get_connection_check_destinations"}},"/api/v1/sources/":{"get":{"tags":["sources"],"summary":"List available source connectors","description":"Retrieve a list of available source connectors.","operationId":"list_sources","parameters":[{"name":"source_type","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/SourceConnectorType"},{"type":"null"}],"title":"Source Type"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/SourceConnectorInformation"},"title":"Response List Sources"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"tags":["sources"],"summary":"Create source connector","description":"Create a new source connector using the provided configuration and name.","operationId":"create_source","parameters":[{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateSourceConnector"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SourceConnectorInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/sources/{source_id}":{"get":{"tags":["sources"],"summary":"Get source connector","description":"Retrieve detailed information for a specific source connector by its ID.","operationId":"get_source","parameters":[{"name":"source_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Source Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SourceConnectorInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["sources"],"summary":"Delete source connector","description":"Delete a specific source connector identified by its 
ID.","operationId":"delete_source","parameters":[{"name":"source_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Source Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["sources"],"summary":"Update source connector","description":"Update the configuration of an existing source connector.","operationId":"update_source","parameters":[{"name":"source_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Source Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateSourceConnector"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SourceConnectorInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/sources/{source_id}/connection-check":{"post":{"tags":["sources"],"summary":"Create source connection check","description":"Initiates a connection check for the specified source connector.","operationId":"create_connection_check_sources","parameters":[{"name":"source_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Source Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"202":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DagNodeConnectionCheck"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-speakeasy-name-override":"create_connection_check_sources"},"get":{"tags":["sources"],"summary":"Get the latest source connector connection check","description":"Retrieves the most recent connection check for the specified source connector.","operationId":"get_connection_check_sources","parameters":[{"name":"source_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Source Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DagNodeConnectionCheck"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-speakeasy-name-override":"get_connection_check_sources"}},"/api/v1/jobs/":{"get":{"tags":["jobs"],"summary":"List Jobs","description":"Retrieve a list of jobs with optional filtering by workflow ID or job status.","operationId":"list_jobs","parameters":[{"name":"workflow_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Workflow Id"}},{"name":"status","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Status"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/JobInformation"},"title":"Response List 
Jobs"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/jobs/{job_id}":{"get":{"tags":["jobs"],"summary":"Get Job","description":"Retrieve detailed information for a specific job by its ID.","operationId":"get_job","parameters":[{"name":"job_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Job Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JobInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/jobs/{job_id}/cancel":{"post":{"tags":["jobs"],"summary":"Cancel Job","description":"Cancel the specified job.","operationId":"cancel_job","parameters":[{"name":"job_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Job Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/jobs/{job_id}/download":{"get":{"tags":["jobs"],"summary":"Download Job output","description":"Download the output of a job from a workflow where the input file was provided at runtime.","operationId":"download_job_output","parameters":[{"name":"job_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Job Id"}},{"name":"file_id","in":"query","required":true,"schema":{"type":"string","description":"ID of the file to 
download","title":"File Id"},"description":"ID of the file to download"},{"name":"node_id","in":"query","required":true,"schema":{"type":"string","format":"uuid","description":"Node ID to retrieve the corresponding output file","title":"Node Id"},"description":"Node ID to retrieve the corresponding output file"},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/jobs/{job_id}/details":{"get":{"tags":["jobs"],"summary":"Get Job processing details","description":"Retrieve processing details for a specific job by its ID.","operationId":"get_job_details","parameters":[{"name":"job_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Job Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JobDetails"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/jobs/{job_id}/failed-files":{"get":{"tags":["jobs"],"summary":"Get Job Failed Files","description":"Retrieve failed files for a specific job by its ID.","operationId":"get_job_failed_files","parameters":[{"name":"job_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Job Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JobFailedFiles"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/workflows/":{"post":{"tags":["workflows"],"summary":"Create Workflow","description":"Create a new workflow, either custom or auto, and configure its settings.","operationId":"create_workflow","parameters":[{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateWorkflow"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/WorkflowInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"get":{"tags":["workflows"],"summary":"List Workflows","description":"Retrieve a list of workflows, optionally filtered by source, destination, state, name, date range, and supports pagination and sorting.","operationId":"list_workflows","parameters":[{"name":"dag_node_configuration_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Dag Node Configuration Id"}},{"name":"source_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Source Id"}},{"name":"destination_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Destination 
Id"}},{"name":"status","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/WorkflowState"},{"type":"null"}],"title":"Status"}},{"name":"page","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":1,"title":"Page"}},{"name":"page_size","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":20,"title":"Page Size"}},{"name":"created_since","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Created Since"}},{"name":"created_before","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Created Before"}},{"name":"name","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"}},{"name":"sort_by","in":"query","required":false,"schema":{"type":"string","default":"id","title":"Sort By"}},{"name":"sort_direction","in":"query","required":false,"schema":{"$ref":"#/components/schemas/SortDirection","default":"asc"}},{"name":"show_only_soft_deleted","in":"query","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Show Only Soft Deleted"}},{"name":"show_recommender_workflows","in":"query","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Show Recommender Workflows"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/WorkflowInformation"},"title":"Response List Workflows"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/workflows/{workflow_id}":{"get":{"tags":["workflows"],"summary":"Get Workflow","description":"Retrieve detailed information for a specific workflow by its ID.","operationId":"get_workflow","parameters":[{"name":"workflow_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Workflow Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/WorkflowInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"tags":["workflows"],"summary":"Update Workflow","description":"Update an existing workflow's name, connectors, schedule, or workflow type.","operationId":"update_workflow","parameters":[{"name":"workflow_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Workflow Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdateWorkflow"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/WorkflowInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["workflows"],"summary":"Delete Workflow","description":"Delete a workflow by its ID.","operationId":"delete_workflow","parameters":[{"name":"workflow_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Workflow 
Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v1/workflows/{workflow_id}/run":{"post":{"tags":["workflows"],"summary":"Run Workflow","description":"Run a workflow by triggering a new job if none is currently active.","operationId":"run_workflow","parameters":[{"name":"workflow_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Workflow Id"}},{"name":"unstructured-api-key","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Unstructured-Api-Key"}}],"requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_run_workflow"}}}},"responses":{"202":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/JobInformation"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"AstraDBConnectorConfig":{"properties":{"collection_name":{"type":"string","title":"Collection Name"},"keyspace":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Keyspace"},"batch_size":{"type":"integer","title":"Batch Size"},"api_endpoint":{"type":"string","title":"Api Endpoint"},"token":{"type":"string","title":"Token"}},"type":"object","required":["collection_name","batch_size","api_endpoint","token"],"title":"AstraDBConnectorConfig"},"AstraDBConnectorConfigInput":{"properties":{"collection_name":{"type":"string","pattern":"^\\w+$","title":"Collection 
Name"},"keyspace":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Keyspace","default":"default_keyspace"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":20},"api_endpoint":{"type":"string","title":"Api Endpoint"},"token":{"type":"string","title":"Token"},"flatten_metadata":{"type":"boolean","title":"Flatten Metadata","default":false}},"type":"object","required":["collection_name","api_endpoint","token"],"title":"AstraDBConnectorConfigInput"},"AzureAISearchConnectorConfig":{"properties":{"endpoint":{"type":"string","title":"Endpoint"},"index":{"type":"string","title":"Index"},"key":{"type":"string","title":"Key"}},"type":"object","required":["endpoint","index","key"],"title":"AzureAISearchConnectorConfig"},"AzureAISearchConnectorConfigInput":{"properties":{"endpoint":{"type":"string","pattern":"^https:\\/\\/[a-z0-9-]+","title":"Endpoint"},"index":{"type":"string","title":"Index"},"key":{"type":"string","title":"Key"}},"type":"object","required":["endpoint","index","key"],"title":"AzureAISearchConnectorConfigInput"},"AzureSourceConnectorConfig":{"properties":{"remote_url":{"type":"string","title":"Remote Url"},"account_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Account Name"},"account_key":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Account Key"},"connection_string":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Connection String"},"sas_token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Sas Token"},"recursive":{"type":"boolean","title":"Recursive"}},"type":"object","required":["remote_url","recursive"],"title":"AzureSourceConnectorConfig"},"AzureSourceConnectorConfigInput":{"properties":{"remote_url":{"type":"string","pattern":"^(az:\\/\\/|abfs:\\/\\/)","title":"Remote Url"},"account_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Account Name"},"account_key":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Account 
Key"},"connection_string":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Connection String"},"sas_token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Sas Token"},"recursive":{"type":"boolean","title":"Recursive","default":true}},"type":"object","required":["remote_url"],"title":"AzureSourceConnectorConfigInput"},"Body_run_workflow":{"properties":{"input_files":{"anyOf":[{"items":{"type":"string","format":"binary"},"type":"array"},{"type":"null"}],"title":"Input Files"}},"type":"object","title":"Body_run_workflow"},"BoxSourceConnectorConfig":{"properties":{"box_app_config":{"type":"string","title":"Box App Config"},"recursive":{"type":"boolean","title":"Recursive"}},"type":"object","required":["box_app_config","recursive"],"title":"BoxSourceConnectorConfig"},"BoxSourceConnectorConfigInput":{"properties":{"box_app_config":{"type":"string","title":"Box App Config"},"remote_url":{"type":"string","pattern":"^box:\\/\\/","title":"Remote Url"},"recursive":{"type":"boolean","title":"Recursive","default":true}},"type":"object","required":["box_app_config","remote_url"],"title":"BoxSourceConnectorConfigInput"},"ConfluenceSourceConnectorConfig":{"properties":{"url":{"type":"string","title":"Url"},"username":{"type":"string","title":"Username"},"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password"},"api_token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Token"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"cloud":{"type":"boolean","title":"Cloud"},"extract_images":{"type":"boolean","title":"Extract Images","default":false},"extract_files":{"type":"boolean","title":"Extract Files","default":false},"max_num_of_spaces":{"type":"integer","title":"Max Num Of Spaces"},"max_num_of_docs_from_each_space":{"type":"integer","title":"Max Num Of Docs From Each 
Space"},"spaces":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Spaces"}},"type":"object","required":["url","username","cloud","max_num_of_spaces","max_num_of_docs_from_each_space","spaces"],"title":"ConfluenceSourceConnectorConfig"},"ConfluenceSourceConnectorConfigInput":{"properties":{"url":{"type":"string","title":"Url"},"username":{"type":"string","title":"Username"},"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password"},"api_token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Token"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"cloud":{"type":"boolean","title":"Cloud","default":false},"extract_images":{"type":"boolean","title":"Extract Images","default":false},"extract_files":{"type":"boolean","title":"Extract Files","default":false},"max_num_of_spaces":{"type":"integer","minimum":1.0,"title":"Max Num Of Spaces","default":500},"max_num_of_docs_from_each_space":{"type":"integer","minimum":1.0,"title":"Max Num Of Docs From Each Space","default":150},"spaces":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Spaces"}},"type":"object","required":["url","username"],"title":"ConfluenceSourceConnectorConfigInput"},"ConnectionCheckStatus":{"type":"string","enum":["SCHEDULED","SUCCESS","FAILURE"],"title":"ConnectionCheckStatus"},"CouchbaseDestinationConnectorConfig":{"properties":{"bucket":{"type":"string","title":"Bucket"},"connection_string":{"type":"string","title":"Connection String"},"scope":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Scope"},"collection":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Collection"},"batch_size":{"type":"integer","title":"Batch 
Size"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"}},"type":"object","required":["bucket","connection_string","batch_size","username","password"],"title":"CouchbaseDestinationConnectorConfig"},"CouchbaseDestinationConnectorConfigInput":{"properties":{"bucket":{"type":"string","title":"Bucket"},"connection_string":{"type":"string","title":"Connection String"},"scope":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Scope"},"collection":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Collection"},"batch_size":{"type":"integer","title":"Batch Size"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"}},"type":"object","required":["bucket","connection_string","batch_size","username","password"],"title":"CouchbaseDestinationConnectorConfigInput"},"CouchbaseSourceConnectorConfig":{"properties":{"bucket":{"type":"string","title":"Bucket"},"connection_string":{"type":"string","title":"Connection String"},"scope":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Scope"},"collection":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Collection"},"batch_size":{"type":"integer","title":"Batch Size"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"collection_id":{"type":"string","title":"Collection Id"}},"type":"object","required":["bucket","connection_string","batch_size","username","password","collection_id"],"title":"CouchbaseSourceConnectorConfig"},"CouchbaseSourceConnectorConfigInput":{"properties":{"bucket":{"type":"string","title":"Bucket"},"connection_string":{"type":"string","title":"Connection String"},"scope":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Scope"},"collection":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Collection"},"batch_size":{"type":"integer","title":"Batch 
Size"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"collection_id":{"type":"string","title":"Collection Id"}},"type":"object","required":["bucket","connection_string","batch_size","username","password","collection_id"],"title":"CouchbaseSourceConnectorConfigInput"},"CreateDestinationConnector":{"properties":{"name":{"type":"string","title":"Name"},"type":{"$ref":"#/components/schemas/DestinationConnectorType"},"config":{"anyOf":[{"$ref":"#/components/schemas/AstraDBConnectorConfigInput"},{"$ref":"#/components/schemas/AzureAISearchConnectorConfigInput"},{"$ref":"#/components/schemas/CouchbaseDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/DatabricksVolumesConnectorConfigInput"},{"$ref":"#/components/schemas/DatabricksVDTDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/DeltaTableConnectorConfigInput"},{"$ref":"#/components/schemas/ElasticsearchConnectorConfigInput"},{"$ref":"#/components/schemas/GCSDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/KafkaCloudDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/MilvusDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/MongoDBConnectorConfigInput"},{"$ref":"#/components/schemas/Neo4jDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/OneDriveDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/PineconeDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/PostgresDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/RedisDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/QdrantCloudDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/S3DestinationConnectorConfigInput"},{"$ref":"#/components/schemas/SnowflakeDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/WeaviateDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/IBMWatsonxS3DestinationConnectorConfigInput"},{"additionalProperties":true,"type":"object"}],"title
":"Config"}},"type":"object","required":["name","type","config"],"title":"CreateDestinationConnector"},"CreateSourceConnector":{"properties":{"name":{"type":"string","title":"Name"},"type":{"$ref":"#/components/schemas/SourceConnectorType"},"config":{"anyOf":[{"$ref":"#/components/schemas/AzureSourceConnectorConfigInput"},{"$ref":"#/components/schemas/BoxSourceConnectorConfigInput"},{"$ref":"#/components/schemas/ConfluenceSourceConnectorConfigInput"},{"$ref":"#/components/schemas/CouchbaseSourceConnectorConfigInput"},{"$ref":"#/components/schemas/DatabricksVolumesConnectorConfigInput"},{"$ref":"#/components/schemas/DropboxSourceConnectorConfigInput"},{"$ref":"#/components/schemas/ElasticsearchConnectorConfigInput"},{"$ref":"#/components/schemas/GCSSourceConnectorConfigInput"},{"$ref":"#/components/schemas/GoogleDriveSourceConnectorConfigInput"},{"$ref":"#/components/schemas/KafkaCloudSourceConnectorConfigInput"},{"$ref":"#/components/schemas/MongoDBConnectorConfigInput"},{"$ref":"#/components/schemas/OneDriveSourceConnectorConfigInput"},{"$ref":"#/components/schemas/OutlookSourceConnectorConfigInput"},{"$ref":"#/components/schemas/PostgresSourceConnectorConfigInput"},{"$ref":"#/components/schemas/S3SourceConnectorConfigInput"},{"$ref":"#/components/schemas/SalesforceSourceConnectorConfigInput"},{"$ref":"#/components/schemas/SharePointSourceConnectorConfigInput"},{"$ref":"#/components/schemas/SnowflakeSourceConnectorConfigInput"},{"$ref":"#/components/schemas/JiraSourceConnectorConfigInput"},{"$ref":"#/components/schemas/ZendeskSourceConnectorConfigInput"},{"additionalProperties":true,"type":"object"}],"title":"Config"}},"type":"object","required":["name","type","config"],"title":"CreateSourceConnector"},"CreateWorkflow":{"properties":{"name":{"type":"string","title":"Name"},"source_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Source Id"},"destination_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Destination 
Id"},"workflow_type":{"$ref":"#/components/schemas/WorkflowType"},"workflow_nodes":{"anyOf":[{"items":{"$ref":"#/components/schemas/WorkflowNode"},"type":"array"},{"type":"null"}],"title":"Workflow Nodes"},"schedule":{"anyOf":[{"type":"string","enum":["every 15 minutes","every hour","every 2 hours","every 4 hours","every 6 hours","every 8 hours","every 10 hours","every 12 hours","daily","weekly","monthly"]},{"type":"null"}],"title":"Schedule"},"reprocess_all":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Reprocess All","default":false}},"type":"object","required":["name","workflow_type"],"title":"CreateWorkflow"},"CronTabEntry":{"properties":{"cron_expression":{"type":"string","title":"Cron Expression"}},"type":"object","required":["cron_expression"],"title":"CronTabEntry"},"DagNodeConnectionCheck":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"status":{"$ref":"#/components/schemas/ConnectionCheckStatus"},"reason":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Reason"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"reported_at":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Reported At"}},"type":"object","required":["id","status","created_at"],"title":"DagNodeConnectionCheck"},"DatabricksVDTDestinationConnectorConfig":{"properties":{"server_hostname":{"type":"string","title":"Server Hostname"},"http_path":{"type":"string","title":"Http Path"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"},"catalog":{"type":"string","title":"Catalog"},"database":{"type":"string","title":"Database","default":"default"},"table_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Table 
Name"},"schema":{"type":"string","title":"Schema","default":"default"},"volume":{"type":"string","title":"Volume"},"volume_path":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Volume Path"}},"type":"object","required":["server_hostname","http_path","catalog","volume"],"title":"DatabricksVDTDestinationConnectorConfig"},"DatabricksVDTDestinationConnectorConfigInput":{"properties":{"server_hostname":{"type":"string","title":"Server Hostname"},"http_path":{"type":"string","title":"Http Path"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"},"catalog":{"type":"string","title":"Catalog"},"database":{"type":"string","title":"Database","default":"default"},"table_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Table Name"},"schema":{"type":"string","title":"Schema"},"volume":{"type":"string","title":"Volume"},"volume_path":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Volume Path"}},"type":"object","required":["server_hostname","http_path","catalog","volume"],"title":"DatabricksVDTDestinationConnectorConfigInput"},"DatabricksVolumesConnectorConfig":{"properties":{"host":{"type":"string","title":"Host"},"catalog":{"type":"string","title":"Catalog"},"schema":{"type":"string","title":"Schema","default":"default"},"volume":{"type":"string","title":"Volume"},"volume_path":{"type":"string","title":"Volume Path"},"client_secret":{"type":"string","title":"Client Secret"},"client_id":{"type":"string","title":"Client 
Id"}},"type":"object","required":["host","catalog","volume","volume_path","client_secret","client_id"],"title":"DatabricksVolumesConnectorConfig"},"DatabricksVolumesConnectorConfigInput":{"properties":{"host":{"type":"string","title":"Host"},"catalog":{"type":"string","title":"Catalog"},"schema":{"type":"string","title":"Schema"},"volume":{"type":"string","title":"Volume"},"volume_path":{"type":"string","title":"Volume Path"},"client_secret":{"type":"string","title":"Client Secret"},"client_id":{"type":"string","title":"Client Id"}},"type":"object","required":["host","catalog","volume","volume_path","client_secret","client_id"],"title":"DatabricksVolumesConnectorConfigInput"},"DeltaTableConnectorConfig":{"properties":{"aws_access_key_id":{"type":"string","title":"Aws Access Key Id"},"aws_secret_access_key":{"type":"string","title":"Aws Secret Access Key"},"aws_region":{"type":"string","title":"Aws Region"},"table_uri":{"type":"string","title":"Table Uri"}},"type":"object","required":["aws_access_key_id","aws_secret_access_key","aws_region","table_uri"],"title":"DeltaTableConnectorConfig"},"DeltaTableConnectorConfigInput":{"properties":{"aws_access_key_id":{"type":"string","title":"Aws Access Key Id"},"aws_secret_access_key":{"type":"string","title":"Aws Secret Access Key"},"aws_region":{"type":"string","title":"Aws Region"},"table_uri":{"type":"string","pattern":"^s3:\\/\\/","title":"Table 
Uri"}},"type":"object","required":["aws_access_key_id","aws_secret_access_key","aws_region","table_uri"],"title":"DeltaTableConnectorConfigInput"},"DestinationConnectorInformation":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"name":{"type":"string","title":"Name"},"type":{"$ref":"#/components/schemas/DestinationConnectorType"},"config":{"anyOf":[{"$ref":"#/components/schemas/AstraDBConnectorConfig"},{"$ref":"#/components/schemas/AzureAISearchConnectorConfig"},{"$ref":"#/components/schemas/CouchbaseDestinationConnectorConfig"},{"$ref":"#/components/schemas/DatabricksVolumesConnectorConfig"},{"$ref":"#/components/schemas/DatabricksVDTDestinationConnectorConfig"},{"$ref":"#/components/schemas/DeltaTableConnectorConfig"},{"$ref":"#/components/schemas/ElasticsearchConnectorConfig"},{"$ref":"#/components/schemas/GCSDestinationConnectorConfig"},{"$ref":"#/components/schemas/KafkaCloudDestinationConnectorConfig"},{"$ref":"#/components/schemas/MilvusDestinationConnectorConfig"},{"$ref":"#/components/schemas/MongoDBConnectorConfig"},{"$ref":"#/components/schemas/Neo4jDestinationConnectorConfig"},{"$ref":"#/components/schemas/OneDriveDestinationConnectorConfig"},{"$ref":"#/components/schemas/PineconeDestinationConnectorConfig"},{"$ref":"#/components/schemas/PostgresDestinationConnectorConfig"},{"$ref":"#/components/schemas/RedisDestinationConnectorConfig"},{"$ref":"#/components/schemas/QdrantCloudDestinationConnectorConfig"},{"$ref":"#/components/schemas/S3DestinationConnectorConfig"},{"$ref":"#/components/schemas/SnowflakeDestinationConnectorConfig"},{"$ref":"#/components/schemas/WeaviateDestinationConnectorConfig"},{"$ref":"#/components/schemas/IBMWatsonxS3DestinationConnectorConfig"},{"additionalProperties":true,"type":"object"}],"title":"Config"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Updated 
At"}},"type":"object","required":["id","name","type","config","created_at"],"title":"DestinationConnectorInformation"},"DestinationConnectorType":{"type":"string","enum":["astradb","azure_ai_search","couchbase","databricks_volumes","databricks_volume_delta_tables","delta_table","elasticsearch","gcs","kafka-cloud","milvus","mongodb","motherduck","neo4j","onedrive","pinecone","postgres","redis","qdrant-cloud","s3","snowflake","weaviate-cloud","ibm_watsonx_s3"],"title":"DestinationConnectorType"},"DropboxSourceConnectorConfig":{"properties":{"token":{"type":"string","title":"Token"},"remote_url":{"type":"string","title":"Remote Url"},"recursive":{"type":"boolean","title":"Recursive"}},"type":"object","required":["token","remote_url","recursive"],"title":"DropboxSourceConnectorConfig"},"DropboxSourceConnectorConfigInput":{"properties":{"token":{"type":"string","title":"Token"},"remote_url":{"type":"string","pattern":"^dropbox:\\/\\/","title":"Remote Url"},"recursive":{"type":"boolean","title":"Recursive","default":true}},"type":"object","required":["token","remote_url"],"title":"DropboxSourceConnectorConfigInput"},"ElasticsearchConnectorConfig":{"properties":{"hosts":{"items":{"type":"string"},"type":"array","title":"Hosts"},"index_name":{"type":"string","title":"Index Name"},"es_api_key":{"type":"string","title":"Es Api Key"}},"type":"object","required":["hosts","index_name","es_api_key"],"title":"ElasticsearchConnectorConfig"},"ElasticsearchConnectorConfigInput":{"properties":{"hosts":{"items":{"type":"string"},"type":"array","title":"Hosts"},"index_name":{"type":"string","title":"Index Name"},"es_api_key":{"type":"string","title":"Es Api 
Key"}},"type":"object","required":["hosts","index_name","es_api_key"],"title":"ElasticsearchConnectorConfigInput"},"EncryptionType":{"type":"string","enum":["rsa","rsa_aes"],"title":"EncryptionType"},"FailedFile":{"properties":{"document":{"type":"string","title":"Document"},"error":{"type":"string","title":"Error"}},"type":"object","required":["document","error"],"title":"FailedFile"},"GCSDestinationConnectorConfig":{"properties":{"remote_url":{"type":"string","title":"Remote Url"},"service_account_key":{"type":"string","title":"Service Account Key"}},"type":"object","required":["remote_url","service_account_key"],"title":"GCSDestinationConnectorConfig"},"GCSDestinationConnectorConfigInput":{"properties":{"remote_url":{"type":"string","pattern":"^(gs|gcs):\\/\\/","title":"Remote Url"},"service_account_key":{"type":"string","title":"Service Account Key"}},"type":"object","required":["remote_url","service_account_key"],"title":"GCSDestinationConnectorConfigInput"},"GCSSourceConnectorConfig":{"properties":{"remote_url":{"type":"string","title":"Remote Url"},"service_account_key":{"type":"string","title":"Service Account Key"},"recursive":{"type":"boolean","title":"Recursive"}},"type":"object","required":["remote_url","service_account_key","recursive"],"title":"GCSSourceConnectorConfig"},"GCSSourceConnectorConfigInput":{"properties":{"remote_url":{"type":"string","pattern":"^(gs|gcs):\\/\\/","title":"Remote Url"},"service_account_key":{"type":"string","title":"Service Account Key"},"recursive":{"type":"boolean","title":"Recursive","default":true}},"type":"object","required":["remote_url","service_account_key"],"title":"GCSSourceConnectorConfigInput"},"GoogleDriveSourceConnectorConfig":{"properties":{"drive_id":{"type":"string","title":"Drive Id"},"service_account_key":{"anyOf":[{"$ref":"#/components/schemas/SecretReference"},{"type":"string"}],"title":"Service Account 
Key"},"extensions":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Extensions"},"recursive":{"type":"boolean","title":"Recursive"}},"type":"object","required":["drive_id","service_account_key","recursive"],"title":"GoogleDriveSourceConnectorConfig"},"GoogleDriveSourceConnectorConfigInput":{"properties":{"drive_id":{"type":"string","title":"Drive Id"},"service_account_key":{"anyOf":[{"$ref":"#/components/schemas/SecretReference"},{"type":"string"}],"title":"Service Account Key"},"extensions":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Extensions"},"recursive":{"type":"boolean","title":"Recursive","default":true}},"type":"object","required":["drive_id","service_account_key"],"title":"GoogleDriveSourceConnectorConfigInput"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"IBMWatsonxS3DestinationConnectorConfig":{"properties":{"iam_api_key":{"type":"string","title":"Iam Api Key"},"access_key_id":{"type":"string","title":"Access Key Id"},"secret_access_key":{"type":"string","title":"Secret Access Key"},"iceberg_endpoint":{"type":"string","title":"Iceberg Endpoint"},"object_storage_endpoint":{"type":"string","title":"Object Storage Endpoint"},"object_storage_region":{"type":"string","title":"Object Storage Region"},"catalog":{"type":"string","title":"Catalog"},"max_retries_connection":{"type":"integer","title":"Max Retries Connection"},"namespace":{"type":"string","title":"Namespace"},"table":{"type":"string","title":"Table"},"max_retries":{"type":"integer","title":"Max Retries"},"record_id_key":{"type":"string","title":"Record Id 
Key"}},"type":"object","required":["iam_api_key","access_key_id","secret_access_key","iceberg_endpoint","object_storage_endpoint","object_storage_region","catalog","max_retries_connection","namespace","table","max_retries","record_id_key"],"title":"IBMWatsonxS3DestinationConnectorConfig"},"IBMWatsonxS3DestinationConnectorConfigInput":{"properties":{"iam_api_key":{"type":"string","title":"Iam Api Key"},"access_key_id":{"type":"string","title":"Access Key Id"},"secret_access_key":{"type":"string","title":"Secret Access Key"},"iceberg_endpoint":{"type":"string","title":"Iceberg Endpoint"},"object_storage_endpoint":{"type":"string","title":"Object Storage Endpoint"},"object_storage_region":{"type":"string","title":"Object Storage Region"},"catalog":{"type":"string","title":"Catalog"},"max_retries_connection":{"type":"integer","maximum":100.0,"minimum":2.0,"title":"Max Retries Connection","description":"Maximum number of retries in case of a connection error","default":10},"namespace":{"type":"string","title":"Namespace"},"table":{"type":"string","title":"Table"},"max_retries":{"type":"integer","maximum":500.0,"minimum":2.0,"title":"Max Retries","description":"Maximum number of retries to upload data","default":50},"record_id_key":{"type":"string","title":"Record Id Key","description":"Searchable key to find entries for the same record on previous 
runs","default":"record_id"}},"type":"object","required":["iam_api_key","access_key_id","secret_access_key","iceberg_endpoint","object_storage_endpoint","object_storage_region","catalog","namespace","table"],"title":"IBMWatsonxS3DestinationConnectorConfigInput"},"JiraSourceConnectorConfig":{"properties":{"url":{"type":"string","title":"Url"},"username":{"type":"string","title":"Username"},"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"cloud":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Cloud","default":false},"projects":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Projects"},"boards":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Boards"},"issues":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Issues"},"status_filters":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Status Filters"},"download_attachments":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Download Attachments","default":false}},"type":"object","required":["url","username"],"title":"JiraSourceConnectorConfig"},"JiraSourceConnectorConfigInput":{"properties":{"url":{"type":"string","title":"Url"},"username":{"type":"string","title":"Username"},"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"cloud":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Cloud","default":false},"projects":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Projects"},"boards":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Boards"},"issues":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Issues"},"status_filters":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Status 
Filters"},"download_attachments":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Download Attachments","default":false}},"type":"object","required":["url","username"],"title":"JiraSourceConnectorConfigInput"},"JobDetails":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"processing_status":{"$ref":"#/components/schemas/JobProcessingStatus"},"node_stats":{"items":{"$ref":"#/components/schemas/JobNodeDetails"},"type":"array","title":"Node Stats"},"message":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Message"}},"type":"object","required":["id","processing_status","node_stats"],"title":"JobDetails"},"JobFailedFiles":{"properties":{"failed_files":{"items":{"$ref":"#/components/schemas/FailedFile"},"type":"array","title":"Failed Files"}},"type":"object","required":["failed_files"],"title":"JobFailedFiles"},"JobInformation":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"workflow_id":{"type":"string","format":"uuid","title":"Workflow Id"},"workflow_name":{"type":"string","title":"Workflow Name"},"status":{"$ref":"#/components/schemas/JobStatus"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"runtime":{"anyOf":[{"type":"string","format":"duration"},{"type":"null"}],"title":"Runtime"},"input_file_ids":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Input File Ids"},"output_node_files":{"anyOf":[{"items":{"$ref":"#/components/schemas/NodeFileMetadata"},"type":"array"},{"type":"null"}],"title":"Output Node Files"},"job_type":{"$ref":"#/components/schemas/WorkflowJobType","default":"ephemeral"}},"type":"object","required":["id","workflow_id","workflow_name","status","created_at"],"title":"JobInformation"},"JobNodeDetails":{"properties":{"node_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Node Name"},"node_type":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Node Type"},"node_subtype":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Node 
Subtype"},"ready":{"type":"integer","title":"Ready"},"in_progress":{"type":"integer","title":"In Progress"},"success":{"type":"integer","title":"Success"},"failure":{"type":"integer","title":"Failure"}},"type":"object","required":["ready","in_progress","success","failure"],"title":"JobNodeDetails"},"JobProcessingStatus":{"type":"string","enum":["SCHEDULED","IN_PROGRESS","SUCCESS","COMPLETED_WITH_ERRORS","STOPPED","FAILED"],"title":"JobProcessingStatus"},"JobStatus":{"type":"string","enum":["SCHEDULED","IN_PROGRESS","COMPLETED","STOPPED","FAILED"],"title":"JobStatus"},"KafkaCloudDestinationConnectorConfig":{"properties":{"bootstrap_servers":{"type":"string","title":"Bootstrap Servers"},"port":{"type":"integer","title":"Port"},"group_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Group Id"},"topic":{"type":"string","title":"Topic"},"kafka_api_key":{"type":"string","title":"Kafka Api Key"},"secret":{"type":"string","title":"Secret"},"batch_size":{"type":"integer","title":"Batch Size"}},"type":"object","required":["bootstrap_servers","port","topic","kafka_api_key","secret","batch_size"],"title":"KafkaCloudDestinationConnectorConfig"},"KafkaCloudDestinationConnectorConfigInput":{"properties":{"bootstrap_servers":{"type":"string","title":"Bootstrap Servers"},"port":{"type":"integer","minimum":1.0,"title":"Port","default":9092},"group_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Group Id"},"topic":{"type":"string","title":"Topic"},"kafka_api_key":{"type":"string","title":"Kafka Api Key"},"secret":{"type":"string","title":"Secret"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":100}},"type":"object","required":["bootstrap_servers","topic","kafka_api_key","secret"],"title":"KafkaCloudDestinationConnectorConfigInput"},"KafkaCloudSourceConnectorConfig":{"properties":{"bootstrap_servers":{"type":"string","title":"Bootstrap 
Servers"},"port":{"type":"integer","title":"Port"},"group_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Group Id"},"topic":{"type":"string","title":"Topic"},"kafka_api_key":{"type":"string","title":"Kafka Api Key"},"secret":{"type":"string","title":"Secret"},"num_messages_to_consume":{"type":"integer","title":"Num Messages To Consume"}},"type":"object","required":["bootstrap_servers","port","topic","kafka_api_key","secret","num_messages_to_consume"],"title":"KafkaCloudSourceConnectorConfig"},"KafkaCloudSourceConnectorConfigInput":{"properties":{"bootstrap_servers":{"type":"string","title":"Bootstrap Servers"},"port":{"type":"integer","minimum":1.0,"title":"Port","default":9092},"group_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Group Id"},"topic":{"type":"string","title":"Topic"},"kafka_api_key":{"type":"string","title":"Kafka Api Key"},"secret":{"type":"string","title":"Secret"},"num_messages_to_consume":{"type":"integer","minimum":1.0,"title":"Num Messages To Consume","default":100}},"type":"object","required":["bootstrap_servers","topic","kafka_api_key","secret"],"title":"KafkaCloudSourceConnectorConfigInput"},"MilvusDestinationConnectorConfig":{"properties":{"uri":{"type":"string","title":"Uri"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password"},"db_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Db Name"},"collection_name":{"type":"string","title":"Collection Name"},"record_id_key":{"type":"string","title":"Record Id 
Key"}},"type":"object","required":["uri","collection_name","record_id_key"],"title":"MilvusDestinationConnectorConfig"},"MilvusDestinationConnectorConfigInput":{"properties":{"uri":{"type":"string","pattern":"^https?:\\/\\/","title":"Uri"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password"},"db_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Db Name"},"collection_name":{"type":"string","title":"Collection Name"},"record_id_key":{"type":"string","title":"Record Id Key"}},"type":"object","required":["uri","collection_name","record_id_key"],"title":"MilvusDestinationConnectorConfigInput"},"MongoDBConnectorConfig":{"properties":{"database":{"type":"string","title":"Database"},"collection":{"type":"string","title":"Collection"},"uri":{"type":"string","title":"Uri"}},"type":"object","required":["database","collection","uri"],"title":"MongoDBConnectorConfig"},"MongoDBConnectorConfigInput":{"properties":{"database":{"type":"string","title":"Database"},"collection":{"type":"string","title":"Collection"},"uri":{"type":"string","pattern":"^mongodb(?:\\+srv)?:\\/\\/(?:([^:@/\\s]+)(?::([^@/\\s]*))?@)?([^/\\s:,]+)(?::(\\d+))?(?:,([^/\\s:,]+)(?::(\\d+))?)*(?:\\/([^/\\s?]*))?(?:\\?([^/\\s]*))?$","title":"Uri"}},"type":"object","required":["database","collection","uri"],"title":"MongoDBConnectorConfigInput"},"Neo4jDestinationConnectorConfig":{"properties":{"uri":{"type":"string","title":"Uri"},"database":{"type":"string","title":"Database"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"batch_size":{"type":"integer","title":"Batch 
Size"}},"type":"object","required":["uri","database","username","password","batch_size"],"title":"Neo4jDestinationConnectorConfig"},"Neo4jDestinationConnectorConfigInput":{"properties":{"uri":{"type":"string","title":"Uri"},"database":{"type":"string","title":"Database"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":100}},"type":"object","required":["uri","database","username","password"],"title":"Neo4jDestinationConnectorConfigInput"},"NodeFileMetadata":{"properties":{"node_id":{"type":"string","format":"uuid","title":"Node Id"},"file_id":{"type":"string","title":"File Id"}},"type":"object","required":["node_id","file_id"],"title":"NodeFileMetadata"},"OneDriveDestinationConnectorConfig":{"properties":{"client_id":{"type":"string","title":"Client Id"},"user_pname":{"type":"string","title":"User Pname"},"tenant":{"type":"string","title":"Tenant"},"authority_url":{"type":"string","title":"Authority Url"},"client_cred":{"type":"string","title":"Client Cred"},"remote_url":{"type":"string","title":"Remote Url"}},"type":"object","required":["client_id","user_pname","tenant","authority_url","client_cred","remote_url"],"title":"OneDriveDestinationConnectorConfig"},"OneDriveDestinationConnectorConfigInput":{"properties":{"client_id":{"type":"string","title":"Client Id"},"user_pname":{"type":"string","title":"User Pname"},"tenant":{"type":"string","title":"Tenant"},"authority_url":{"type":"string","title":"Authority Url"},"client_cred":{"type":"string","title":"Client Cred"},"remote_url":{"type":"string","pattern":"^onedrive:\\/\\/","title":"Remote Url"}},"type":"object","required":["client_id","user_pname","tenant","authority_url","client_cred","remote_url"],"title":"OneDriveDestinationConnectorConfigInput"},"OneDriveSourceConnectorConfig":{"properties":{"client_id":{"type":"string","title":"Client Id"},"user_pname":{"type":"string","title":"User 
Pname"},"tenant":{"type":"string","title":"Tenant"},"authority_url":{"type":"string","title":"Authority Url"},"client_cred":{"type":"string","title":"Client Cred"},"recursive":{"type":"boolean","title":"Recursive"},"path":{"type":"string","title":"Path"}},"type":"object","required":["client_id","user_pname","tenant","authority_url","client_cred","recursive","path"],"title":"OneDriveSourceConnectorConfig"},"OneDriveSourceConnectorConfigInput":{"properties":{"client_id":{"type":"string","title":"Client Id"},"user_pname":{"type":"string","title":"User Pname"},"tenant":{"type":"string","title":"Tenant"},"authority_url":{"type":"string","title":"Authority Url"},"client_cred":{"type":"string","title":"Client Cred"},"recursive":{"type":"boolean","title":"Recursive","default":false},"path":{"type":"string","title":"Path"}},"type":"object","required":["client_id","user_pname","tenant","authority_url","client_cred","path"],"title":"OneDriveSourceConnectorConfigInput"},"OutlookSourceConnectorConfig":{"properties":{"authority_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authority Url"},"tenant":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Tenant"},"client_id":{"type":"string","title":"Client Id"},"client_cred":{"type":"string","title":"Client Cred"},"outlook_folders":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Outlook Folders"},"recursive":{"type":"boolean","title":"Recursive"},"user_email":{"type":"string","title":"User Email"}},"type":"object","required":["client_id","client_cred","recursive","user_email"],"title":"OutlookSourceConnectorConfig"},"OutlookSourceConnectorConfigInput":{"properties":{"authority_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Authority Url"},"tenant":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Tenant"},"client_id":{"type":"string","title":"Client Id"},"client_cred":{"type":"string","title":"Client 
Cred"},"outlook_folders":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Outlook Folders"},"recursive":{"type":"boolean","title":"Recursive","default":false},"user_email":{"type":"string","title":"User Email"}},"type":"object","required":["client_id","client_cred","user_email"],"title":"OutlookSourceConnectorConfigInput"},"PineconeDestinationConnectorConfig":{"properties":{"index_name":{"type":"string","title":"Index Name"},"api_key":{"type":"string","title":"Api Key"},"namespace":{"type":"string","title":"Namespace"},"batch_size":{"type":"integer","title":"Batch Size"}},"type":"object","required":["index_name","api_key","namespace","batch_size"],"title":"PineconeDestinationConnectorConfig"},"PineconeDestinationConnectorConfigInput":{"properties":{"index_name":{"type":"string","title":"Index Name"},"api_key":{"type":"string","title":"Api Key"},"namespace":{"type":"string","title":"Namespace"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":50}},"type":"object","required":["index_name","api_key","namespace"],"title":"PineconeDestinationConnectorConfigInput"},"PostgresDestinationConnectorConfig":{"properties":{"host":{"type":"string","title":"Host"},"database":{"type":"string","title":"Database"},"port":{"type":"integer","title":"Port"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"table_name":{"type":"string","title":"Table Name"},"batch_size":{"type":"integer","title":"Batch Size"}},"type":"object","required":["host","database","port","username","password","table_name","batch_size"],"title":"PostgresDestinationConnectorConfig"},"PostgresDestinationConnectorConfigInput":{"properties":{"host":{"type":"string","title":"Host"},"database":{"type":"string","title":"Database"},"port":{"type":"integer","title":"Port"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"table_name":{"type":"string","title":"Table 
Name"},"batch_size":{"type":"integer","title":"Batch Size"}},"type":"object","required":["host","database","port","username","password","table_name","batch_size"],"title":"PostgresDestinationConnectorConfigInput"},"PostgresSourceConnectorConfig":{"properties":{"host":{"type":"string","title":"Host"},"database":{"type":"string","title":"Database"},"port":{"type":"integer","title":"Port"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"table_name":{"type":"string","title":"Table Name"},"batch_size":{"type":"integer","title":"Batch Size"},"id_column":{"type":"string","title":"Id Column"},"fields":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Fields"}},"type":"object","required":["host","database","port","username","password","table_name","batch_size","id_column"],"title":"PostgresSourceConnectorConfig"},"PostgresSourceConnectorConfigInput":{"properties":{"host":{"type":"string","title":"Host"},"database":{"type":"string","title":"Database"},"port":{"type":"integer","title":"Port"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"table_name":{"type":"string","title":"Table Name"},"batch_size":{"type":"integer","title":"Batch Size"},"id_column":{"type":"string","title":"Id Column","default":"id"},"fields":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Fields"}},"type":"object","required":["host","database","port","username","password","table_name","batch_size"],"title":"PostgresSourceConnectorConfigInput"},"QdrantCloudDestinationConnectorConfig":{"properties":{"url":{"type":"string","title":"Url"},"api_key":{"type":"string","title":"Api Key"},"collection_name":{"type":"string","title":"Collection Name"},"batch_size":{"type":"integer","title":"Batch 
Size"}},"type":"object","required":["url","api_key","collection_name","batch_size"],"title":"QdrantCloudDestinationConnectorConfig"},"QdrantCloudDestinationConnectorConfigInput":{"properties":{"url":{"type":"string","title":"Url"},"api_key":{"type":"string","title":"Api Key"},"collection_name":{"type":"string","title":"Collection Name"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":50}},"type":"object","required":["url","api_key","collection_name"],"title":"QdrantCloudDestinationConnectorConfigInput"},"RedisDestinationConnectorConfig":{"properties":{"host":{"type":"string","title":"Host"},"port":{"type":"integer","title":"Port"},"username":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username"},"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password"},"uri":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Uri"},"database":{"type":"integer","title":"Database"},"ssl":{"type":"boolean","title":"Ssl"},"batch_size":{"type":"integer","title":"Batch Size"}},"type":"object","required":["host","port","database","ssl","batch_size"],"title":"RedisDestinationConnectorConfig"},"RedisDestinationConnectorConfigInput":{"properties":{"host":{"type":"string","title":"Host"},"port":{"type":"integer","minimum":1.0,"title":"Port","default":6379},"username":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Username"},"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password"},"uri":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Uri"},"database":{"type":"integer","minimum":0.0,"title":"Database","default":0},"ssl":{"type":"boolean","title":"Ssl","default":true},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":100}},"type":"object","required":["host"],"title":"RedisDestinationConnectorConfigInput"},"S3DestinationConnectorConfig":{"properties":{"remote_url":{"type":"string","title":"Remote 
Url"},"anonymous":{"type":"boolean","title":"Anonymous"},"key":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Key"},"secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Secret"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"endpoint_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Endpoint Url"}},"type":"object","required":["remote_url","anonymous"],"title":"S3DestinationConnectorConfig"},"S3DestinationConnectorConfigInput":{"properties":{"remote_url":{"type":"string","pattern":"^s3:\\/\\/","title":"Remote Url"},"anonymous":{"type":"boolean","title":"Anonymous","default":false},"key":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Key"},"secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Secret"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"endpoint_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Endpoint Url"}},"type":"object","required":["remote_url"],"title":"S3DestinationConnectorConfigInput"},"S3SourceConnectorConfig":{"properties":{"remote_url":{"type":"string","title":"Remote Url"},"anonymous":{"type":"boolean","title":"Anonymous"},"key":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Key"},"secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Secret"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"endpoint_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Endpoint Url"},"recursive":{"type":"boolean","title":"Recursive"}},"type":"object","required":["remote_url","anonymous","recursive"],"title":"S3SourceConnectorConfig"},"S3SourceConnectorConfigInput":{"properties":{"remote_url":{"type":"string","pattern":"^s3:\\/\\/","title":"Remote 
Url"},"anonymous":{"type":"boolean","title":"Anonymous","default":false},"key":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Key"},"secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Secret"},"token":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Token"},"endpoint_url":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Endpoint Url"},"recursive":{"type":"boolean","title":"Recursive","default":true}},"type":"object","required":["remote_url"],"title":"S3SourceConnectorConfigInput"},"SalesforceSourceConnectorConfig":{"properties":{"username":{"type":"string","title":"Username"},"consumer_key":{"type":"string","title":"Consumer Key"},"private_key":{"type":"string","title":"Private Key"},"categories":{"items":{"type":"string"},"type":"array","title":"Categories"}},"type":"object","required":["username","consumer_key","private_key","categories"],"title":"SalesforceSourceConnectorConfig"},"SalesforceSourceConnectorConfigInput":{"properties":{"username":{"type":"string","title":"Username"},"consumer_key":{"type":"string","title":"Consumer Key"},"private_key":{"type":"string","title":"Private Key"},"categories":{"items":{"type":"string"},"type":"array","title":"Categories"}},"type":"object","required":["username","consumer_key","private_key","categories"],"title":"SalesforceSourceConnectorConfigInput"},"SecretReference":{"properties":{"id":{"type":"string","title":"Id"},"type":{"$ref":"#/components/schemas/EncryptionType","default":"rsa"}},"type":"object","required":["id"],"title":"SecretReference"},"SharePointSourceConnectorConfig":{"properties":{"site":{"type":"string","title":"Site"},"tenant":{"type":"string","title":"Tenant"},"authority_url":{"type":"string","title":"Authority Url","default":"https://login.microsoftonline.com"},"user_pname":{"type":"string","title":"User Pname"},"client_id":{"type":"string","title":"Client Id"},"client_cred":{"type":"string","title":"Client 
Cred"},"recursive":{"type":"boolean","title":"Recursive"},"path":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Path"}},"type":"object","required":["site","tenant","user_pname","client_id","client_cred","recursive"],"title":"SharePointSourceConnectorConfig"},"SharePointSourceConnectorConfigInput":{"properties":{"site":{"type":"string","pattern":"^https:\\/\\/([a-zA-Z0-9-]+)(-admin)?\\.sharepoint\\.com(\\/sites\\/[a-zA-Z0-9-_]+)?$","title":"Site"},"tenant":{"type":"string","title":"Tenant"},"authority_url":{"type":"string","title":"Authority Url","default":"https://login.microsoftonline.com"},"user_pname":{"type":"string","title":"User Pname"},"client_id":{"type":"string","title":"Client Id"},"client_cred":{"type":"string","title":"Client Cred"},"recursive":{"type":"boolean","title":"Recursive","default":false},"path":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Path"}},"type":"object","required":["site","tenant","user_pname","client_id","client_cred"],"title":"SharePointSourceConnectorConfigInput"},"SnowflakeDestinationConnectorConfig":{"properties":{"account":{"type":"string","title":"Account"},"role":{"type":"string","title":"Role"},"user":{"type":"string","title":"User"},"password":{"type":"string","title":"Password"},"host":{"type":"string","title":"Host"},"port":{"type":"integer","minimum":1.0,"title":"Port","default":443},"database":{"type":"string","title":"Database"},"schema":{"type":"string","title":"Schema"},"table_name":{"type":"string","title":"Table Name","default":"elements"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":50},"record_id_key":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Record Id 
Key","default":"record_id"}},"type":"object","required":["account","role","user","password","host","database"],"title":"SnowflakeDestinationConnectorConfig"},"SnowflakeDestinationConnectorConfigInput":{"properties":{"account":{"type":"string","title":"Account"},"role":{"type":"string","title":"Role"},"user":{"type":"string","title":"User"},"password":{"type":"string","title":"Password"},"host":{"type":"string","title":"Host"},"port":{"type":"integer","minimum":1.0,"title":"Port","default":443},"database":{"type":"string","title":"Database"},"schema":{"type":"string","title":"Schema"},"table_name":{"type":"string","title":"Table Name","default":"elements"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":50},"record_id_key":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Record Id Key","default":"record_id"}},"type":"object","required":["account","role","user","password","host","database"],"title":"SnowflakeDestinationConnectorConfigInput"},"SnowflakeSourceConnectorConfig":{"properties":{"account":{"type":"string","title":"Account"},"role":{"type":"string","title":"Role"},"user":{"type":"string","title":"User"},"password":{"type":"string","title":"Password"},"host":{"type":"string","title":"Host"},"port":{"type":"integer","minimum":1.0,"title":"Port","default":443},"database":{"type":"string","title":"Database"},"schema":{"type":"string","title":"Schema"},"table_name":{"type":"string","title":"Table Name"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":100},"id_column":{"type":"string","title":"Id 
Column"},"fields":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Fields"}},"type":"object","required":["account","role","user","password","host","database","table_name","id_column"],"title":"SnowflakeSourceConnectorConfig"},"SnowflakeSourceConnectorConfigInput":{"properties":{"account":{"type":"string","title":"Account"},"role":{"type":"string","title":"Role"},"user":{"type":"string","title":"User"},"password":{"type":"string","title":"Password"},"host":{"type":"string","title":"Host"},"port":{"type":"integer","minimum":1.0,"title":"Port","default":443},"database":{"type":"string","title":"Database"},"schema":{"type":"string","title":"Schema"},"table_name":{"type":"string","title":"Table Name"},"batch_size":{"type":"integer","minimum":1.0,"title":"Batch Size","default":100},"id_column":{"type":"string","title":"Id Column"},"fields":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Fields"}},"type":"object","required":["account","role","user","password","host","database","table_name","id_column"],"title":"SnowflakeSourceConnectorConfigInput"},"SortDirection":{"type":"string","enum":["asc","desc"],"title":"SortDirection"},"SourceConnectorInformation":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"name":{"type":"string","title":"Name"},"type":{"$ref":"#/components/schemas/SourceConnectorType"},"config":{"anyOf":[{"$ref":"#/components/schemas/AzureSourceConnectorConfig"},{"$ref":"#/components/schemas/BoxSourceConnectorConfig"},{"$ref":"#/components/schemas/ConfluenceSourceConnectorConfig"},{"$ref":"#/components/schemas/CouchbaseSourceConnectorConfig"},{"$ref":"#/components/schemas/DatabricksVolumesConnectorConfig"},{"$ref":"#/components/schemas/DropboxSourceConnectorConfig"},{"$ref":"#/components/schemas/ElasticsearchConnectorConfig"},{"$ref":"#/components/schemas/GCSSourceConnectorConfig"},{"$ref":"#/components/schemas/GoogleDriveSourceConnectorConfig"},{"$ref":"#/components/schemas/Kafka
CloudSourceConnectorConfig"},{"$ref":"#/components/schemas/MongoDBConnectorConfig"},{"$ref":"#/components/schemas/OneDriveSourceConnectorConfig"},{"$ref":"#/components/schemas/OutlookSourceConnectorConfig"},{"$ref":"#/components/schemas/PostgresSourceConnectorConfig"},{"$ref":"#/components/schemas/S3SourceConnectorConfig"},{"$ref":"#/components/schemas/SalesforceSourceConnectorConfig"},{"$ref":"#/components/schemas/SharePointSourceConnectorConfig"},{"$ref":"#/components/schemas/SnowflakeSourceConnectorConfig"},{"$ref":"#/components/schemas/JiraSourceConnectorConfig"},{"$ref":"#/components/schemas/ZendeskSourceConnectorConfig"},{"additionalProperties":true,"type":"object"}],"title":"Config"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Updated At"}},"type":"object","required":["id","name","type","config","created_at"],"title":"SourceConnectorInformation"},"SourceConnectorType":{"type":"string","enum":["azure","box","confluence","couchbase","databricks_volumes","dropbox","elasticsearch","gcs","google_drive","kafka-cloud","mongodb","onedrive","outlook","postgres","s3","salesforce","sharepoint","slack","snowflake","jira","zendesk"],"title":"SourceConnectorType"},"UpdateDestinationConnector":{"properties":{"config":{"anyOf":[{"$ref":"#/components/schemas/AstraDBConnectorConfigInput"},{"$ref":"#/components/schemas/AzureAISearchConnectorConfigInput"},{"$ref":"#/components/schemas/CouchbaseDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/DatabricksVolumesConnectorConfigInput"},{"$ref":"#/components/schemas/DatabricksVDTDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/DeltaTableConnectorConfigInput"},{"$ref":"#/components/schemas/ElasticsearchConnectorConfigInput"},{"$ref":"#/components/schemas/GCSDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/KafkaCloudDestinationConnectorConfigInput"},{"$ref":"#/components/schema
s/MilvusDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/MongoDBConnectorConfigInput"},{"$ref":"#/components/schemas/Neo4jDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/OneDriveDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/PineconeDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/PostgresDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/RedisDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/QdrantCloudDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/S3DestinationConnectorConfigInput"},{"$ref":"#/components/schemas/SnowflakeDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/WeaviateDestinationConnectorConfigInput"},{"$ref":"#/components/schemas/IBMWatsonxS3DestinationConnectorConfigInput"},{"additionalProperties":true,"type":"object"}],"title":"Config"}},"type":"object","required":["config"],"title":"UpdateDestinationConnector"},"UpdateSourceConnector":{"properties":{"config":{"anyOf":[{"$ref":"#/components/schemas/AzureSourceConnectorConfigInput"},{"$ref":"#/components/schemas/BoxSourceConnectorConfigInput"},{"$ref":"#/components/schemas/ConfluenceSourceConnectorConfigInput"},{"$ref":"#/components/schemas/CouchbaseSourceConnectorConfigInput"},{"$ref":"#/components/schemas/DatabricksVolumesConnectorConfigInput"},{"$ref":"#/components/schemas/DropboxSourceConnectorConfigInput"},{"$ref":"#/components/schemas/ElasticsearchConnectorConfigInput"},{"$ref":"#/components/schemas/GCSSourceConnectorConfigInput"},{"$ref":"#/components/schemas/GoogleDriveSourceConnectorConfigInput"},{"$ref":"#/components/schemas/KafkaCloudSourceConnectorConfigInput"},{"$ref":"#/components/schemas/MongoDBConnectorConfigInput"},{"$ref":"#/components/schemas/OneDriveSourceConnectorConfigInput"},{"$ref":"#/components/schemas/OutlookSourceConnectorConfigInput"},{"$ref":"#/components/schemas/PostgresSourceConnectorConfigInput"},{"$ref":"#/components/schemas/S3SourceConnectorConfigInp
ut"},{"$ref":"#/components/schemas/SalesforceSourceConnectorConfigInput"},{"$ref":"#/components/schemas/SharePointSourceConnectorConfigInput"},{"$ref":"#/components/schemas/SnowflakeSourceConnectorConfigInput"},{"$ref":"#/components/schemas/JiraSourceConnectorConfigInput"},{"$ref":"#/components/schemas/ZendeskSourceConnectorConfigInput"},{"additionalProperties":true,"type":"object"}],"title":"Config"}},"type":"object","required":["config"],"title":"UpdateSourceConnector"},"UpdateWorkflow":{"properties":{"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"source_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Source Id"},"destination_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Destination Id"},"workflow_type":{"anyOf":[{"$ref":"#/components/schemas/WorkflowType"},{"type":"null"}]},"workflow_nodes":{"anyOf":[{"items":{"$ref":"#/components/schemas/WorkflowNode"},"type":"array"},{"type":"null"}],"title":"Workflow Nodes"},"schedule":{"anyOf":[{"type":"string","enum":["every 15 minutes","every hour","every 2 hours","every 4 hours","every 6 hours","every 8 hours","every 10 hours","every 12 hours","daily","weekly","monthly"]},{"type":"null"}],"title":"Schedule"},"reprocess_all":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Reprocess All"}},"type":"object","title":"UpdateWorkflow"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"WeaviateDestinationConnectorConfig":{"properties":{"cluster_url":{"type":"string","title":"Cluster Url"},"api_key":{"type":"string","title":"Api 
Key"},"collection":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Collection"}},"type":"object","required":["cluster_url","api_key"],"title":"WeaviateDestinationConnectorConfig"},"WeaviateDestinationConnectorConfigInput":{"properties":{"cluster_url":{"type":"string","title":"Cluster Url"},"api_key":{"type":"string","title":"Api Key"},"collection":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Collection"}},"type":"object","required":["cluster_url","api_key"],"title":"WeaviateDestinationConnectorConfigInput"},"WorkflowInformation":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"name":{"type":"string","title":"Name"},"sources":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Sources"},"destinations":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Destinations"},"workflow_type":{"anyOf":[{"$ref":"#/components/schemas/WorkflowType"},{"type":"null"}]},"workflow_nodes":{"items":{"$ref":"#/components/schemas/WorkflowNode"},"type":"array","title":"Workflow Nodes"},"schedule":{"anyOf":[{"$ref":"#/components/schemas/WorkflowSchedule"},{"type":"null"}]},"status":{"$ref":"#/components/schemas/WorkflowState"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Updated At"},"reprocess_all":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Reprocess 
All"}},"type":"object","required":["id","name","sources","destinations","workflow_nodes","status","created_at"],"title":"WorkflowInformation"},"WorkflowJobType":{"type":"string","enum":["ephemeral","persistent","scheduled"],"title":"WorkflowJobType"},"WorkflowNode":{"properties":{"id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Id"},"name":{"type":"string","title":"Name"},"type":{"type":"string","title":"Type"},"subtype":{"type":"string","title":"Subtype"},"settings":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Settings"}},"type":"object","required":["name","type","subtype"],"title":"WorkflowNode"},"WorkflowSchedule":{"properties":{"crontab_entries":{"items":{"$ref":"#/components/schemas/CronTabEntry"},"type":"array","title":"Crontab Entries"}},"type":"object","required":["crontab_entries"],"title":"WorkflowSchedule","examples":[{"crontab_entries":[{"cron_expression":"0 0 * * *"}]}]},"WorkflowState":{"type":"string","enum":["active","inactive"],"title":"WorkflowState"},"WorkflowType":{"type":"string","enum":["basic","advanced","platinum","custom"],"title":"WorkflowType"},"ZendeskSourceConnectorConfig":{"properties":{"subdomain":{"type":"string","title":"Subdomain"},"email":{"type":"string","title":"Email"},"api_token":{"type":"string","title":"Api Token"},"item_type":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Item Type","default":"tickets"},"batch_size":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Batch Size","default":2}},"type":"object","required":["subdomain","email","api_token"],"title":"ZendeskSourceConnectorConfig"},"ZendeskSourceConnectorConfigInput":{"properties":{"subdomain":{"type":"string","title":"Subdomain"},"email":{"type":"string","title":"Email"},"api_token":{"type":"string","title":"Api Token"},"item_type":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Item Type","default":"tickets"},"batch_size":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Batch 
Size","default":2}},"type":"object","required":["subdomain","email","api_token"],"title":"ZendeskSourceConnectorConfigInput"}}}} \ No newline at end of file diff --git a/partitioner_auto.go b/partitioner_auto.go new file mode 100644 index 0000000..4b91f69 --- /dev/null +++ b/partitioner_auto.go @@ -0,0 +1,57 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// PartitionerAuto is a partitioner that uses the Auto strategy. +type PartitionerAuto struct { + ID string `json:"-"` + Name string `json:"-"` + Strategy string `json:"strategy"` + Provider Provider `json:"provider,omitempty"` + ProviderAPIKey string `json:"provider_api_key,omitempty"` + Model Model `json:"model,omitempty"` + OutputFormat OutputFormat `json:"output_format,omitempty"` + Prompt struct { + Text string `json:"text,omitempty"` + } `json:"prompt,omitzero"` + FormatHTML *bool `json:"format_html,omitzero"` + UniqueElementIDs *bool `json:"unique_element_ids,omitzero"` + IsDynamic bool `json:"is_dynamic"` + AllowFast bool `json:"allow_fast"` +} + +var _ WorkflowNode = new(PartitionerAuto) + +// MarshalJSON implements the json.Marshaler interface. 
+func (p PartitionerAuto) MarshalJSON() ([]byte, error) { + type alias PartitionerAuto + + data, err := json.Marshal(struct { + alias + Strategy string `json:"strategy"` + }{ + alias: alias(p), + Strategy: PartitionerStrategyAuto, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal partitioner auto: %w", err) + } + + headerData, err := json.Marshal(header{ + ID: p.ID, + Name: p.Name, + Type: nodeTypePartition, + Subtype: PartitionerStrategyVLM, + Settings: json.RawMessage(data), + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal partitioner auto header: %w", err) + } + + return headerData, nil +} + +func (p *PartitionerAuto) isNode() {} diff --git a/partitioner_fast.go b/partitioner_fast.go new file mode 100644 index 0000000..a42787d --- /dev/null +++ b/partitioner_fast.go @@ -0,0 +1,55 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// PartitionerFast represents a fast partitioner configuration for document processing. +type PartitionerFast struct { + ID string `json:"-"` + Name string `json:"-"` + PageBreaks bool `json:"include_page_breaks,omitzero"` + PDFInferTableStructure bool `json:"pdf_infer_table_structure,omitzero"` + ExcludeElements []ExcludeableElement `json:"exclude_elements,omitzero"` + XMLKeepTags bool `json:"xml_keep_tags,omitzero"` + Encoding Encoding `json:"encoding,omitzero"` + OCRLanguages []Language `json:"ocr_languages,omitzero"` + ExtractImageBlockTypes []BlockType `json:"extract_image_block_types,omitzero"` + InferTableStructure bool `json:"infer_table_structure,omitzero"` +} + +var _ WorkflowNode = new(PartitionerFast) + +// MarshalJSON implements the json.Marshaler interface for PartitionerFast. 
+func (p PartitionerFast) MarshalJSON() ([]byte, error) { + type alias PartitionerFast + + mask := struct { + Strategy string `json:"strategy"` + alias + }{ + Strategy: PartitionerStrategyFast, + alias: alias(p), + } + + data, err := json.Marshal(mask) + if err != nil { + return nil, fmt.Errorf("failed to marshal partitioner fast: %w", err) + } + + headerData, err := json.Marshal(header{ + ID: p.ID, + Name: p.Name, + Type: nodeTypePartition, + Subtype: string(PartitionerStrategyFast), + Settings: json.RawMessage(data), + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal partitioner fast header: %w", err) + } + + return headerData, nil +} + +func (p *PartitionerFast) isNode() {} diff --git a/partitioner_hi_res.go b/partitioner_hi_res.go new file mode 100644 index 0000000..3e51b05 --- /dev/null +++ b/partitioner_hi_res.go @@ -0,0 +1,55 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// PartitionerHiRes represents a high-resolution partitioner configuration for document processing. +type PartitionerHiRes struct { + ID string `json:"-"` + Name string `json:"-"` + PageBreaks bool `json:"include_page_breaks,omitzero"` + PDFInferTableStructure bool `json:"pdf_infer_table_structure,omitzero"` + ExcludeElements []ExcludeableElement `json:"exclude_elements,omitzero"` + XMLKeepTags bool `json:"xml_keep_tags,omitzero"` + Encoding Encoding `json:"encoding,omitzero"` + OCRLanguages []Language `json:"ocr_languages,omitzero"` + ExtractImageBlockTypes []BlockType `json:"extract_image_block_types,omitzero"` + InferTableStructure bool `json:"infer_table_structure,omitzero"` +} + +var _ WorkflowNode = new(PartitionerHiRes) + +// MarshalJSON implements the json.Marshaler interface for PartitionerHiRes. 
+func (p PartitionerHiRes) MarshalJSON() ([]byte, error) { + type alias PartitionerHiRes + + mask := struct { + Strategy string `json:"strategy"` + alias + }{ + Strategy: PartitionerStrategyHiRes, + alias: alias(p), + } + + data, err := json.Marshal(mask) + if err != nil { + return nil, fmt.Errorf("failed to marshal partitioner hi res: %w", err) + } + + headerData, err := json.Marshal(header{ + ID: p.ID, + Name: p.Name, + Type: nodeTypePartition, + Subtype: string(PartitionerStrategyHiRes), + Settings: json.RawMessage(data), + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal partitioner hi res header: %w", err) + } + + return headerData, nil +} + +func (p *PartitionerHiRes) isNode() {} diff --git a/partitioner_type.go b/partitioner_type.go new file mode 100644 index 0000000..6f4bd0a --- /dev/null +++ b/partitioner_type.go @@ -0,0 +1,62 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// OutputFormat represents the output format for document processing. +type OutputFormat string + +// Output format constants. +const ( + OutputFormatHTML OutputFormat = "text/html" + OutputFormatJSON OutputFormat = "application/json" +) + +// Partitioner strategy constants. 
+const ( + PartitionerStrategyAuto = "auto" + PartitionerStrategyVLM = "vlm" + PartitionerStrategyHiRes = "hi_res" + PartitionerStrategyFast = "fast" +) + +func unmarshalPartitioner(header header) (WorkflowNode, error) { + var partitioner WorkflowNode + + switch header.Subtype { + case PartitionerStrategyAuto: + partitioner = &PartitionerAuto{ + ID: header.ID, + Name: header.Name, + } + + case PartitionerStrategyVLM: + partitioner = &PartitionerVLM{ + ID: header.ID, + Name: header.Name, + } + + case PartitionerStrategyHiRes: + partitioner = &PartitionerHiRes{ + ID: header.ID, + Name: header.Name, + } + + case PartitionerStrategyFast: + partitioner = &PartitionerFast{ + ID: header.ID, + Name: header.Name, + } + + default: + return nil, fmt.Errorf("unknown partitioner strategy: %s", header.Subtype) + } + + if err := json.Unmarshal(header.Settings, partitioner); err != nil { + return nil, fmt.Errorf("failed to unmarshal partitioner node: %w", err) + } + + return partitioner, nil +} diff --git a/partitioner_vlm.go b/partitioner_vlm.go new file mode 100644 index 0000000..e6227d0 --- /dev/null +++ b/partitioner_vlm.go @@ -0,0 +1,51 @@ +package unstructured + +import ( + "encoding/json" + "fmt" +) + +// PartitionerVLM is a partitioner that uses the VLM strategy. 
+type PartitionerVLM struct {
+	ID             string       `json:"-"`
+	Name           string       `json:"-"`
+	Strategy       string       `json:"strategy,omitempty"`
+	Provider       Provider     `json:"provider,omitempty"`
+	ProviderAPIKey string       `json:"provider_api_key,omitempty"`
+	Model          Model        `json:"model,omitempty"`
+	OutputFormat   OutputFormat `json:"output_format,omitempty"`
+	Prompt         struct {
+		Text string `json:"text,omitempty"`
+	} `json:"prompt,omitzero"`
+	FormatHTML       *bool `json:"format_html,omitzero"`
+	UniqueElementIDs *bool `json:"unique_element_ids,omitzero"`
+	IsDynamic        *bool `json:"is_dynamic,omitzero"`
+	AllowFast        *bool `json:"allow_fast,omitzero"`
+}
+
+var _ WorkflowNode = new(PartitionerVLM)
+
+// MarshalJSON implements the json.Marshaler interface.
+func (p PartitionerVLM) MarshalJSON() ([]byte, error) {
+	type alias PartitionerVLM
+
+	data, err := json.Marshal(alias(p))
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal partitioner vlm: %w", err)
+	}
+
+	headerData, err := json.Marshal(header{
+		ID:       p.ID,
+		Name:     p.Name,
+		Type:     nodeTypePartition,      // node type, as in the other partitioners' MarshalJSON
+		Subtype:  PartitionerStrategyVLM, // strategy; unmarshalPartitioner switches on this value
+		Settings: json.RawMessage(data),
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal partitioner vlm header: %w", err)
+	}
+
+	return headerData, nil
+}
+
+func (p *PartitionerVLM) isNode() {}
diff --git a/pointers.go b/pointers.go
index fea0bd8..be71427 100644
--- a/pointers.go
+++ b/pointers.go
@@ -47,3 +47,20 @@ func ToInt(p *int) int {
 
 	return *p
 }
+
+// Ptr returns a pointer to the given value.
+// This is useful when you need to pass optional values to API requests.
+func Ptr[T any](v T) *T {
+	return &v
+}
+
+// ToVal converts a pointer to a value.
+// If the pointer is nil, it returns the zero value of the type.
+func ToVal[T any](p *T) T { + if p == nil { + var val T + return val + } + + return *p +} diff --git a/provider_models.go b/provider_models.go new file mode 100644 index 0000000..e17d123 --- /dev/null +++ b/provider_models.go @@ -0,0 +1,54 @@ +package unstructured + +// Provider represents an AI model provider. +type Provider string + +// Provider constants. +const ( + ProviderAuto Provider = "auto" + ProviderAnthropic Provider = "anthropic" + ProviderOpenAI Provider = "openai" + ProviderBedrock Provider = "bedrock" +) + +// Model represents an AI model identifier. +type Model string + +// Model constants. +const ( + ModelGPT4o Model = "gpt-4o" + ModelGPT4oMini Model = "gpt-4o-mini" + ModelClaude35Sonnet Model = "claude-3-5-sonnet-20241022" + ModelClaude37Sonnet Model = "claude-3-7-sonnet-20250219" + ModelBedrockNovaLite Model = "us.amazon.nova-lite-v1:0" + ModelBedrockNovaPro Model = "us.amazon.nova-pro-v1:0" + ModelBedrockClaude3Opus Model = "us.anthropic.claude-3-opus-20240229-v1:0" + ModelBedrockClaude3Haiku Model = "us.anthropic.claude-3-haiku-20240307-v1:0" + ModelBedrockClaude3Sonnet Model = "us.anthropic.claude-3-sonnet-20240229-v1:0" + ModelBedrockClaude35Sonnet Model = "us.anthropic.claude-3-5-sonnet-20241022-v2:0" + ModelBedrockLlama3211B Model = "us.meta.llama3-2-11b-instruct-v1:0" + ModelBedrockLlama3290B Model = "us.meta.llama3-2-90b-instruct-v1:0" +) + +func init() { var _ = providerModels } + +var providerModels = map[Provider][]Model{ + ProviderOpenAI: { + ModelGPT4o, + ModelGPT4oMini, + }, + ProviderAnthropic: { + ModelClaude35Sonnet, + ModelClaude37Sonnet, + }, + ProviderBedrock: { + ModelBedrockNovaLite, + ModelBedrockNovaPro, + ModelBedrockClaude3Opus, + ModelBedrockClaude3Haiku, + ModelBedrockClaude3Sonnet, + ModelBedrockClaude35Sonnet, + ModelBedrockLlama3211B, + ModelBedrockLlama3290B, + }, +} diff --git a/source.go b/source.go index 68232ca..d3ff3e0 100644 --- a/source.go +++ b/source.go @@ -39,7 +39,6 @@ type Source struct { Name string 
`json:"name"` CreatedAt time.Time `json:"created_at,omitzero"` UpdatedAt time.Time `json:"updated_at,omitzero"` - Type string `json:"type"` Config SourceConfig `json:"config"` } @@ -64,7 +63,6 @@ func (s *Source) UnmarshalJSON(data []byte) error { s.Name = shadow.Name s.CreatedAt = shadow.CreatedAt s.UpdatedAt = shadow.UpdatedAt - s.Type = shadow.Type // Look up the factory function for this source type factory, exists := sourceConfigFactories[shadow.Type] diff --git a/source_connection_check.go b/source_connection_check.go index 70f744a..bddded2 100644 --- a/source_connection_check.go +++ b/source_connection_check.go @@ -11,7 +11,7 @@ import ( func (c *Client) CreateSourceConnectionCheck(ctx context.Context, id string) (*DagNodeConnectionCheck, error) { req, err := http.NewRequestWithContext(ctx, http.MethodPost, - c.endpoint.JoinPath("/sources", id, "connection-check").String(), + c.endpoint.JoinPath("sources", id, "connection-check").String(), nil, ) if err != nil { @@ -31,7 +31,7 @@ func (c *Client) CreateSourceConnectionCheck(ctx context.Context, id string) (*D func (c *Client) GetSourceConnectionCheck(ctx context.Context, id string) (*DagNodeConnectionCheck, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/sources", id, "connection-check").String(), + c.endpoint.JoinPath("sources", id, "connection-check").String(), nil, ) if err != nil { diff --git a/source_create.go b/source_create.go index 5c12f38..364b960 100644 --- a/source_create.go +++ b/source_create.go @@ -47,7 +47,7 @@ func (c *Client) CreateSource(ctx context.Context, in CreateSourceRequest) (*Sou req, err := http.NewRequestWithContext(ctx, http.MethodPost, - c.endpoint.JoinPath("/sources").String(), + c.endpoint.JoinPath("sources/").String(), bytes.NewReader(body), ) if err != nil { diff --git a/source_create_test.go b/source_create_test.go new file mode 100644 index 0000000..f2256eb --- /dev/null +++ b/source_create_test.go @@ -0,0 +1,64 @@ +package 
unstructured + +import ( + "errors" + "net/http" + "testing" + "time" +) + +func TestCreateSource(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.CreateSource = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`{` + + ` "config": {` + + ` "client_id": "foo",` + + ` "tenant": "foo",` + + ` "authority_url": "foo",` + + ` "user_pname": "foo",` + + ` "client_cred": "foo",` + + ` "recursive": false,` + + ` "path": "foo"` + + ` },` + + ` "created_at": "2023-09-15T01:06:53.146Z",` + + ` "id": "a15d4161-77a0-4e08-b65e-86f398ce15ad",` + + ` "name": "test_source_name",` + + ` "type": "onedrive"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write(response) + } + + source, err := client.CreateSource(t.Context(), CreateSourceRequest{ + Name: "test_source_name", + Config: &OneDriveSourceConnectorConfigInput{ + ClientID: "foo", + Tenant: "foo", + AuthorityURL: "foo", + UserPName: "foo", + ClientCred: "foo", + Path: "foo", + }, + }) + if err != nil { + t.Fatalf("failed to create source: %v", err) + } + + if err := errors.Join( + eq("source.id", source.ID, "a15d4161-77a0-4e08-b65e-86f398ce15ad"), + eq("source.name", source.Name, "test_source_name"), + equal("source.created_at", source.CreatedAt, time.Date(2023, 9, 15, 1, 6, 53, 146000000, time.UTC)), + ); err != nil { + t.Error(err) + } + + cfg, ok := source.Config.(*OneDriveSourceConnectorConfig) + if !ok { + t.Errorf("expected source config to be %T, got %T", cfg, source.Config) + } +} diff --git a/source_delete.go b/source_delete.go index 93e8b48..251aeb4 100644 --- a/source_delete.go +++ b/source_delete.go @@ -10,7 +10,7 @@ import ( func (c *Client) DeleteSource(ctx context.Context, id string) error { req, err := http.NewRequestWithContext(ctx, http.MethodDelete, - c.endpoint.JoinPath("/sources", id).String(), + c.endpoint.JoinPath("sources", id).String(), nil, ) if err != nil { diff --git a/source_delete_test.go 
b/source_delete_test.go new file mode 100644 index 0000000..e184f06 --- /dev/null +++ b/source_delete_test.go @@ -0,0 +1,34 @@ +package unstructured + +import ( + "net/http" + "strconv" + "testing" +) + +func TestDeleteSource(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "a15d4161-77a0-4e08-b65e-86f398ce15ad" + + mux.DeleteSource = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "source ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{"detail": "Source with id ` + id + ` successfully deleted."}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.WriteHeader(http.StatusOK) + w.Write(response) + } + + err := client.DeleteSource(t.Context(), id) + if err != nil { + t.Fatalf("failed to delete source: %v", err) + } +} diff --git a/source_get.go b/source_get.go index 0df98cb..cba7bd1 100644 --- a/source_get.go +++ b/source_get.go @@ -10,7 +10,7 @@ import ( func (c *Client) GetSource(ctx context.Context, id string) (*Source, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/sources", id).String(), + c.endpoint.JoinPath("sources", id).String(), nil, ) if err != nil { diff --git a/source_get_test.go b/source_get_test.go new file mode 100644 index 0000000..cc14b82 --- /dev/null +++ b/source_get_test.go @@ -0,0 +1,86 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "testing" + "time" +) + +func TestGetSource(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "a15d4161-77a0-4e08-b65e-86f398ce15ad" + mux.GetSource = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "source ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{` + + ` "config": {` + + ` "client_id": "foo",` + + ` "tenant": "foo",` + + ` 
"authority_url": "foo",` + + ` "user_pname": "foo",` + + ` "client_cred": "foo",` + + ` "recursive": false,` + + ` "path": "foo"` + + ` },` + + ` "created_at": "2023-09-15T01:06:53.146Z",` + + ` "id": "` + id + `",` + + ` "name": "test_source_name",` + + ` "type": "onedrive"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + source, err := client.GetSource(t.Context(), id) + if err != nil { + t.Fatalf("failed to get source: %v", err) + } + + if err := errors.Join( + eq("source.id", source.ID, id), + eq("source.name", source.Name, "test_source_name"), + equal("source.created_at", source.CreatedAt, time.Date(2023, 9, 15, 1, 6, 53, 146000000, time.UTC)), + ); err != nil { + t.Error(err) + } + + cfg, ok := source.Config.(*OneDriveSourceConnectorConfig) + if !ok { + t.Errorf("expected source config to be %T, got %T", cfg, source.Config) + } +} + +func TestGetSourceNotFound(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "a15d4161-77a0-4e08-b65e-86f398ce15ad" + mux.GetSource = func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "source ID "+r.PathValue("id")+" not found", http.StatusNotFound) + } + + _, err := client.GetSource(t.Context(), id) + if err == nil { + t.Fatalf("expected error, got nil") + } + + var apierr *APIError + if !errors.As(err, &apierr) { + t.Fatalf("expected error to be an %T, got %T", apierr, err) + } + + if apierr.Code != http.StatusNotFound { + t.Fatalf("expected error code to be %d, got %d", http.StatusNotFound, apierr.Code) + } +} diff --git a/source_list.go b/source_list.go index b40eb16..e9ee151 100644 --- a/source_list.go +++ b/source_list.go @@ -10,7 +10,7 @@ import ( func (c *Client) ListSources(ctx context.Context, typ string) ([]Source, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/sources").String(), + c.endpoint.JoinPath("sources").String(), nil, ) 
if err != nil { diff --git a/source_list_test.go b/source_list_test.go new file mode 100644 index 0000000..3733ac3 --- /dev/null +++ b/source_list_test.go @@ -0,0 +1,130 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "strings" + "testing" + "time" +) + +func TestListSources(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListSources = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`[` + + ` {` + + ` "config": {` + + ` "client_id": "foo",` + + ` "tenant": "foo",` + + ` "authority_url": "foo",` + + ` "user_pname": "foo",` + + ` "client_cred": "foo",` + + ` "recursive": false,` + + ` "path": "foo"` + + ` },` + + ` "created_at": "2023-09-15T01:06:53.146Z",` + + ` "id": "a15d4161-77a0-4e08-b65e-86f398ce15ad",` + + ` "name": "test_source_name",` + + ` "type": "onedrive"` + + ` }` + + `]`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + sources, err := client.ListSources(t.Context(), "") + if err != nil { + t.Fatalf("failed to list sources: %v", err) + } + + if len(sources) != 1 { + t.Fatalf("expected 1 source, got %d", len(sources)) + } + + source := sources[0] + if err := errors.Join( + eq("source.id", source.ID, "a15d4161-77a0-4e08-b65e-86f398ce15ad"), + eq("source.name", source.Name, "test_source_name"), + equal("source.created_at", source.CreatedAt, time.Date(2023, 9, 15, 1, 6, 53, 146000000, time.UTC)), + ); err != nil { + t.Error(err) + } + + cfg, ok := source.Config.(*OneDriveSourceConnectorConfig) + if !ok { + t.Errorf("expected source config to be %T, got %T", cfg, source.Config) + } +} + +func TestListSourcesEmpty(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListSources = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`[]`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + 
w.Write(response) + } + + sources, err := client.ListSources(t.Context(), "") + if err != nil { + t.Fatalf("failed to list sources: %v", err) + } + + if len(sources) != 0 { + t.Fatalf("expected 0 sources, got %d", len(sources)) + } +} + +func TestListSourcesErrorCode(t *testing.T) { + t.Parallel() + + for _, code := range []int{ + http.StatusBadRequest, // 400 + http.StatusUnauthorized, // 401 + http.StatusForbidden, // 403 + http.StatusNotFound, // 404 + http.StatusInternalServerError, // 500 + http.StatusBadGateway, // 502 + http.StatusServiceUnavailable, // 503 + http.StatusGatewayTimeout, // 504 + } { + t.Run(strconv.Itoa(code), func(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListSources = func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + } + + _, err := client.ListSources(t.Context(), "") + if err == nil { + t.Fatalf("expected error, got nil") + } + + if !strings.Contains(err.Error(), "API error occurred") { + t.Fatalf("expected error to contain 'API error occurred', got %v", err) + } + + var apierr *APIError + if !errors.As(err, &apierr) { + t.Fatalf("expected error to be an %T, got %T", apierr, err) + } + + if apierr.Code != code { + t.Fatalf("expected error code to be %d, got %d", code, apierr.Code) + } + }) + } +} diff --git a/source_update.go b/source_update.go index b54f162..a7a842a 100644 --- a/source_update.go +++ b/source_update.go @@ -35,7 +35,7 @@ func (c *Client) UpdateSource(ctx context.Context, in UpdateSourceRequest) (*Sou req, err := http.NewRequestWithContext(ctx, http.MethodPut, - c.endpoint.JoinPath("/sources", in.ID).String(), + c.endpoint.JoinPath("sources", in.ID).String(), bytes.NewReader(body), ) if err != nil { diff --git a/source_update_test.go b/source_update_test.go new file mode 100644 index 0000000..3bb3bc5 --- /dev/null +++ b/source_update_test.go @@ -0,0 +1,74 @@ +package unstructured + +import ( + "errors" + "net/http" + 
"strconv" + "testing" + "time" +) + +func TestUpdateSource(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "a15d4161-77a0-4e08-b65e-86f398ce15ad" + + mux.UpdateSource = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "source ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{` + + ` "config": {` + + ` "client_id": "foo",` + + ` "tenant": "foo",` + + ` "authority_url": "foo",` + + ` "user_pname": "foo",` + + ` "client_cred": "foo",` + + ` "recursive": false,` + + ` "path": "foo"` + + ` },` + + ` "created_at": "2023-09-15T01:06:53.146Z",` + + ` "id": "a15d4161-77a0-4e08-b65e-86f398ce15ad",` + + ` "name": "test_source_name",` + + ` "type": "onedrive"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.WriteHeader(http.StatusOK) + w.Write(response) + } + + source, err := client.UpdateSource(t.Context(), UpdateSourceRequest{ + ID: id, + Config: &OneDriveSourceConnectorConfigInput{ + ClientID: "foo", + Tenant: "foo", + AuthorityURL: "foo", + UserPName: "foo", + ClientCred: "foo", + Recursive: Bool(false), + Path: "foo", + }, + }) + if err != nil { + t.Fatalf("failed to update source: %v", err) + } + + if err := errors.Join( + eq("updated_source.id", source.ID, id), + eq("updated_source.name", source.Name, "test_source_name"), + equal("updated_source.created_at", source.CreatedAt, time.Date(2023, 9, 15, 1, 6, 53, 146000000, time.UTC)), + ); err != nil { + t.Error(err) + } + + cfg, ok := source.Config.(*OneDriveSourceConnectorConfig) + if !ok { + t.Errorf("expected source config to be %T, got %T", cfg, source.Config) + } +} diff --git a/test/main.go b/test/main.go new file mode 100644 index 0000000..6f8d303 --- /dev/null +++ b/test/main.go @@ -0,0 +1,111 @@ +// Package test provides testing utilities and examples for the unstructured SDK. 
+package test + +import ( + "net/http" + "strings" +) + +// FakeAPIKey is a fake API key for testing. +const FakeAPIKey = "91pmLBeETAbXCpNylRsLq11FdiZPTk" + +// Mux is a HTTP server that mocks the Unstructured API. +type Mux struct { + mux *http.ServeMux + + // Destination handlers + CreateDestination func(w http.ResponseWriter, r *http.Request) + ListDestinations func(w http.ResponseWriter, r *http.Request) + GetDestination func(w http.ResponseWriter, r *http.Request) + UpdateDestination func(w http.ResponseWriter, r *http.Request) + DeleteDestination func(w http.ResponseWriter, r *http.Request) + CreateConnectionCheckDestinations func(w http.ResponseWriter, r *http.Request) + GetConnectionCheckDestinations func(w http.ResponseWriter, r *http.Request) + + // Source handlers + ListSources func(w http.ResponseWriter, r *http.Request) + CreateSource func(w http.ResponseWriter, r *http.Request) + GetSource func(w http.ResponseWriter, r *http.Request) + UpdateSource func(w http.ResponseWriter, r *http.Request) + DeleteSource func(w http.ResponseWriter, r *http.Request) + CreateConnectionCheckSources func(w http.ResponseWriter, r *http.Request) + GetConnectionCheckSources func(w http.ResponseWriter, r *http.Request) + + // Job handlers + ListJobs func(w http.ResponseWriter, r *http.Request) + GetJob func(w http.ResponseWriter, r *http.Request) + CancelJob func(w http.ResponseWriter, r *http.Request) + DownloadJobOutput func(w http.ResponseWriter, r *http.Request) + GetJobDetails func(w http.ResponseWriter, r *http.Request) + GetJobFailedFiles func(w http.ResponseWriter, r *http.Request) + + // Workflow handlers + CreateWorkflow func(w http.ResponseWriter, r *http.Request) + ListWorkflows func(w http.ResponseWriter, r *http.Request) + GetWorkflow func(w http.ResponseWriter, r *http.Request) + UpdateWorkflow func(w http.ResponseWriter, r *http.Request) + DeleteWorkflow func(w http.ResponseWriter, r *http.Request) + RunWorkflow func(w http.ResponseWriter, r *http.Request) 
+} + +func (m *Mux) ServeHTTP(w http.ResponseWriter, r *http.Request) { + r.URL.Path = strings.TrimPrefix(r.URL.Path, "/api/v1") + r.URL.RawPath = r.URL.Path + m.mux.ServeHTTP(w, r) +} + +// NewMux creates a new Mux with all the routes for the API. +func NewMux() *Mux { + m := &Mux{mux: http.NewServeMux()} + + try := func(f *func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + if f != nil && *f != nil { + (*f)(w, r) + } else { + var msg string + if r.Pattern != "" { + msg = "handler for " + r.Pattern + " is nil" + } + + http.Error(w, msg, http.StatusMethodNotAllowed) + } + } + } + + // Destination routes + m.mux.HandleFunc("POST /destinations/", try(&m.CreateDestination)) + m.mux.HandleFunc("GET /destinations/", try(&m.ListDestinations)) + m.mux.HandleFunc("GET /destinations/{id}", try(&m.GetDestination)) + m.mux.HandleFunc("PUT /destinations/{id}", try(&m.UpdateDestination)) + m.mux.HandleFunc("DELETE /destinations/{id}", try(&m.DeleteDestination)) + m.mux.HandleFunc("POST /destinations/{id}/connection-check", try(&m.CreateConnectionCheckDestinations)) + m.mux.HandleFunc("GET /destinations/{id}/connection-check", try(&m.GetConnectionCheckDestinations)) + + // Source routes + m.mux.HandleFunc("GET /sources/", try(&m.ListSources)) + m.mux.HandleFunc("POST /sources/", try(&m.CreateSource)) + m.mux.HandleFunc("GET /sources/{id}", try(&m.GetSource)) + m.mux.HandleFunc("PUT /sources/{id}", try(&m.UpdateSource)) + m.mux.HandleFunc("DELETE /sources/{id}", try(&m.DeleteSource)) + m.mux.HandleFunc("POST /sources/{id}/connection-check", try(&m.CreateConnectionCheckSources)) + m.mux.HandleFunc("GET /sources/{id}/connection-check", try(&m.GetConnectionCheckSources)) + + // Job routes + m.mux.HandleFunc("GET /jobs/", try(&m.ListJobs)) + m.mux.HandleFunc("GET /jobs/{id}", try(&m.GetJob)) + m.mux.HandleFunc("POST /jobs/{id}/cancel", try(&m.CancelJob)) + m.mux.HandleFunc("GET 
/jobs/{id}/download", try(&m.DownloadJobOutput)) + m.mux.HandleFunc("GET /jobs/{id}/details", try(&m.GetJobDetails)) + m.mux.HandleFunc("GET /jobs/{id}/failed-files", try(&m.GetJobFailedFiles)) + + // Workflow routes + m.mux.HandleFunc("POST /workflows/", try(&m.CreateWorkflow)) + m.mux.HandleFunc("GET /workflows/", try(&m.ListWorkflows)) + m.mux.HandleFunc("GET /workflows/{id}", try(&m.GetWorkflow)) + m.mux.HandleFunc("PUT /workflows/{id}", try(&m.UpdateWorkflow)) + m.mux.HandleFunc("DELETE /workflows/{id}", try(&m.DeleteWorkflow)) + m.mux.HandleFunc("POST /workflows/{id}/run", try(&m.RunWorkflow)) + + return m +} diff --git a/test/main_test.go b/test/main_test.go new file mode 100644 index 0000000..2a33bc9 --- /dev/null +++ b/test/main_test.go @@ -0,0 +1,159 @@ +//go:build integration + +package test + +import ( + "context" + "encoding/json" + "io" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/aws-gopher/unstructured-sdk-go" +) + +func TestWorkflow(t *testing.T) { + key := os.Getenv("UNSTRUCTURED_API_KEY") + if key == "" { + t.Skip("skipping because UNSTRUCTURED_API_KEY is not set") + } + + pretty := func(v any) string { + data, err := json.MarshalIndent(v, "", " ") + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + + return string(data) + } + + client, err := unstructured.New( + unstructured.WithKey(key), + ) + if err != nil { + t.Fatalf("failed to create client: %v", err) + } + + ctx := t.Context() + + workflow, err := client.CreateWorkflow(ctx, &unstructured.CreateWorkflowRequest{ + Name: "test", + WorkflowType: unstructured.WorkflowTypeCustom, + WorkflowNodes: []unstructured.WorkflowNode{ + &unstructured.PartitionerAuto{ + Name: "Partitioner", + }, + }, + }) + if err != nil { + t.Fatalf("failed to create workflow: %v", err) + } else { + t.Logf("created workflow %s:\n%s", workflow.ID, pretty(workflow)) + } + + t.Cleanup(func() { _ = client.DeleteWorkflow(ctx, workflow.ID) }) + + // get all the files under ./testdata 
and use them in a call to run the workflow. + dir, err := os.ReadDir("testdata") + if err != nil { + t.Fatalf("failed to read testdata: %v", err) + } + + files := make([]unstructured.File, 0, len(dir)) + + for _, file := range dir { + if file.IsDir() || file.Name() == ".DS_Store" || strings.HasSuffix(file.Name(), ".json") { + continue + } + + f, err := os.Open(filepath.Join("testdata", file.Name())) + if err != nil { + t.Fatalf("failed to open file: %v", err) + } + + t.Cleanup(func() { _ = f.Close() }) + + files = append(files, f) + } + + t.Logf("running workflow %s with %d files", workflow.ID, len(files)) + + job, err := client.RunWorkflow(ctx, &unstructured.RunWorkflowRequest{ + ID: workflow.ID, + InputFiles: files, + }) + if err != nil { + t.Errorf("failed to run workflow: %v", err) + return + } + + t.Logf("job %s:\n%s", job.ID, pretty(job)) + + deadline, ok := t.Deadline() + if !ok { + deadline = time.Now().Add(5 * time.Minute) + } + + tick := time.NewTicker(5 * time.Second) + defer tick.Stop() + + for ctx, cancel := context.WithDeadline(ctx, deadline.Add(-1*time.Second)); ; { + select { + case <-ctx.Done(): + t.Error("job took too long") + cancel() + + return + + case <-tick.C: + } + + last := job.Status + + job, err = client.GetJob(ctx, job.ID) + if err != nil { + t.Errorf("failed to get job: %v", err) + cancel() + + return + } + + if job.Status != last { + t.Logf("%s => %s (%s):\n%s", last, job.Status, time.Since(job.CreatedAt), pretty(job)) + } + + if job.Status == unstructured.JobStatusCompleted { + cancel() + break + } + } + + for _, node := range job.OutputNodeFiles { + download, err := client.DownloadJob(ctx, unstructured.DownloadJobRequest{ + JobID: job.ID, + NodeID: node.NodeID, + FileID: node.FileID, + }) + if err != nil { + t.Errorf("failed to download job: %v", err) + return + } + + t.Cleanup(func() { _ = download.Close() }) + + f, err := os.Create(filepath.Join("testdata", node.FileID+".json")) + if err != nil { + t.Errorf("failed to create file: 
%v", err) + return + } + + t.Cleanup(func() { _ = f.Close() }) + + if _, err := io.Copy(f, download); err != nil { + t.Errorf("failed to read download: %v", err) + } + } +} diff --git a/test/testdata/1706.03762v7.pdf b/test/testdata/1706.03762v7.pdf new file mode 100644 index 0000000..97d7c51 Binary files /dev/null and b/test/testdata/1706.03762v7.pdf differ diff --git a/util_test.go b/util_test.go new file mode 100644 index 0000000..52c0092 --- /dev/null +++ b/util_test.go @@ -0,0 +1,30 @@ +package unstructured + +import ( + "fmt" + "slices" +) + +func eq[T comparable](name string, got, want T) error { + if want != got { + return fmt.Errorf("expected %s to be %v, got %v", name, want, got) + } + + return nil +} + +func equal[T interface{ Equal(T) bool }](name string, got, want T) error { + if !want.Equal(got) { + return fmt.Errorf("expected %s to be %v, got %v", name, want, got) + } + + return nil +} + +func eqs[T comparable](name string, got, want []T) error { + if !slices.Equal(got, want) { + return fmt.Errorf("expected %s to be %v, got %v", name, want, got) + } + + return nil +} diff --git a/workflow.go b/workflow.go index 7935a7d..4422b98 100644 --- a/workflow.go +++ b/workflow.go @@ -10,7 +10,7 @@ type Workflow struct { Sources []string `json:"sources"` Destinations []string `json:"destinations"` WorkflowType *WorkflowType `json:"workflow_type,omitempty"` - WorkflowNodes []WorkflowNode `json:"workflow_nodes"` + WorkflowNodes WorkflowNodes `json:"workflow_nodes"` Schedule *WorkflowSchedule `json:"schedule,omitempty"` Status WorkflowState `json:"status"` CreatedAt time.Time `json:"created_at,omitzero"` @@ -42,15 +42,6 @@ const ( WorkflowStateInactive WorkflowState = "inactive" ) -// WorkflowNode represents a node in a workflow, such as a partitioner, chunker, or embedder. 
-type WorkflowNode struct { - ID *string `json:"id,omitempty"` - Name string `json:"name"` - Type string `json:"type"` - Subtype string `json:"subtype"` - Settings map[string]interface{} `json:"settings,omitempty"` -} - // WorkflowSchedule represents a workflow schedule, which can include cron tab entries. type WorkflowSchedule struct { CronTabEntries []CronTabEntry `json:"crontab_entries"` diff --git a/workflow_create.go b/workflow_create.go index cd89118..a75058b 100644 --- a/workflow_create.go +++ b/workflow_create.go @@ -20,7 +20,7 @@ type CreateWorkflowRequest struct { } // CreateWorkflow creates a new workflow -func (c *Client) CreateWorkflow(ctx context.Context, in CreateWorkflowRequest) (*Workflow, error) { +func (c *Client) CreateWorkflow(ctx context.Context, in *CreateWorkflowRequest) (*Workflow, error) { body, err := json.Marshal(in) if err != nil { return nil, fmt.Errorf("failed to marshal workflow request: %w", err) @@ -28,7 +28,7 @@ func (c *Client) CreateWorkflow(ctx context.Context, in CreateWorkflowRequest) ( req, err := http.NewRequestWithContext(ctx, http.MethodPost, - c.endpoint.JoinPath("/workflows").String(), + c.endpoint.JoinPath("workflows/").String(), bytes.NewReader(body), ) if err != nil { diff --git a/workflow_create_test.go b/workflow_create_test.go new file mode 100644 index 0000000..7efb262 --- /dev/null +++ b/workflow_create_test.go @@ -0,0 +1,55 @@ +package unstructured + +import ( + "errors" + "net/http" + "testing" + "time" +) + +func TestCreateWorkflow(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.CreateWorkflow = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`{` + + ` "created_at": "2025-06-22T11:37:21.648Z",` + + ` "destinations": ["aeebecc7-9d8e-4625-bf1d-815c2f084869"],` + + ` "id": "16b80fee-64dc-472d-8f26-1d7729b6423d",` + + ` "name": "test_workflow",` + + ` "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * 0"}]},` + + ` "sources": 
["f1f7b1b2-8e4b-4a2b-8f1d-3e3c7c9e5a3c"],` + + ` "workflow_nodes": [],` + + ` "status": "active",` + + ` "workflow_type": "advanced"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write(response) + } + + workflow, err := client.CreateWorkflow(t.Context(), &CreateWorkflowRequest{ + Name: "test_workflow", + WorkflowType: WorkflowTypeAdvanced, + Schedule: String("weekly"), + SourceID: String("f1f7b1b2-8e4b-4a2b-8f1d-3e3c7c9e5a3c"), + DestinationID: String("aeebecc7-9d8e-4625-bf1d-815c2f084869"), + }) + if err != nil { + t.Fatalf("failed to create workflow: %v", err) + } + + if err := errors.Join( + eq("new_workflow.id", workflow.ID, "16b80fee-64dc-472d-8f26-1d7729b6423d"), + eq("new_workflow.name", workflow.Name, "test_workflow"), + eq("new_workflow.status", workflow.Status, WorkflowStateActive), + eq("new_workflow.workflow_type", ToVal(workflow.WorkflowType), WorkflowTypeAdvanced), + equal("new_workflow.created_at", workflow.CreatedAt, time.Date(2025, 6, 22, 11, 37, 21, 648000000, time.UTC)), + eqs("new_workflow.sources", workflow.Sources, []string{"f1f7b1b2-8e4b-4a2b-8f1d-3e3c7c9e5a3c"}), + eqs("new_workflow.destinations", workflow.Destinations, []string{"aeebecc7-9d8e-4625-bf1d-815c2f084869"}), + ); err != nil { + t.Error(err) + } +} diff --git a/workflow_delete.go b/workflow_delete.go index 9a048ca..5e77aed 100644 --- a/workflow_delete.go +++ b/workflow_delete.go @@ -10,7 +10,7 @@ import ( func (c *Client) DeleteWorkflow(ctx context.Context, id string) error { req, err := http.NewRequestWithContext(ctx, http.MethodDelete, - c.endpoint.JoinPath("/workflows", id).String(), + c.endpoint.JoinPath("workflows", id).String(), nil, ) if err != nil { diff --git a/workflow_get.go b/workflow_get.go index 4a271c1..269c16b 100644 --- a/workflow_get.go +++ b/workflow_get.go @@ -10,7 +10,7 @@ import ( func (c *Client) GetWorkflow(ctx context.Context, id string) (*Workflow, error) { req, err := http.NewRequestWithContext(ctx, 
http.MethodGet, - c.endpoint.JoinPath("/workflows", id).String(), + c.endpoint.JoinPath("workflows", id).String(), nil, ) if err != nil { diff --git a/workflow_list.go b/workflow_list.go index 15ef2a4..a1e7d4e 100644 --- a/workflow_list.go +++ b/workflow_list.go @@ -30,7 +30,7 @@ type ListWorkflowsRequest struct { func (c *Client) ListWorkflows(ctx context.Context, in *ListWorkflowsRequest) ([]Workflow, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, - c.endpoint.JoinPath("/workflows").String(), + c.endpoint.JoinPath("workflows").String(), nil, ) if err != nil { diff --git a/workflow_list_test.go b/workflow_list_test.go new file mode 100644 index 0000000..9fc1dd2 --- /dev/null +++ b/workflow_list_test.go @@ -0,0 +1,132 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "strings" + "testing" + "time" +) + +func TestListWorkflows(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListWorkflows = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`[` + + ` {` + + ` "created_at": "2025-06-22T11:37:21.648Z",` + + ` "destinations": ["aeebecc7-9d8e-4625-bf1d-815c2f084869"],` + + ` "id": "16b80fee-64dc-472d-8f26-1d7729b6423d",` + + ` "name": "test_workflow",` + + ` "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * 0"}]},` + + ` "sources": ["f1f7b1b2-8e4b-4a2b-8f1d-3e3c7c9e5a3c"],` + + ` "workflow_nodes": [],` + + ` "status": "active",` + + ` "workflow_type": "advanced"` + + ` }` + + `]`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + workflows, err := client.ListWorkflows(t.Context(), &ListWorkflowsRequest{ + SortBy: String("id"), + }) + if err != nil { + t.Fatalf("failed to list workflows: %v", err) + } + + if len(workflows) != 1 { + t.Fatalf("expected 1 workflow, got %d", len(workflows)) + } + + workflow := workflows[0] + if err := errors.Join( + eq("workflow.id", workflow.ID, 
"16b80fee-64dc-472d-8f26-1d7729b6423d"), + eq("workflow.name", workflow.Name, "test_workflow"), + eq("workflow.workflow_type", ToVal(workflow.WorkflowType), WorkflowTypeAdvanced), + eq("workflow.status", workflow.Status, WorkflowStateActive), + equal("workflow.created_at", workflow.CreatedAt, time.Date(2025, 6, 22, 11, 37, 21, 648000000, time.UTC)), + eqs("workflow.schedule.crontab_entries", ToVal(workflow.Schedule).CronTabEntries, []CronTabEntry{{CronExpression: "0 0 * * 0"}}), + eqs("workflow.sources", workflow.Sources, []string{"f1f7b1b2-8e4b-4a2b-8f1d-3e3c7c9e5a3c"}), + eqs("workflow.destinations", workflow.Destinations, []string{"aeebecc7-9d8e-4625-bf1d-815c2f084869"}), + ); err != nil { + t.Error(err) + } +} + +func TestListWorkflowsEmpty(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListWorkflows = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`[]`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + workflows, err := client.ListWorkflows(t.Context(), &ListWorkflowsRequest{ + SortBy: String("id"), + }) + if err != nil { + t.Fatalf("failed to list workflows: %v", err) + } + + if len(workflows) != 0 { + t.Fatalf("expected 0 workflows, got %d", len(workflows)) + } +} + +func TestListWorkflowsErrorCode(t *testing.T) { + t.Parallel() + + for _, code := range []int{ + http.StatusBadRequest, // 400 + http.StatusUnauthorized, // 401 + http.StatusForbidden, // 403 + http.StatusNotFound, // 404 + http.StatusInternalServerError, // 500 + http.StatusBadGateway, // 502 + http.StatusServiceUnavailable, // 503 + http.StatusGatewayTimeout, // 504 + } { + t.Run(strconv.Itoa(code), func(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + mux.ListWorkflows = func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + } + + _, err := 
client.ListWorkflows(t.Context(), &ListWorkflowsRequest{ + SortBy: String("id"), + }) + if err == nil { + t.Fatalf("expected error, got nil") + } + + if !strings.Contains(err.Error(), "API error occurred") { + t.Fatalf("expected error to contain 'API error occurred', got %v", err) + } + + var apierr *APIError + if !errors.As(err, &apierr) { + t.Fatalf("expected error to be an %T, got %T", apierr, err) + } + + if apierr.Code != code { + t.Fatalf("expected error code to be %d, got %d", code, apierr.Code) + } + }) + } +} diff --git a/workflow_node.go b/workflow_node.go new file mode 100644 index 0000000..e01036d --- /dev/null +++ b/workflow_node.go @@ -0,0 +1,163 @@ +package unstructured + +import ( + "encoding/json" + "errors" + "fmt" +) + +// WorkflowNodes is a slice of WorkflowNode. +type WorkflowNodes []WorkflowNode + +// ValidateNodeOrder validates the order of nodes in a workflow. +func (w WorkflowNodes) ValidateNodeOrder() (err error) { + if len(w) == 0 { + return nil + } + + // you have to partition. + switch w[0].(type) { + case *PartitionerAuto, *PartitionerVLM, *PartitionerHiRes, *PartitionerFast: + // good + default: + err = errors.Join(err, errors.New("first node must be a partitioner")) + } + + last := nodeTypePartition + + for i, node := range w[1:] { + switch node.(type) { + case *PartitionerAuto, *PartitionerVLM, *PartitionerHiRes, *PartitionerFast: + err = errors.Join(err, errors.New("only the first node may be a partitioner")) + + case *ChunkerCharacter, *ChunkerTitle, *ChunkerPage, *ChunkerSimilarity: + // you can chunk after you partition. + if last != nodeTypePartition && last != nodeTypeEnrich { + err = errors.Join(err, fmt.Errorf("%s must be after %s or %s", nodeTypeChunk, nodeTypePartition, nodeTypeEnrich)) + } + + last = nodeTypeChunk + + case *Embedder: + // you can embed after you chunk. 
+ if last != nodeTypeChunk { + err = errors.Join(err, fmt.Errorf("%s must be after %s", nodeTypeEmbed, nodeTypeChunk)) + } + + last = nodeTypeEmbed + + case *Enricher: + // you can enrich before you chunk... + if i == len(w[1:])-1 { + err = errors.Join(err, errors.New("enricher must not be the last node")) + } + + // and after you partition or enrich. + if last != nodeTypePartition && last != nodeTypeEnrich { + err = errors.Join(err, fmt.Errorf("%s must be after %s or %s", nodeTypeEnrich, nodeTypePartition, nodeTypeEnrich)) + } + + last = nodeTypeEnrich + + default: + err = errors.Join(err, fmt.Errorf("invalid node type %T at index %d", node, i+1)) + } + } + + return err +} + +type ( + // Embedder represents an embedding node in a workflow. + Embedder struct{ WorkflowNode } +) + +// MarshalJSON implements the json.Marshaler interface. +func (w WorkflowNodes) MarshalJSON() ([]byte, error) { + nodes := make([]json.RawMessage, len(w)) + + for i, node := range w { + msg, err := json.Marshal(node) + if err != nil { + return nil, fmt.Errorf("failed to marshal workflow node: %w", err) + } + + nodes[i] = msg + } + + headerData, err := json.Marshal(nodes) + if err != nil { + return nil, fmt.Errorf("failed to marshal workflow nodes: %w", err) + } + + return headerData, nil +} + +// UnmarshalJSON implements the json.Unmarshaler interface. +func (w *WorkflowNodes) UnmarshalJSON(data []byte) error { + var nodes []json.RawMessage + if err := json.Unmarshal(data, &nodes); err != nil { + return fmt.Errorf("failed to unmarshal workflow nodes: %w", err) + } + + if cap(*w) < len(nodes) { + *w = make(WorkflowNodes, 0, len(nodes)) + } + + for _, node := range nodes { + val, err := unmarshalNode(node) + if err != nil { + return fmt.Errorf("failed to unmarshal workflow node: %w", err) + } + + *w = append(*w, val) + } + + return nil +} + +// WorkflowNode is a node in a workflow. 
+type WorkflowNode interface { + json.Marshaler + isNode() +} + +type header struct { + ID string `json:"id,omitempty"` + Name string `json:"name"` + Type string `json:"type"` + Subtype string `json:"subtype"` + Settings json.RawMessage `json:"settings"` +} + +const ( + nodeTypePartition = "partition" + nodeTypeEnrich = "prompter" + nodeTypeChunk = "chunk" + nodeTypeEmbed = "embed" +) + +func unmarshalNode(data []byte) (WorkflowNode, error) { + var header header + if err := json.Unmarshal(data, &header); err != nil { + return nil, fmt.Errorf("failed to unmarshal workflow node: %w", err) + } + + switch header.Type { + case nodeTypePartition: + return unmarshalPartitioner(header) + + case nodeTypeChunk: + return unmarshalChunker(header) + + case nodeTypeEmbed: + return unmarshalEmbedder(header) + + case nodeTypeEnrich: + return unmarshalEnricher(header) + } + + return nil, fmt.Errorf("unknown node type: %s", header.Type) +} + +func unmarshalEmbedder(_ header) (WorkflowNode, error) { return &Embedder{}, nil } diff --git a/workflow_run.go b/workflow_run.go index 8a7f57a..2c1fcaf 100644 --- a/workflow_run.go +++ b/workflow_run.go @@ -1,29 +1,61 @@ package unstructured import ( + "bytes" "context" "fmt" + "io" + "mime/multipart" "net/http" ) // RunWorkflowRequest represents the request to run a workflow type RunWorkflowRequest struct { - InputFiles []string + ID string + + // InputFiles is a list of files to upload to the workflow. + // The files must implement the io.Reader interface. + InputFiles []File +} + +// File represents a file to upload to the workflow. +type File interface { + Name() string + io.Reader +} + +// FileBytes implements the File interface for an io.Reader in memory. +type FileBytes struct { + Filename string + Bytes io.Reader +} + +// Name returns the name of the file. +func (f *FileBytes) Name() string { return f.Filename } + +// Read reads the file into the given buffer. 
+func (f *FileBytes) Read(p []byte) (n int, err error) { + return f.Bytes.Read(p) //nolint:wrapcheck } // RunWorkflow runs a workflow by triggering a new job -func (c *Client) RunWorkflow(ctx context.Context, id string, _ *RunWorkflowRequest) (*Job, error) { - // For now, we'll implement a simple version without file uploads - // The actual implementation would need multipart form data handling +func (c *Client) RunWorkflow(ctx context.Context, in *RunWorkflowRequest) (*Job, error) { req, err := http.NewRequestWithContext(ctx, http.MethodPost, - c.endpoint.JoinPath("/workflows", id, "run").String(), + c.endpoint.JoinPath("workflows", in.ID, "run").String(), nil, ) if err != nil { return nil, fmt.Errorf("failed to create HTTP request: %w", err) } + // Determine if we need to upload files + if len(in.InputFiles) > 0 { + if err := addfiles(req, in.InputFiles); err != nil { + return nil, fmt.Errorf("failed to add files to request: %w", err) + } + } + var job Job if err := c.do(req, &job); err != nil { return nil, fmt.Errorf("failed to run workflow: %w", err) @@ -31,3 +63,36 @@ func (c *Client) RunWorkflow(ctx context.Context, id string, _ *RunWorkflowReque return &job, nil } + +// runWorkflowWithoutFiles handles the case where no files are provided +func addfiles(req *http.Request, files []File) error { + // Create a buffer to hold the multipart form data + var buf bytes.Buffer + writer := multipart.NewWriter(&buf) + + // Add each file to the multipart form + for _, f := range files { + // Create a form file field + part, err := writer.CreateFormFile("input_files", f.Name()) + if err != nil { + return fmt.Errorf("failed to create form file for %s: %w", f, err) + } + + // Copy the file content to the form part + if _, err := io.Copy(part, f); err != nil { + return fmt.Errorf("failed to copy file content for %s: %w", f, err) + } + } + + // Close the multipart writer + if err := writer.Close(); err != nil { + return fmt.Errorf("failed to close multipart writer: %w", err) 
+ } + + req.Body = io.NopCloser(&buf) + + // Set the content type header for multipart form data + req.Header.Set("Content-Type", writer.FormDataContentType()) + + return nil +} diff --git a/workflow_run_test.go b/workflow_run_test.go new file mode 100644 index 0000000..14958a3 --- /dev/null +++ b/workflow_run_test.go @@ -0,0 +1,49 @@ +package unstructured + +import ( + "errors" + "net/http" + "testing" +) + +func TestRunWorkflow(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "16b80fee-64dc-472d-8f26-1d7729b6423d" + + mux.RunWorkflow = func(w http.ResponseWriter, r *http.Request) { + if val := r.PathValue("id"); val != id { + http.Error(w, "workflow ID "+val+" not found", http.StatusNotFound) + return + } + + response := []byte(`{` + + ` "created_at": "2025-06-22T11:37:21.648Z",` + + ` "id": "fcdc4994-eea5-425c-91fa-e03f2bd8030d",` + + ` "status": "IN_PROGRESS",` + + ` "runtime": null,` + + ` "workflow_id": "` + id + `",` + + ` "workflow_name": "test_workflow"` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusAccepted) + w.Write(response) + } + + job, err := client.RunWorkflow(t.Context(), &RunWorkflowRequest{ID: id}) + if err != nil { + t.Fatalf("failed to run workflow: %v", err) + } + + if err := errors.Join( + eq("new_job.id", job.ID, "fcdc4994-eea5-425c-91fa-e03f2bd8030d"), + eq("new_job.workflow_id", job.WorkflowID, id), + eq("new_job.workflow_name", job.WorkflowName, "test_workflow"), + eq("new_job.status", job.Status, JobStatusInProgress), + ); err != nil { + t.Error(err) + } +} diff --git a/workflow_update.go b/workflow_update.go index 63844a9..fea6f59 100644 --- a/workflow_update.go +++ b/workflow_update.go @@ -30,7 +30,7 @@ func (c *Client) UpdateWorkflow(ctx context.Context, in UpdateWorkflowRequest) ( req, err := http.NewRequestWithContext(ctx, http.MethodPut, - c.endpoint.JoinPath("/workflows", in.ID).String(), + c.endpoint.JoinPath("workflows", in.ID).String(), bytes.NewReader(body), 
) if err != nil { diff --git a/workflow_update_test.go b/workflow_update_test.go new file mode 100644 index 0000000..6b7b699 --- /dev/null +++ b/workflow_update_test.go @@ -0,0 +1,60 @@ +package unstructured + +import ( + "errors" + "net/http" + "strconv" + "testing" + "time" +) + +func TestUpdateWorkflow(t *testing.T) { + t.Parallel() + + client, mux := testclient(t) + + id := "16b80fee-64dc-472d-8f26-1d7729b6423d" + + mux.UpdateWorkflow = func(w http.ResponseWriter, _ *http.Request) { + response := []byte(`{` + + ` "id": "` + id + `",` + + ` "name": "test_workflow",` + + ` "status": "active",` + + ` "workflow_type": "advanced",` + + ` "created_at": "2025-06-22T11:37:21.648Z",` + + ` "sources": ["f1f7b1b2-8e4b-4a2b-8f1d-3e3c7c9e5a3c"],` + + ` "destinations": ["aeebecc7-9d8e-4625-bf1d-815c2f084869"],` + + ` "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * 0"}]},` + + ` "workflow_nodes": []` + + `}`) + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(response))) + w.Write(response) + } + + updated, err := client.UpdateWorkflow(t.Context(), UpdateWorkflowRequest{ + ID: id, + Name: String("test_workflow"), + WorkflowType: Ptr(WorkflowTypeAdvanced), + Schedule: String("weekly"), + SourceID: String("f1f7b1b2-8e4b-4a2b-8f1d-3e3c7c9e5a3c"), + DestinationID: String("aeebecc7-9d8e-4625-bf1d-815c2f084869"), + }) + if err != nil { + t.Fatalf("failed to update workflow: %v", err) + } + + if err := errors.Join( + eq("updated_workflow.id", updated.ID, id), + eq("updated_workflow.name", updated.Name, "test_workflow"), + eq("updated_workflow.status", updated.Status, WorkflowStateActive), + eq("updated_workflow.workflow_type", ToVal(updated.WorkflowType), WorkflowTypeAdvanced), + equal("workflow.created_at", updated.CreatedAt, time.Date(2025, 6, 22, 11, 37, 21, 648000000, time.UTC)), + eqs("updated_workflow.sources", updated.Sources, []string{"f1f7b1b2-8e4b-4a2b-8f1d-3e3c7c9e5a3c"}), + 
eqs("updated_workflow.destinations", updated.Destinations, []string{"aeebecc7-9d8e-4625-bf1d-815c2f084869"}), + eqs("updated_workflow.schedule.crontab_entries", updated.Schedule.CronTabEntries, []CronTabEntry{{CronExpression: "0 0 * * 0"}}), + ); err != nil { + t.Error(err) + } +}