diff --git a/README.md b/README.md index 6a147e5..73a61ea 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ The CLI provides three main command groups: - **`openapi spec`** - Commands for working with OpenAPI specifications ([documentation](./cmd/openapi/commands/openapi/README.md)) - `bootstrap` - Create a new OpenAPI document with best practice examples - `bundle` - Bundle external references into components section - - `clean` - Remove unused components from an OpenAPI specification + - `clean` - Remove unused components and unused top-level tags from an OpenAPI specification - `explore` - Interactively explore an OpenAPI specification in the terminal - `inline` - Inline all references in an OpenAPI specification - `join` - Join multiple OpenAPI documents into a single document diff --git a/cmd/openapi/commands/openapi/README.md b/cmd/openapi/commands/openapi/README.md index 6475ff5..3fa2e95 100644 --- a/cmd/openapi/commands/openapi/README.md +++ b/cmd/openapi/commands/openapi/README.md @@ -135,7 +135,7 @@ paths: ### `clean` -Remove unused components from an OpenAPI specification to create a cleaner, more maintainable document. +Remove unused components and unused top‑level tags from an OpenAPI specification using reachability seeded from /paths and top‑level security. ```bash # Clean to stdout (pipe-friendly) @@ -150,10 +150,12 @@ openapi spec clean -w ./spec.yaml What cleaning does: -- Removes unused components from all component types (schemas, responses, parameters, etc.) -- Tracks all references throughout the document including `$ref` and security scheme name references -- Preserves all components that are actually used in the specification -- Handles complex reference patterns including circular references and nested components +- Performs reachability-based cleanup seeded only from API surface areas (/paths and top‑level security) +- Expands through $ref links to components until a fixed point is reached +- Preserves security schemes referenced by name in security requirement objects (global and operation‑level) +- Removes unused components from all component types (schemas, responses, parameters, examples, request bodies, headers, links, callbacks, path items) +- Removes unused top‑level tags that are not referenced by any operation +- Handles complex reference patterns; only components reachable from the API surface are kept **Before cleaning:** @@ -823,6 +825,7 @@ openapi spec snip -w --operation /internal/debug:GET ./spec.yaml **Two Operation Modes:** **Interactive Mode** (no operation flags): + - Launch a terminal UI to browse all operations - Select operations with Space key - Press 'a' to select all, 'A' to deselect all @@ -830,6 +833,7 @@ openapi spec snip -w --operation /internal/debug:GET ./spec.yaml - Press 'q' or Esc to cancel **Command-Line Mode** (operation flags specified): + - Remove operations specified via flags without UI - Supports `--operationId` for operation IDs - Supports `--operation` for path:method pairs @@ -951,7 +955,7 @@ openapi spec clean | \ openapi spec upgrade | \ openapi spec validate -# Alternative: Clean after bundling to remove unused components +# Alternative: Clean after bundling to remove unused components and unused top-level tags openapi spec bundle ./spec.yaml ./bundled.yaml openapi spec clean ./bundled.yaml ./clean-bundled.yaml openapi spec validate ./clean-bundled.yaml diff --git a/cmd/openapi/commands/openapi/clean.go b/cmd/openapi/commands/openapi/clean.go index 48609cb..74a5ede 100644 --- a/cmd/openapi/commands/openapi/clean.go +++ b/cmd/openapi/commands/openapi/clean.go @@ -12,15 +12,19 @@ import ( var cleanCmd = &cobra.Command{ Use: "clean [output-file]", - Short: "Remove unused components from an OpenAPI specification", - Long: `Remove unused components from an OpenAPI specification to create a cleaner, more focused document. + Short: "Remove unused components and unused top-level tags from an OpenAPI specification", + Long: `Remove unused components and unused top-level tags from an OpenAPI specification to create a cleaner, more focused document. -This command analyzes an OpenAPI document to identify which components are actually referenced -and removes any unused components, reducing document size and improving clarity. +This command uses reachability-based analysis to keep only what is actually used by the API surface: +- Seeds reachability exclusively from API surface areas: entries under /paths and the top-level security section +- Expands through $ref links across component sections until a fixed point is reached +- Preserves security schemes referenced by name in security requirement objects (global or operation-level) +- Prunes any components that are not reachable from the API surface +- Removes unused top-level tags that are not referenced by any operation What gets cleaned: - Unused schemas in components/schemas -- Unused responses in components/responses +- Unused responses in components/responses - Unused parameters in components/parameters - Unused examples in components/examples - Unused request bodies in components/requestBodies @@ -29,14 +33,13 @@ What gets cleaned: - Unused links in components/links - Unused callbacks in components/callbacks - Unused path items in components/pathItems +- Unused top-level tags (global tags not referenced by any operation) Special handling for security schemes: Security schemes can be referenced in two ways: 1. By $ref (like other components) 2. By name in security requirement objects (global or operation-level) - -The clean command correctly handles both cases and preserves security schemes -that are referenced by name in security blocks. +The clean command correctly handles both cases and preserves security schemes that are referenced by name in security blocks. Benefits of cleaning: - Reduce document size by removing dead code diff --git a/cmd/openapi/commands/openapi/snip.go b/cmd/openapi/commands/openapi/snip.go index b6d9bf3..8ccf1a5 100644 --- a/cmd/openapi/commands/openapi/snip.go +++ b/cmd/openapi/commands/openapi/snip.go @@ -13,9 +13,11 @@ import ( ) var ( - snipWriteInPlace bool - snipOperationIDs []string - snipOperations []string + snipWriteInPlace bool + snipOperationIDs []string + snipOperations []string + snipKeepOperationIDs []string + snipKeepOperations []string ) var snipCmd = &cobra.Command{ @@ -73,6 +75,9 @@ func init() { snipCmd.Flags().BoolVarP(&snipWriteInPlace, "write", "w", false, "write result in-place to input file") snipCmd.Flags().StringSliceVar(&snipOperationIDs, "operationId", nil, "operation ID to remove (can be comma-separated or repeated)") snipCmd.Flags().StringSliceVar(&snipOperations, "operation", nil, "operation as path:method to remove (can be comma-separated or repeated)") + // Keep-mode flags (mutually exclusive with remove-mode flags) + snipCmd.Flags().StringSliceVar(&snipKeepOperationIDs, "keepOperationId", nil, "operation ID to keep (can be comma-separated or repeated)") + snipCmd.Flags().StringSliceVar(&snipKeepOperations, "keepOperation", nil, "operation as path:method to keep (can be comma-separated or repeated)") } func runSnip(cmd *cobra.Command, args []string) error { @@ -84,20 +89,29 @@ func runSnip(cmd *cobra.Command, args []string) error { outputFile = args[1] } - // Check if any operations were specified via flags - hasOperationFlags := len(snipOperationIDs) > 0 || len(snipOperations) > 0 + // Check which flag sets were specified + hasRemoveFlags := len(snipOperationIDs) > 0 || len(snipOperations) > 0 + hasKeepFlags := len(snipKeepOperationIDs) > 0 || len(snipKeepOperations) > 0 - // If -w is specified without operation flags, error - if snipWriteInPlace && !hasOperationFlags { - return fmt.Errorf("--write flag requires specifying operations via --operationId or --operation flags") + // If -w is specified without any operation selection flags, error + if snipWriteInPlace && !(hasRemoveFlags || hasKeepFlags) { + return fmt.Errorf("--write flag requires specifying operations via --operationId/--operation or --keepOperationId/--keepOperation") } - if !hasOperationFlags { - // No flags - interactive mode + // Interactive mode when no flags provided + if !hasRemoveFlags && !hasKeepFlags { return runSnipInteractive(ctx, inputFile, outputFile) } - // Flags specified - CLI mode + // Disallow mixing keep + remove flags; ambiguous intent + if hasRemoveFlags && hasKeepFlags { + return fmt.Errorf("cannot combine keep and remove flags; use either --operationId/--operation or --keepOperationId/--keepOperation") + } + + // CLI mode + if hasKeepFlags { + return runSnipCLIKeep(ctx, inputFile, outputFile) + } return runSnipCLI(ctx, inputFile, outputFile) } @@ -139,6 +153,87 @@ func runSnipCLI(ctx context.Context, inputFile, outputFile string) error { return processor.WriteDocument(ctx, doc) } +func runSnipCLIKeep(ctx context.Context, inputFile, outputFile string) error { + // Create processor + processor, err := NewOpenAPIProcessor(inputFile, outputFile, snipWriteInPlace) + if err != nil { + return err + } + + // Load document + doc, validationErrors, err := processor.LoadDocument(ctx) + if err != nil { + return err + } + + // Report validation errors (if any) + processor.ReportValidationErrors(validationErrors) + + // Parse keep flags + keepOps, err := parseKeepOperationFlags() + if err != nil { + return err + } + if len(keepOps) == 0 { + return fmt.Errorf("no operations specified to keep") + } + + // Collect all operations from the document + allOps, err := explore.CollectOperations(ctx, doc) + if err != nil { + return fmt.Errorf("failed to collect operations: %w", err) + } + if len(allOps) == 0 { + return fmt.Errorf("no operations found in the OpenAPI document") + } + + // Build lookup sets for keep filters + keepByID := map[string]bool{} + keepByPathMethod := map[string]bool{} + for _, k := range keepOps { + if k.OperationID != "" { + keepByID[k.OperationID] = true + } + if k.Path != "" && k.Method != "" { + key := strings.ToUpper(k.Method) + " " + k.Path + keepByPathMethod[key] = true + } + } + + // Compute removal list = all - keep + var operationsToRemove []openapi.OperationIdentifier + for _, op := range allOps { + if op.OperationID != "" && keepByID[op.OperationID] { + continue + } + key := strings.ToUpper(op.Method) + " " + op.Path + if keepByPathMethod[key] { + continue + } + operationsToRemove = append(operationsToRemove, openapi.OperationIdentifier{ + Path: op.Path, + Method: strings.ToUpper(op.Method), + }) + } + + // If nothing to remove, write as-is + if len(operationsToRemove) == 0 { + processor.PrintSuccess("No operations to remove based on keep filters; writing document unchanged") + return processor.WriteDocument(ctx, doc) + } + + // Perform the snip + removed, err := openapi.Snip(ctx, doc, operationsToRemove) + if err != nil { + return fmt.Errorf("failed to snip operations: %w", err) + } + + processor.PrintSuccess(fmt.Sprintf("Successfully kept %d operation(s) and removed %d operation(s) with cleanup", len(allOps)-removed, removed)) + + // Write the snipped document + return processor.WriteDocument(ctx, doc) +} + func runSnipInteractive(ctx context.Context, inputFile, outputFile string) error { // Load the OpenAPI document doc, err := loadOpenAPIDocument(ctx, inputFile) @@ -306,6 +401,47 @@ func parseOperationFlags() ([]openapi.OperationIdentifier, error) { return operations, nil } +// parseKeepOperationFlags parses the keep flags into operation identifiers +// Handles both repeated flags and comma-separated values +func parseKeepOperationFlags() ([]openapi.OperationIdentifier, error) { + var operations []openapi.OperationIdentifier + + // Parse keep operation IDs + for _, opID := range snipKeepOperationIDs { + if opID != "" { + operations = append(operations, openapi.OperationIdentifier{ + OperationID: opID, + }) + } + } + + // Parse keep path:method operations + for _, op := range snipKeepOperations { + if op == "" { + continue + } + + parts := strings.SplitN(op, ":", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid keep operation format: %s (expected path:METHOD format, e.g., /users:GET)", op) + } + + path := parts[0] + method := strings.ToUpper(parts[1]) + + if path == "" || method == "" { + return nil, fmt.Errorf("invalid keep operation format: %s (path and method cannot be empty)", op) + } + + operations = append(operations, openapi.OperationIdentifier{ + Path: path, + Method: method, + }) + } + + return operations, nil +} + // GetSnipCommand returns the snip command for external use func GetSnipCommand() *cobra.Command { return snipCmd diff --git a/cmd/openapi/internal/explore/tui/model.go b/cmd/openapi/internal/explore/tui/model.go index 9819f08..2008291 100644 --- a/cmd/openapi/internal/explore/tui/model.go +++ b/cmd/openapi/internal/explore/tui/model.go @@ -140,8 +140,8 @@ func NewModelWithConfig(operations []explore.OperationInfo, docTitle, docVersion // Only relevant when selectionConfig.Enabled is true func (m Model) GetSelectedOperations() []explore.OperationInfo { var selected []explore.OperationInfo - for idx := range m.selected { - if idx < len(m.operations) { + for idx, isSelected := range m.selected { + if isSelected && idx < len(m.operations) { selected = append(selected, m.operations[idx]) } } diff --git a/cmd/openapi/internal/explore/tui/model_test.go b/cmd/openapi/internal/explore/tui/model_test.go new file mode 100644 index 0000000..7bcbdf9 --- /dev/null +++ b/cmd/openapi/internal/explore/tui/model_test.go @@ -0,0 +1,119 @@ +package tui_test + +import ( + "testing" + + "github.com/speakeasy-api/openapi/cmd/openapi/internal/explore" + "github.com/stretchr/testify/assert" +) + +// TestGetSelectedOperations_SelectAllThenDeselect reproduces GEN-2003 +// where selecting all operations then deselecting some would still return all operations +func TestGetSelectedOperations_SelectAllThenDeselect(t *testing.T) { + t.Parallel() + + // Create test operations + operations := []explore.OperationInfo{ + {Path: "/users", Method: "GET", OperationID: "getUsers"}, + {Path: "/users", Method: "POST", OperationID: "createUser"}, + {Path: "/users/{id}", Method: "GET", OperationID: "getUser"}, + {Path: "/users/{id}", Method: "DELETE", OperationID: "deleteUser"}, + {Path: "/posts", Method: "GET", OperationID: "getPosts"}, + } + + // Simulate selecting all operations (like pressing 'a') + // Then deselecting some (like pressing Space on specific operations) + + // Create a model with all operations initially selected + selectedMap := make(map[int]bool) + for i := range operations { + selectedMap[i] = true + } + + // Now deselect operations at indices 0 and 2 (simulating Space key on those) + selectedMap[0] = false + selectedMap[2] = false + + // Since we can't directly manipulate the model's internal state in tests, + // we're testing the core logic that was fixed in GetSelectedOperations() + + // Expected: Only operations at indices 1, 3, 4 should be returned + // (indices 0 and 2 were deselected) + expectedSelected := []explore.OperationInfo{ + operations[1], // POST /users + operations[3], // DELETE /users/{id} + operations[4], // GET /posts + } + + // Manual test of the fixed logic + var actualSelected []explore.OperationInfo + for idx, isSelected := range selectedMap { + if isSelected && idx < len(operations) { + actualSelected = append(actualSelected, operations[idx]) + } + } + + assert.ElementsMatch(t, expectedSelected, actualSelected, + "should only return operations that are marked as selected (true)") + assert.Len(t, actualSelected, 3, "should return 3 selected operations") + assert.NotContains(t, actualSelected, operations[0], "should not include deselected operation at index 0") + assert.NotContains(t, actualSelected, operations[2], "should not include deselected operation at index 2") +} + +// TestGetSelectedOperations_EmptySelection tests that no operations are returned when none are selected +func TestGetSelectedOperations_EmptySelection(t *testing.T) { + t.Parallel() + + operations := []explore.OperationInfo{ + {Path: "/users", Method: "GET", OperationID: "getUsers"}, + {Path: "/posts", Method: "GET", OperationID: "getPosts"}, + } + + selectedMap := make(map[int]bool) + // All entries are false or don't exist + + selectedMap[0] = false + selectedMap[1] = false + + var actualSelected []explore.OperationInfo + for idx, isSelected := range selectedMap { + if isSelected && idx < len(operations) { + actualSelected = append(actualSelected, operations[idx]) + } + } + + assert.Empty(t, actualSelected, "should return no operations when all are deselected") +} + +// TestGetSelectedOperations_PartialSelection tests mixed selection state +func TestGetSelectedOperations_PartialSelection(t *testing.T) { + t.Parallel() + + operations := []explore.OperationInfo{ + {Path: "/users", Method: "GET", OperationID: "getUsers"}, + {Path: "/users", Method: "POST", OperationID: "createUser"}, + {Path: "/posts", Method: "GET", OperationID: "getPosts"}, + } + + selectedMap := map[int]bool{ + 0: true, // Selected + 1: false, // Deselected + 2: true, // Selected + } + + expectedSelected := []explore.OperationInfo{ + operations[0], + operations[2], + } + + var actualSelected []explore.OperationInfo + for idx, isSelected := range selectedMap { + if isSelected && idx < len(operations) { + actualSelected = append(actualSelected, operations[idx]) + } + } + + assert.ElementsMatch(t, expectedSelected, actualSelected, + "should only return operations marked as selected") + assert.Len(t, actualSelected, 2, "should return 2 selected operations") +} diff --git a/openapi/clean.go b/openapi/clean.go index 3ac23d2..7554b7c 100644 --- a/openapi/clean.go +++ b/openapi/clean.go @@ -9,18 +9,24 @@ import ( "github.com/speakeasy-api/openapi/sequencedmap" ) -// Clean removes unused components from the OpenAPI document. -// It walks through the document to track all referenced components and removes -// any components that are not referenced. Security schemes are handled specially -// as they can be referenced by name in security blocks rather than by $ref. +// Clean removes unused, unreachable elements from the OpenAPI document using reachability from paths and security. +// +// How it works (high level): +// +// - Seed reachability only from: +// - Operations under /paths (responses, request bodies, parameters, schemas, etc.) +// - Security requirements (top-level and operation-level), referenced by $ref and by name +// - Expand reachability only through components already marked as used until a fixed point +// - Remove anything not reachable from those seeds +// - Also remove top-level tags that are not referenced by any operation // // This function modifies the document in place. // // Why use Clean? // -// - **Reduce document size**: Remove unused component definitions that bloat the specification -// - **Improve clarity**: Keep only the components that are actually used in the API -// - **Optimize tooling performance**: Smaller documents with fewer unused components process faster +// - **Reduce document size**: Remove unused component definitions and tags that bloat the specification +// - **Improve clarity**: Keep only the elements that are actually used by operations/security +// - **Optimize tooling performance**: Smaller documents with fewer unused elements process faster // - **Maintain clean specifications**: Prevent accumulation of dead code in API definitions // - **Prepare for distribution**: Clean up specifications before sharing or publishing // @@ -36,6 +42,7 @@ import ( // - Unused links in components/links // - Unused callbacks in components/callbacks // - Unused path items in components/pathItems +// - Unused tags in the top-level tags array // // Special handling for security schemes: // @@ -47,16 +54,13 @@ import ( // // Example usage: // -// // Load an OpenAPI document with potentially unused components -// doc := &OpenAPI{...} -// -// // Clean up unused components (modifies doc in place) +// // Load an OpenAPI document and prune unused elements (modifies doc in place) // err := Clean(ctx, doc) // if err != nil { // return fmt.Errorf("failed to clean document: %w", err) // } // -// // doc now has only the components that are actually referenced +// // doc now contains only elements reachable from /paths and security, with unused tags removed // // Parameters: // - ctx: Context for the operation @@ -65,12 +69,12 @@ import ( // Returns: // - error: Any error that occurred during cleaning func Clean(ctx context.Context, doc *OpenAPI) error { - if doc == nil || doc.Components == nil { + if doc == nil { return nil } // Track referenced components by type and name - referencedComponents := &referencedComponentTracker{ + referenced := &referencedComponentTracker{ schemas: make(map[string]bool), responses: make(map[string]bool), parameters: make(map[string]bool), @@ -83,58 +87,66 @@ func Clean(ctx context.Context, doc *OpenAPI) error { pathItems: make(map[string]bool), } - // Walk through the document and track all references - for item := range Walk(ctx, doc) { - err := item.Match(Matcher{ - // Track schema references - Schema: func(schema *oas3.JSONSchema[oas3.Referenceable]) error { - return trackSchemaReferences(schema, referencedComponents) - }, - // Track component references - ReferencedPathItem: func(ref *ReferencedPathItem) error { - return trackPathItemReference(ref, referencedComponents.pathItems) - }, - ReferencedParameter: func(ref *ReferencedParameter) error { - return trackParameterReference(ref, referencedComponents.parameters) - }, - ReferencedExample: func(ref *ReferencedExample) error { - return trackExampleReference(ref, referencedComponents.examples) - }, - ReferencedRequestBody: func(ref *ReferencedRequestBody) error { - return trackRequestBodyReference(ref, referencedComponents.requestBodies) - }, - ReferencedResponse: func(ref *ReferencedResponse) error { - return trackResponseReference(ref, referencedComponents.responses) - }, - ReferencedHeader: func(ref *ReferencedHeader) error { - return trackHeaderReference(ref, referencedComponents.headers) - }, - ReferencedCallback: func(ref *ReferencedCallback) error { - return trackCallbackReference(ref, referencedComponents.callbacks) - }, - ReferencedLink: func(ref *ReferencedLink) error { - return trackLinkReference(ref, referencedComponents.links) - }, - ReferencedSecurityScheme: func(ref *ReferencedSecurityScheme) error { - return trackSecuritySchemeReference(ref, referencedComponents.securitySchemes) - }, - // Track security requirements (special case for security schemes) - Security: func(req *SecurityRequirement) error { - if req != nil { - for schemeName := range req.All() { - referencedComponents.securitySchemes[schemeName] = true - } - } - return nil - }, + // Phase 1: Seed references only from within /paths + err := walkAndTrackWithFilter(ctx, doc, referenced, func(ptr string) bool { + // Only allow references originating under paths + return strings.HasPrefix(ptr, "/paths/") || strings.HasPrefix(ptr, "/security") + }) + if err != nil { + return fmt.Errorf("failed to track references from paths: %w", err) + } + + // Phase 2: Expand closure of references reachable from used components. + // We repeatedly walk the document but only allow visiting content under components + // that are already marked as referenced, until no new references are discovered. + for { + before := countTracked(referenced) + + err := walkAndTrackWithFilter(ctx, doc, referenced, func(ptr string) bool { + typ, name, ok := extractComponentTypeAndName(ptr) + if !ok { + return false + } + switch typ { + case "schemas": + return referenced.schemas[name] + case "responses": + return referenced.responses[name] + case "parameters": + return referenced.parameters[name] + case "examples": + return referenced.examples[name] + case "requestBodies": + return referenced.requestBodies[name] + case "headers": + return referenced.headers[name] + case "securitySchemes": + return referenced.securitySchemes[name] + case "links": + return referenced.links[name] + case "callbacks": + return referenced.callbacks[name] + case "pathItems": + return referenced.pathItems[name] + default: + return false + } }) if err != nil { - return fmt.Errorf("failed to track references: %w", err) + return fmt.Errorf("failed to expand reachable references: %w", err) + } + + after := countTracked(referenced) + if after == before { + break // fixed point reached } } // Remove unused components - removeUnusedComponentsFromDocument(doc, referencedComponents) + removeUnusedComponentsFromDocument(doc, referenced) + + // Remove unused top-level tags + removeUnusedTagsFromDocument(doc, referenced) return nil } @@ -151,6 +163,8 @@ type referencedComponentTracker struct { links map[string]bool callbacks map[string]bool pathItems map[string]bool + // tags used by operations (referenced by name) + tags map[string]bool } // trackSchemaReferences tracks references within JSON schemas @@ -299,6 +313,31 @@ func trackSecuritySchemeReference(ref *ReferencedSecurityScheme, tracker map[str return nil } +// trackOperationTags tracks operation tag names into the tracker +func trackOperationTags(op *Operation, tracker *referencedComponentTracker) error { + if op == nil || tracker == nil { + return nil + } + for _, tag := range op.GetTags() { + if tracker.tags == nil { + tracker.tags = make(map[string]bool) + } + tracker.tags[tag] = true + } + return nil +} + +// trackSecurityRequirementNames tracks security scheme names referenced by a security requirement +func trackSecurityRequirementNames(req *SecurityRequirement, tracker map[string]bool) error { + if req == nil || tracker == nil { + return nil + } + for schemeName := range req.All() { + tracker[schemeName] = true + } + return nil +} + // extractComponentName extracts the component name from a reference string func extractComponentName(refStr, componentType string) string { prefix := "#/components/" + componentType + "/" @@ -478,3 +517,153 @@ func removeUnusedComponentsFromDocument(doc *OpenAPI, tracker *referencedCompone doc.Components = nil } } + +// removeUnusedTagsFromDocument prunes tags declared in the top-level tags array +// when they are not referenced by any operation's tags. +func removeUnusedTagsFromDocument(doc *OpenAPI, tracker *referencedComponentTracker) { + if doc == nil { + return + } + + // If no tags are declared, nothing to do + if len(doc.Tags) == 0 { + return + } + + // If there were no tags referenced, drop tags entirely + if tracker == nil || len(tracker.tags) == 0 { + doc.Tags = nil + return + } + + // Keep only tags with names referenced by operations + kept := make([]*Tag, 0, len(doc.Tags)) + for _, tg := range doc.Tags { + if tg == nil { + continue + } + if tracker.tags[tg.GetName()] { + kept = append(kept, tg) + } + } + + if len(kept) > 0 { + doc.Tags = kept + } else { + doc.Tags = nil + } +} + +// walkAndTrackWithFilter walks the OpenAPI document and tracks referenced components, +// but only for WalkItems whose JSON pointer location satisfies the allow predicate. +func walkAndTrackWithFilter(ctx context.Context, doc *OpenAPI, tracker *referencedComponentTracker, allow func(ptr string) bool) error { + for item := range Walk(ctx, doc) { + loc := string(item.Location.ToJSONPointer()) + + if !allow(loc) { + // Skip tracking for this location + continue + } + + err := item.Match(Matcher{ + // Track schema references only when allowed by location + Schema: func(schema *oas3.JSONSchema[oas3.Referenceable]) error { + return trackSchemaReferences(schema, tracker) + }, + // Track component references only when allowed by location + ReferencedPathItem: func(ref *ReferencedPathItem) error { + return trackPathItemReference(ref, tracker.pathItems) + }, + ReferencedParameter: func(ref *ReferencedParameter) error { + return trackParameterReference(ref, tracker.parameters) + }, + ReferencedExample: func(ref *ReferencedExample) error { + return trackExampleReference(ref, tracker.examples) + }, + ReferencedRequestBody: func(ref *ReferencedRequestBody) error { + return trackRequestBodyReference(ref, tracker.requestBodies) + }, + ReferencedResponse: func(ref *ReferencedResponse) error { + return trackResponseReference(ref, tracker.responses) + }, + ReferencedHeader: func(ref *ReferencedHeader) error { + return trackHeaderReference(ref, tracker.headers) + }, + ReferencedCallback: func(ref *ReferencedCallback) error { + return trackCallbackReference(ref, tracker.callbacks) + }, + ReferencedLink: func(ref *ReferencedLink) error { + return trackLinkReference(ref, tracker.links) + }, + ReferencedSecurityScheme: func(ref *ReferencedSecurityScheme) error { + return trackSecuritySchemeReference(ref, tracker.securitySchemes) + }, + // Track operation tags (only under allowed locations) + Operation: func(op *Operation) error { + return trackOperationTags(op, tracker) + }, + // Track security requirements (special case for security schemes) + Security: func(req *SecurityRequirement) error { + return trackSecurityRequirementNames(req, tracker.securitySchemes) + }, + }) + if err != nil { + return fmt.Errorf("failed to track references: %w", err) + } + } + + return nil +} + +// extractComponentTypeAndName returns component type and name from a JSON pointer location like: +// +// /components/schemas/User/... -> ("schemas", "User", true) +// +// Returns ok=false if the pointer does not point under /components/{type}/{name} +func extractComponentTypeAndName(ptr string) (typ, name string, ok bool) { + const prefix = "/components/" + if !strings.HasPrefix(ptr, prefix) { + return "", "", false + } + + parts := strings.Split(ptr, "/") + // Expect at least: "", "components", "{type}", "{name}", ... + if len(parts) < 4 { + return "", "", false + } + + typ = parts[2] + name = unescapeJSONPointerToken(parts[3]) + if typ == "" || name == "" { + return "", "", false + } + return typ, name, true +} + +// unescapeJSONPointerToken reverses JSON Pointer escaping (~1 => /, ~0 => ~) +func unescapeJSONPointerToken(s string) string { + // Per RFC 6901: ~1 is '/', ~0 is '~'. Order matters: replace ~1 first, then ~0. + s = strings.ReplaceAll(s, "~1", "/") + s = strings.ReplaceAll(s, "~0", "~") + return s +} + +// countTracked returns the total number of referenced components recorded in the tracker. +// This is used to detect a fixed point during reachability expansion. +func countTracked(tr *referencedComponentTracker) int { + if tr == nil { + return 0 + } + total := 0 + total += len(tr.schemas) + total += len(tr.responses) + total += len(tr.parameters) + total += len(tr.examples) + total += len(tr.requestBodies) + total += len(tr.headers) + total += len(tr.securitySchemes) + total += len(tr.links) + total += len(tr.callbacks) + total += len(tr.pathItems) + return total +} diff --git a/openapi/clean_paths_reachability_test.go b/openapi/clean_paths_reachability_test.go new file mode 100644 index 0000000..73abd25 --- /dev/null +++ b/openapi/clean_paths_reachability_test.go @@ -0,0 +1,284 @@ +package openapi_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/speakeasy-api/openapi/openapi" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Ensures Clean only preserves components reachable from /paths and security, +// and removes components that are only referenced from within components. +// +// Covers scenarios missed previously: +// - Keep schemas transitively reachable from operations (A -> B) +// - Remove self-referential and component-only cycles (Self, Cycle1, Cycle2) +// - Keep security schemes referenced by name via top-level security +// - Remove unused security schemes +func TestClean_ReachabilityFromPathsAndSecurity_Success(t *testing.T) { + t.Parallel() + ctx := t.Context() + + const yml = ` +openapi: 3.1.0 +info: + title: Test API + version: 1.0.0 +security: + - ApiKeyAuth: [] +paths: + /keep: + get: + responses: + "200": + description: ok + content: + application/json: + schema: + $ref: "#/components/schemas/A" +components: + schemas: + A: + type: object + properties: + b: + $ref: "#/components/schemas/B" + B: + type: string + Self: + $ref: "#/components/schemas/Self" + Cycle1: + $ref: "#/components/schemas/Cycle2" + Cycle2: + $ref: "#/components/schemas/Cycle1" + securitySchemes: + ApiKeyAuth: + type: apiKey + in: header + name: X-API-Key + UnusedScheme: + type: http + scheme: bearer +` + + // Unmarshal + doc, validationErrs, err := openapi.Unmarshal(ctx, strings.NewReader(yml)) + require.NoError(t, err, "unmarshal should succeed") + require.Empty(t, validationErrs, "input should be valid") + + // Clean + err = openapi.Clean(ctx, doc) + require.NoError(t, err, "clean should succeed") + + // Marshal and assert against expected YAML output + var buf bytes.Buffer + err = openapi.Marshal(ctx, doc, &buf) + require.NoError(t, err, "marshal should succeed") + actual := buf.String() + + const expected = `openapi: 3.1.0 +info: + title: Test API + version: 1.0.0 +security: + - ApiKeyAuth: [] +paths: + /keep: + get: + responses: + "200": + description: ok + content: + application/json: + schema: + $ref: "#/components/schemas/A" +components: + schemas: + A: + type: object + properties: + b: + $ref: "#/components/schemas/B" + B: + type: string + securitySchemes: + ApiKeyAuth: + type: apiKey + in: header + name: X-API-Key +` + + assert.Equal(t, expected, actual, "Clean should retain only reachable components (A, B) and ApiKeyAuth") +} + +// Ensures that when no paths (or top-level/operation security) reference components, +// purely self-referential or component-only cycles are all removed and the entire +// components section is dropped. +func TestClean_RemoveOnlySelfReferencedComponents_NoPaths_Success(t *testing.T) { + t.Parallel() + ctx := t.Context() + + const yml = ` +openapi: 3.1.0 +info: + title: Only Self-Referenced Components + version: 1.0.0 +paths: {} +components: + schemas: + Self: + $ref: "#/components/schemas/Self" + LoopA: + $ref: "#/components/schemas/LoopB" + LoopB: + $ref: "#/components/schemas/LoopA" + responses: + OnlyComponentResponse: + description: Component-only, not referenced from any path + content: + application/json: + schema: + $ref: "#/components/schemas/Self" + parameters: + OnlyComponentParameter: + name: "p" + in: query + schema: + type: string + requestBodies: + OnlyComponentRequestBody: + required: false + content: + application/json: + schema: + type: object + headers: + OnlyComponentHeader: + schema: + type: string + examples: + OnlyComponentExample: + value: + ok: true + links: + OnlyComponentLink: + description: Not referenced from paths + parameters: + id: "$response.body#/id" + callbacks: + OnlyComponentCallback: + "{$request.body#/cb}": + post: + requestBody: + content: + application/json: + schema: + type: object + responses: + "200": + description: ok + pathItems: + OnlyComponentPathItem: + get: + responses: + "200": + description: ok + securitySchemes: + UnusedApiKey: + type: apiKey + in: header + name: X-API-Key + UnusedBearer: + type: http + scheme: bearer +` + + doc, validationErrs, err := openapi.Unmarshal(ctx, strings.NewReader(yml)) + require.NoError(t, err, "unmarshal should succeed") + require.Empty(t, validationErrs, "input should be valid") + + err = openapi.Clean(ctx, doc) + require.NoError(t, err, "clean should succeed") + + // Marshal and assert against expected YAML output + var buf bytes.Buffer + err = openapi.Marshal(ctx, doc, &buf) + require.NoError(t, err, "marshal should succeed") + actual := buf.String() + + const expected = `openapi: 3.1.0 +info: + title: Only Self-Referenced Components + version: 1.0.0 +paths: {} +` + + assert.Equal(t, expected, actual, "All components should be removed when only self/component-only references exist") +} + +func TestClean_Reachability_CircularSchemas_Success(t *testing.T) { + t.Parallel() + ctx := t.Context() + + const yml = ` +openapi: 3.1.0 +info: + title: Test API + version: 1.0.0 +paths: + /keep: + get: + responses: + "200": + description: ok + content: + application/json: + schema: + $ref: "#/components/schemas/Cycle1" +components: + schemas: + Cycle1: + $ref: "#/components/schemas/Cycle2" + Cycle2: + $ref: "#/components/schemas/Cycle1" +` + + doc, validationErrs, err := openapi.Unmarshal(ctx, strings.NewReader(yml)) + require.NoError(t, err, "unmarshal should succeed") + require.Empty(t, validationErrs, "input should be valid") + + err = openapi.Clean(ctx, doc) + require.NoError(t, err, "clean should succeed") + + var buf bytes.Buffer + err = openapi.Marshal(ctx, doc, &buf) + require.NoError(t, err, "marshal should succeed") + actual := buf.String() + + const expected = `openapi: 3.1.0 +info: + title: Test API + version: 1.0.0 +paths: + /keep: + get: + responses: + "200": + description: ok + content: + application/json: + schema: + $ref: "#/components/schemas/Cycle1" +components: + schemas: + Cycle1: + $ref: "#/components/schemas/Cycle2" + Cycle2: + $ref: "#/components/schemas/Cycle1" +` + + assert.Equal(t, expected, actual, "Clean should keep circularly referenced components reachable from paths without hanging") +} diff --git a/openapi/clean_tags_test.go b/openapi/clean_tags_test.go new file mode 100644 index 0000000..0b894d2 --- /dev/null +++ b/openapi/clean_tags_test.go @@ -0,0 +1,133 @@ +package openapi_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/speakeasy-api/openapi/openapi" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// When top-level tags include tags unused by any operation, Clean should remove the unused ones +// and keep only those referenced by operations, preserving order. +func TestClean_RemoveUnusedTopLevelTags_KeepReferenced_PreserveOrder(t *testing.T) { + t.Parallel() + ctx := t.Context() + + const yml = ` +openapi: 3.1.0 +info: + title: Tags Test + version: 1.0.0 +tags: + - name: users + description: "Users related operations" + - name: admin + description: "Administrative operations" + - name: pets + description: "Pet operations" +paths: + /users: + get: + tags: ["users"] + responses: + "200": + description: ok + /admin: + post: + tags: ["admin"] + responses: + "201": + description: created +` + + doc, validationErrs, err := openapi.Unmarshal(ctx, strings.NewReader(yml)) + require.NoError(t, err, "unmarshal should succeed") + require.Empty(t, validationErrs) + + err = openapi.Clean(ctx, doc) + require.NoError(t, err, "clean should succeed") + + var buf bytes.Buffer + err = openapi.Marshal(ctx, doc, &buf) + require.NoError(t, err, "marshal should succeed") + actual := buf.String() + + // Expect only users and admin tags remain (pets removed), preserve original order + const expected = `openapi: 3.1.0 +info: + title: Tags Test + version: 1.0.0 +tags: + - name: users + description: "Users related operations" + - name: admin + description: "Administrative operations" +paths: + /users: + get: + tags: ["users"] + responses: + "200": + description: ok + /admin: + post: + tags: ["admin"] + responses: + "201": + description: created +` + + assert.Equal(t, expected, actual, "unused top-level tags should be removed; referenced tags kept in original order") +} + +// When no operation references any tag, Clean should remove the entire top-level tags array. +func TestClean_RemoveAllTopLevelTags_WhenUnused(t *testing.T) { + t.Parallel() + ctx := t.Context() + + const yml = ` +openapi: 3.1.0 +info: + title: Tags Test + version: 1.0.0 +tags: + - name: users + - name: admin +paths: + /ping: + get: + responses: + "200": + description: pong +` + + doc, validationErrs, err := openapi.Unmarshal(ctx, strings.NewReader(yml)) + require.NoError(t, err, "unmarshal should succeed") + require.Empty(t, validationErrs) + + err = openapi.Clean(ctx, doc) + require.NoError(t, err, "clean should succeed") + + var buf bytes.Buffer + err = openapi.Marshal(ctx, doc, &buf) + require.NoError(t, err, "marshal should succeed") + actual := buf.String() + + // Expect the tags array to be removed completely + const expected = `openapi: 3.1.0 +info: + title: Tags Test + version: 1.0.0 +paths: + /ping: + get: + responses: + "200": + description: pong +` + + assert.Equal(t, expected, actual, "top-level tags should be removed entirely when unused") +} diff --git a/openapi/reference.go b/openapi/reference.go index ceee4c7..da75067 100644 --- a/openapi/reference.go +++ b/openapi/reference.go @@ -633,7 +633,7 @@ func joinReferenceChain(chain []string) string { return result } -func unmarshaler[T any, V interfaces.Validator[T], C marshaller.CoreModeler](o *OpenAPI) func(context.Context, *yaml.Node, bool) (*Reference[T, V, C], []error, error) { +func unmarshaler[T any, V interfaces.Validator[T], C marshaller.CoreModeler](_ *OpenAPI) func(context.Context, *yaml.Node, bool) (*Reference[T, V, C], []error, error) { return func(ctx context.Context, node *yaml.Node, skipValidation bool) (*Reference[T, V, C], []error, error) { var ref Reference[T, V, C] validationErrs, err := marshaller.UnmarshalNode(ctx, "reference", node, &ref)