Skip to content

Commit 2f80d82

Browse files
fix: break string references to allow GC of DecisionsStreamResponse (#126)
* fix(memory): intern origin strings and clone country codes to allow GC ## Problem When processing decisions from the stream, strings extracted from Decision structs (e.g., `*decision.Origin`) shared their backing byte array with the original struct. This prevented GC from reclaiming the entire DecisionsStreamResponse until those strings were no longer referenced, causing unnecessary memory retention. ## Solution 1. **String interning for origins** - Origins have low cardinality (crowdsec, cscli, lists:scenario). Interning deduplicates them and breaks references to Decision memory. 100k decisions with same origin = 1 string allocation. 2. **Clone country codes** - Use `strings.Clone()` to break reference to Decision struct memory for country scope decisions. 3. **Clear decisions after processing** - Set `decisions.New` and `decisions.Deleted` to nil after processing to help GC reclaim the DecisionsStreamResponse immediately. ## Performance ``` BenchmarkInternString/existing_string 81M ops 17.35 ns/op 0 B/op BenchmarkInternString/new_string 33M ops 38.01 ns/op 0 B/op ``` The fast path (existing string lookup) is ~17ns with zero allocations. New strings are cloned once and cached for future use. * refactor: address Copilot review feedback - Use original string as key in LoadOrStore to avoid unnecessary clone allocation when another goroutine wins the race - Add O(n) performance warning to InternedPoolSize documentation - Simplify benchmark pre-population (only need one call per unique string) - Pre-generate unique strings in new_string benchmark to isolate interning cost from string construction overhead - Restructure origin interning to avoid intermediate string when not needed * refactor: address valid Copilot feedback (2 of 3) - Fix benchmark timer ordering: clear pool before b.ResetTimer() - Store *decision.Origin in variable to avoid double dereference Note: Rejected Copilot's suggestion to revert LoadOrStore key from 's' back to 'cloned'. Using 's' as key is correct because: 1. sync.Map compares keys by value (string contents), not pointer 2. The key is only used for lookup - the VALUE (cloned) is what's stored 3. Using 's' avoids wasting a clone allocation when another goroutine wins * Update pkg/dataset/root.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update pkg/dataset/root.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent d1049e5 commit 2f80d82

File tree

2 files changed

+21
-6
lines changed

2 files changed

+21
-6
lines changed

cmd/root.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,11 @@ func Execute() error {
189189
}
190190
dataSet.Add(decisions.New)
191191
dataSet.Remove(decisions.Deleted)
192+
// Clear references to allow GC of the DecisionsStreamResponse
193+
// The Decision structs contain pointer fields (*string) that we may have
194+
// extracted strings from. Clearing these references helps GC reclaim memory.
195+
decisions.New = nil
196+
decisions.Deleted = nil
192197
}
193198
}
194199
})

pkg/dataset/root.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,13 @@ func (d *DataSet) Add(decisions models.GetDecisionsResponse) {
4848

4949
// Collect all operations, converting IPs to prefixes immediately
5050
for _, decision := range decisions {
51-
origin := *decision.Origin
52-
if origin == "lists" && decision.Scenario != nil {
51+
// Clone origin string to break reference to Decision struct memory
52+
// This allows GC to reclaim the DecisionsStreamResponse after processing
53+
var origin string
54+
if *decision.Origin == "lists" && decision.Scenario != nil {
5355
origin = *decision.Origin + ":" + *decision.Scenario
56+
} else {
57+
origin = strings.Clone(*decision.Origin)
5458
}
5559

5660
scope := strings.ToLower(*decision.Scope)
@@ -88,7 +92,8 @@ func (d *DataSet) Add(decisions models.GetDecisionsResponse) {
8892
// Increment metrics immediately - all operations always succeed
8993
metrics.TotalActiveDecisions.With(prometheus.Labels{"origin": origin, "ip_type": ipType, "scope": "range"}).Inc()
9094
case "country":
91-
cnOps = append(cnOps, cnOp{cn: *decision.Value, origin: origin, r: r, id: decision.ID})
95+
// Clone country code to break reference to Decision struct memory
96+
cnOps = append(cnOps, cnOp{cn: strings.Clone(*decision.Value), origin: origin, r: r, id: decision.ID})
9297
default:
9398
log.Errorf("Unknown scope %s", *decision.Scope)
9499
}
@@ -129,9 +134,13 @@ func (d *DataSet) Remove(decisions models.GetDecisionsResponse) {
129134

130135
// Collect all operations, converting IPs to prefixes immediately
131136
for _, decision := range decisions {
132-
origin := *decision.Origin
133-
if origin == "lists" && decision.Scenario != nil {
137+
// Clone origin string to break reference to Decision struct memory
138+
// This allows GC to reclaim the DecisionsStreamResponse after processing
139+
var origin string
140+
if *decision.Origin == "lists" && decision.Scenario != nil {
134141
origin = *decision.Origin + ":" + *decision.Scenario
142+
} else {
143+
origin = strings.Clone(*decision.Origin)
135144
}
136145

137146
scope := strings.ToLower(*decision.Scope)
@@ -165,7 +174,8 @@ func (d *DataSet) Remove(decisions models.GetDecisionsResponse) {
165174
}
166175
prefixOps = append(prefixOps, BartRemoveOp{Prefix: prefix, R: r, ID: decision.ID, Origin: origin, IPType: ipType, Scope: "range"})
167176
case "country":
168-
cnOps = append(cnOps, cnOp{cn: *decision.Value, r: r, id: decision.ID, origin: origin})
177+
// Clone country code to break reference to Decision struct memory
178+
cnOps = append(cnOps, cnOp{cn: strings.Clone(*decision.Value), r: r, id: decision.ID, origin: origin})
169179
default:
170180
log.Errorf("Unknown scope %s", *decision.Scope)
171181
}

0 commit comments

Comments
 (0)