Skip to content

Commit 1449d9e

Browse files
authored
chore: solving conflicts from release branch merging (#377)
1 parent b839add commit 1449d9e

File tree

6 files changed

+297
-45
lines changed

6 files changed

+297
-45
lines changed

pkg/analytics/analytics.go

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package analytics
22

33
import (
44
"bytes"
5+
"github.com/snyk/go-application-framework/pkg/logging"
56

67
//nolint:gosec // insecure sha1 used for legacy identifier
78
"crypto/sha1"
@@ -103,22 +104,8 @@ type dataOutput struct {
103104
Data analyticsOutput `json:"data"`
104105
}
105106

106-
var (
107-
// sensitiveFieldNames is a list of field names that should be sanitized.
108-
// data sanitization is used to prevent sensitive data from being sent to the analytics server.
109-
sensitiveFieldNames = []string{
110-
"headers",
111-
"user",
112-
"passw",
113-
"token",
114-
"key",
115-
"secret",
116-
}
117-
)
118-
119107
const (
120-
sanitizeReplacementString string = "REDACTED"
121-
apiEndpoint string = "/v1/analytics/cli"
108+
apiEndpoint string = "/v1/analytics/cli"
122109
)
123110

124111
// New creates a new Analytics instance.
@@ -266,7 +253,7 @@ func (a *AnalyticsImpl) GetRequest() (*http.Request, error) {
266253
return nil, err
267254
}
268255

269-
outputJson, err = SanitizeValuesByKey(sensitiveFieldNames, sanitizeReplacementString, outputJson)
256+
outputJson, err = SanitizeValuesByKey(logging.SENSITIVE_FIELD_NAMES, logging.SANITIZE_REPLACEMENT_STRING, outputJson)
270257
if err != nil {
271258
return nil, err
272259
}
@@ -275,7 +262,7 @@ func (a *AnalyticsImpl) GetRequest() (*http.Request, error) {
275262
if err != nil {
276263
return nil, err
277264
}
278-
outputJson, err = SanitizeUsername(user.Username, user.HomeDir, sanitizeReplacementString, outputJson)
265+
outputJson, err = SanitizeUsername(user.Username, user.HomeDir, logging.SANITIZE_REPLACEMENT_STRING, outputJson)
279266
if err != nil {
280267
return nil, err
281268
}

pkg/analytics/analytics_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package analytics
33
import (
44
"encoding/json"
55
"fmt"
6+
"github.com/snyk/go-application-framework/pkg/logging"
67
"io"
78
"net/http"
89
"os/user"
@@ -74,7 +75,7 @@ func Test_Basic(t *testing.T) {
7475
body, err := io.ReadAll(request.Body)
7576
assert.Nil(t, err)
7677
// expect no CLI args to be sent to analytics (CLI-586)
77-
assert.Equal(t, 0, strings.Count(string(body), sanitizeReplacementString))
78+
assert.Equal(t, 0, strings.Count(string(body), logging.SANITIZE_REPLACEMENT_STRING))
7879

7980
var requestBody dataOutput
8081
err = json.Unmarshal(body, &requestBody)
@@ -118,11 +119,11 @@ func Test_SanitizeValuesByKey(t *testing.T) {
118119
}
119120

120121
// test input
121-
filter := sensitiveFieldNames
122+
filter := logging.SENSITIVE_FIELD_NAMES
122123
input, err := json.Marshal(inputStruct)
123124
assert.NoError(t, err)
124125

125-
replacement := sanitizeReplacementString
126+
replacement := logging.SANITIZE_REPLACEMENT_STRING
126127

127128
fmt.Println("Before: " + string(input))
128129

@@ -171,7 +172,7 @@ func Test_SanitizeUsername(t *testing.T) {
171172
// 2. with domain name
172173
// 3. user name and path are different
173174
// 4. current OS values
174-
replacement := "REDACTED"
175+
replacement := "***"
175176
inputData := []input{
176177
{
177178
userName: "some.user",

pkg/analytics/instrumentation_collector.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"encoding/json"
55
"errors"
66
"fmt"
7+
"github.com/snyk/go-application-framework/pkg/logging"
78
"os/user"
89
"time"
910

@@ -180,7 +181,7 @@ func (ic *instrumentationCollectorImpl) sanitizeExtensionData(logger *zerolog.Lo
180181
}
181182

182183
var sanitized []byte
183-
sanitized, err = SanitizeValuesByKey(sensitiveFieldNames, sanitizeReplacementString, extension)
184+
sanitized, err = SanitizeValuesByKey(logging.SENSITIVE_FIELD_NAMES, logging.SANITIZE_REPLACEMENT_STRING, extension)
184185
if err != nil {
185186
logger.Printf("failed to sanitize extension, removing object from analytics payload as sanitzation was not possible: %v", err)
186187
return result
@@ -192,7 +193,7 @@ func (ic *instrumentationCollectorImpl) sanitizeExtensionData(logger *zerolog.Lo
192193
return result
193194
}
194195

195-
sanitized, err = SanitizeUsername(u.Username, u.HomeDir, sanitizeReplacementString, sanitized)
196+
sanitized, err = SanitizeUsername(u.Username, u.HomeDir, logging.SANITIZE_REPLACEMENT_STRING, sanitized)
196197
if err != nil {
197198
logger.Printf("failed to sanitize user information in extension payload, removing object from analytics payload as sanitzation was not possible: %v", err)
198199
return result

pkg/analytics/instrumentation_collector_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ func Test_InstrumentationCollector(t *testing.T) {
214214

215215
mockExtension := map[string]interface{}{
216216
"strings": "hello world",
217-
"password": "REDACTED",
217+
"password": "***",
218218
}
219219

220220
expectedV2InstrumentationObject.Data.Attributes.Interaction.Extension = &mockExtension

pkg/logging/scrubbingLogWriter.go

Lines changed: 97 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"io"
2222
"os/user"
2323
"regexp"
24+
"sort"
2425
"strings"
2526
"sync"
2627
"time"
@@ -31,8 +32,18 @@ import (
3132
"github.com/snyk/go-application-framework/pkg/configuration"
3233
)
3334

34-
const redactMask string = "***"
3535
const MAX_WRITE_RETRIES = 10
36+
const SANITIZE_REPLACEMENT_STRING string = "***"
37+
38+
// SENSITIVE_FIELD_NAMES is a list of field names that should be sanitized.
39+
var SENSITIVE_FIELD_NAMES = []string{
40+
"headers",
41+
"user",
42+
"passw",
43+
"token",
44+
"key",
45+
"secret",
46+
}
3647

3748
type ScrubbingLogWriter interface {
3849
AddTerm(term string, matchGroup int)
@@ -146,43 +157,51 @@ func addMandatoryMasking(dict ScrubbingDict) ScrubbingDict {
146157
groupToRedact: 3,
147158
regex: regexp.MustCompile(s),
148159
}
160+
149161
s = fmt.Sprintf(`([t|T]oken )(%s)`, charGroup)
150162
dict[s] = scrubStruct{
151163
groupToRedact: 2,
152164
regex: regexp.MustCompile(s),
153165
}
166+
154167
s = fmt.Sprintf(`([b|B]earer )(%s)`, charGroup)
155168
dict[s] = scrubStruct{
156169
groupToRedact: 2,
157170
regex: regexp.MustCompile(s),
158171
}
172+
159173
s = fmt.Sprintf(`([b|B]asic )(%s)`, charGroup)
160174
dict[s] = scrubStruct{
161175
groupToRedact: 2,
162176
regex: regexp.MustCompile(s),
163177
}
178+
164179
s = fmt.Sprintf("(gh[ps])_(%s)", charGroup)
165180
dict[s] = scrubStruct{
166181
groupToRedact: 2,
167182
regex: regexp.MustCompile(s),
168183
}
184+
169185
s = fmt.Sprintf("(github_pat_)(%s)", charGroup)
170186
dict[s] = scrubStruct{
171187
groupToRedact: 2,
172188
regex: regexp.MustCompile(s),
173189
}
190+
174191
// github
175-
s = fmt.Sprintf("(access_token=)(%s)&", charGroup)
192+
s = fmt.Sprintf(`(access_token[\\="\s:]+)(%s)&?`, charGroup)
176193
dict[s] = scrubStruct{
177194
groupToRedact: 2,
178195
regex: regexp.MustCompile(s),
179196
}
180-
s = fmt.Sprintf("(refresh_token=)(%s)&", charGroup)
197+
198+
s = fmt.Sprintf(`(refresh_token[\\="\s:]+)(%s)&?`, charGroup)
181199
dict[s] = scrubStruct{
182200
groupToRedact: 2,
183201
regex: regexp.MustCompile(s),
184202
}
185-
s = fmt.Sprintf(`("token":)"(%s)"`, charGroup)
203+
204+
s = fmt.Sprintf(`(token[\\="\s:]+)(%s)&?`, charGroup)
186205
dict[s] = scrubStruct{
187206
groupToRedact: 2,
188207
regex: regexp.MustCompile(s),
@@ -194,12 +213,72 @@ func addMandatoryMasking(dict ScrubbingDict) ScrubbingDict {
194213
regex: regexp.MustCompile(s),
195214
}
196215

216+
// Hide whatever is the current username
197217
u, err := user.Current()
198218
if err == nil {
199219
s = fmt.Sprintf(`\b%s\b`, regexp.QuoteMeta(u.Username))
200220
addTermToDict(s, 0, dict)
201221
}
202222

223+
// The legacy CLI's snyk-config package prints the entire configuration in debug mode.
224+
// It begins with some pseudo-JSON structure, which we can redact.
225+
s = `(?s)_:\s*\[(?<everything_inside_hard_brackets>.*)\]`
226+
dict[s] = scrubStruct{
227+
groupToRedact: 1,
228+
regex: regexp.MustCompile(s),
229+
}
230+
231+
// JSON-formatted data, in general
232+
kws := strings.Join(SENSITIVE_FIELD_NAMES, "|")
233+
s = fmt.Sprintf(`(?i)"[^"]*(?<json_key>%s)[^"]*"\s*:\s*"(?<json_value>[^"]*)"`, kws)
234+
dict[s] = scrubStruct{
235+
groupToRedact: 2,
236+
regex: regexp.MustCompile(s),
237+
}
238+
239+
// CLI argument mapping from the snyk-config debug logging
240+
// I.e., if --argument=value is passed, it will be logged as { 'argument=value': true }
241+
s = fmt.Sprintf(`(?im)(%s)[^=]*=(?P<value>.*)['"]`, kws)
242+
dict[s] = scrubStruct{
243+
groupToRedact: 2,
244+
regex: regexp.MustCompile(s),
245+
}
246+
247+
// Same as above, but for short-form arguments (p, u), i.e. logged as { 'p=value': true }
248+
shorts := []string{"p", "u"}
249+
shortForm := strings.Join(shorts, "")
250+
s = fmt.Sprintf(`(?im)'[%s]=(?<value>.*)'`, shortForm)
251+
dict[s] = scrubStruct{
252+
groupToRedact: 2,
253+
regex: regexp.MustCompile(s),
254+
}
255+
256+
// Specific short-form scrubbing of the JSON-ish log structures
257+
// These appear in the snyk-config debug logging in various forms of { 'u': 'john.doe', }, with or without quotes,
258+
// and values can contain spaces, double and/or single quotes.
259+
260+
s = fmt.Sprintf(`(?i)(?<short_form_key>\b[%s]\b)[,'":]+\s*(?:['"](?<short_form_value>.*)['"]|([^,'"\s]+))[,}]?`, shortForm)
261+
dict[s] = scrubStruct{
262+
groupToRedact: 2,
263+
regex: regexp.MustCompile(s),
264+
}
265+
266+
// CLI argument-style-specific scrubbing
267+
// Many cases are already covered by the JSON scrubbing above, thus this might seem incomplete.
268+
// Refer to the unit tests for the full set of covered cases.
269+
s = fmt.Sprintf(`(?im)\-[%s][\s=](?<short_form_value>\S*)`, shortForm)
270+
dict[s] = scrubStruct{
271+
groupToRedact: 1,
272+
regex: regexp.MustCompile(s),
273+
}
274+
275+
// Long-form, rest is covered by the JSON scrubbing above
276+
s = fmt.Sprintf(`(?im)--(?<argument_key>[^=\s]*(?:%s)[^=\s]*)[\s=]['"]?(?<argument_value>\S*)['"]?`, kws)
277+
dict[s] = scrubStruct{
278+
groupToRedact: 2,
279+
regex: regexp.MustCompile(s),
280+
}
281+
203282
return dict
204283
}
205284

@@ -212,10 +291,22 @@ func (w *scrubbingLevelWriter) Write(p []byte) (int, error) {
212291

213292
func scrub(p []byte, scrubDict ScrubbingDict) []byte {
214293
s := string(p)
215-
for _, entry := range scrubDict {
294+
295+
// The dictionary order is important here, as we want potentially overlapping regexes to be applied
296+
// in the same order every time. Since Go map iteration order is unspecified, we sort the keys here.
297+
keys := make([]string, 0, len(scrubDict))
298+
for k := range scrubDict {
299+
keys = append(keys, k)
300+
}
301+
sort.Strings(keys)
302+
for _, key := range keys {
303+
entry := scrubDict[key]
216304
matches := entry.regex.FindAllStringSubmatch(s, -1)
217305
for _, match := range matches {
218-
s = strings.Replace(s, match[entry.groupToRedact], redactMask, -1)
306+
if entry.groupToRedact >= len(match) || match[entry.groupToRedact] == "" {
307+
continue
308+
}
309+
s = strings.Replace(s, match[entry.groupToRedact], SANITIZE_REPLACEMENT_STRING, -1)
219310
}
220311
}
221312
return []byte(s)

0 commit comments

Comments
 (0)