Skip to content

Commit 0aa8203

Browse files
authored
fix proxy mode failing for GH private packages (#137)
* fix proxy mode failing for GH private packages * skip analysis for private packages for proxy mode * introduce npmRegistryConfig and support for handling multiple parsers in future * refactor name and unexport npm config functions * rm unused function * rename & unexport npmRegistryURLParser * add e2e for malicious pkg blocked using proxy mode
1 parent aa5c528 commit 0aa8203

File tree

6 files changed

+207
-14
lines changed

6 files changed

+207
-14
lines changed

.github/workflows/pmg-e2e.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,27 @@ jobs:
372372
! pmg npm install nyc-config@10.0.0 || echo "Malicious package correctly blocked"
373373
cd .. && rm -rf malicious-test
374374
375+
- name: Test safedep-test-pkg is Blocked using Proxy mode
376+
run: |
377+
echo "Testing that safedep-test-pkg is blocked..."
378+
mkdir safedep-test-pkg-test && cd safedep-test-pkg-test
379+
pmg npm init -y
380+
# Attempt to install safedep-test-pkg - should fail
381+
if pmg --experimental-proxy-mode npm --no-cache --prefer-online i safedep-test-pkg@0.1.3; then
382+
echo "ERROR: safedep-test-pkg was not blocked!"
383+
exit 1
384+
else
385+
echo "SUCCESS: safedep-test-pkg correctly blocked"
386+
fi
387+
# Verify package is not installed locally
388+
if [ -d "node_modules/safedep-test-pkg" ]; then
389+
echo "ERROR: safedep-test-pkg found in node_modules!"
390+
exit 1
391+
else
392+
echo "SUCCESS: safedep-test-pkg not present in node_modules"
393+
fi
394+
cd .. && rm -rf safedep-test-pkg-test
395+
375396
- name: Test PMG Modes
376397
run: |
377398
echo "Testing different PMG modes..."

docs/demo/pmg-intro.tape

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,3 @@ Enter
2121
Type "npm install safedep-test-pkg"
2222
Enter
2323
Sleep 5s
24-

proxy/interceptors/npm_registry.go

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,28 @@ import (
99
"github.com/safedep/pmg/proxy"
1010
)
1111

12-
var (
13-
npmRegistryDomains = []string{
14-
"registry.npmjs.org",
15-
"registry.yarnpkg.com",
16-
}
17-
)
12+
var npmRegistryDomains = map[string]*npmRegistryConfig{
13+
"registry.npmjs.org": {
14+
Host: "registry.npmjs.org",
15+
SupportedForAnalysis: true,
16+
RegistryParser: npmParser{},
17+
},
18+
"registry.yarnpkg.com": {
19+
Host: "registry.yarnpkg.com",
20+
SupportedForAnalysis: true,
21+
RegistryParser: npmParser{},
22+
},
23+
"npm.pkg.github.com": {
24+
Host: "npm.pkg.github.com",
25+
SupportedForAnalysis: false, // Skip analysis for now (private packages, auth complexity)
26+
RegistryParser: githubParser{},
27+
},
28+
"pkg-npm.githubusercontent.com": {
29+
Host: "pkg-npm.githubusercontent.com",
30+
SupportedForAnalysis: false, // Skip analysis (blob storage, redirected downloads)
31+
RegistryParser: githubBlobParser{},
32+
},
33+
}
1834

1935
// NpmRegistryInterceptor intercepts NPM registry requests and analyzes packages for malware
2036
// It embeds baseRegistryInterceptor to reuse ecosystem agnostic functionality
@@ -46,8 +62,13 @@ func (i *NpmRegistryInterceptor) Name() string {
4662

4763
// ShouldIntercept determines if this interceptor should handle the given request
4864
func (i *NpmRegistryInterceptor) ShouldIntercept(ctx *proxy.RequestContext) bool {
49-
for _, domain := range npmRegistryDomains {
50-
if ctx.Hostname == domain || strings.HasSuffix(ctx.Hostname, "."+domain) {
65+
if _, exists := npmRegistryDomains[ctx.Hostname]; exists {
66+
return true
67+
}
68+
69+
// Check subdomain match
70+
for domain := range npmRegistryDomains {
71+
if strings.HasSuffix(ctx.Hostname, "."+domain) {
5172
return true
5273
}
5374
}
@@ -60,9 +81,26 @@ func (i *NpmRegistryInterceptor) ShouldIntercept(ctx *proxy.RequestContext) bool
6081
func (i *NpmRegistryInterceptor) HandleRequest(ctx *proxy.RequestContext) (*proxy.InterceptorResponse, error) {
6182
log.Debugf("[%s] Handling NPM registry request: %s", ctx.RequestID, ctx.URL.Path)
6283

63-
pkgInfo, err := parseNpmRegistryURL(ctx.URL.Path)
84+
// Get registry configuration
85+
config := getNpmRegistryConfigForHostname(ctx.Hostname)
86+
if config == nil {
87+
// Shouldn't happen if ShouldIntercept is working correctly
88+
log.Warnf("[%s] No registry config found for hostname: %s", ctx.RequestID, ctx.Hostname)
89+
return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil
90+
}
91+
92+
// Skip analysis for registries that are not supported for analysis
93+
if !config.SupportedForAnalysis {
94+
log.Debugf("[%s] Skipping analysis for %s registry (not supported for analysis): %s",
95+
ctx.RequestID, config.Host, ctx.URL.String())
96+
return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil
97+
}
98+
99+
// Parse URL using registry-specific strategy
100+
pkgInfo, err := config.RegistryParser.ParseURL(ctx.URL.Path)
64101
if err != nil {
65-
log.Warnf("[%s] Failed to parse NPM registry URL %s: %v", ctx.RequestID, ctx.URL.Path, err)
102+
log.Warnf("[%s] Failed to parse NPM registry URL %s for %s: %v",
103+
ctx.RequestID, ctx.URL.Path, config.Host, err)
66104
return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil
67105
}
68106

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package interceptors
2+
3+
import "strings"
4+
5+
// npmRegistryConfig defines configuration for npm registry endpoints
6+
type npmRegistryConfig struct {
7+
// Hostname
8+
Host string
9+
10+
// Whether this registry is supported for malware analysis
11+
SupportedForAnalysis bool
12+
13+
// Parser for the registry
14+
RegistryParser npmRegistryURLParser
15+
}
16+
17+
// getNpmRegistryConfigForHostname returns the configuration for a hostname (with subdomain matching)
18+
func getNpmRegistryConfigForHostname(hostname string) *npmRegistryConfig {
19+
// Check exact match first
20+
if config, exists := npmRegistryDomains[hostname]; exists {
21+
return config
22+
}
23+
24+
// Check subdomain match: hostname could be "cdn.registry.npmjs.org" matching "registry.npmjs.org"
25+
for endpoint, config := range npmRegistryDomains {
26+
if strings.HasSuffix(hostname, "."+endpoint) {
27+
return config
28+
}
29+
}
30+
31+
return nil
32+
}

proxy/interceptors/npm_url_parser.go

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,15 @@ type npmPackageInfo struct {
1313
IsScoped bool
1414
}
1515

16-
// parseNpmRegistryURL parses an NPM registry URL path to extract package information
16+
// npmRegistryURLParser defines the interface for parsing registry-specific URLs
17+
type npmRegistryURLParser interface {
18+
ParseURL(urlPath string) (*npmPackageInfo, error)
19+
}
20+
21+
type npmParser struct{}
22+
23+
// parseNpmRegistryURL parses standard NPM registry URL paths (registry.npmjs.org, registry.yarnpkg.com)
24+
// This function handles the standard npm registry URL format.
1725
//
1826
// Supported URL patterns:
1927
// - /package -> {Name: "package", Version: ""}
@@ -22,7 +30,7 @@ type npmPackageInfo struct {
2230
// - /@scope/package/1.0.0 -> {Name: "@scope/package", Version: "1.0.0", IsScoped: true}
2331
// - /package/-/package-1.0.0.tgz -> {Name: "package", Version: "1.0.0", IsTarball: true}
2432
// - /@scope/package/-/@scope-package-1.0.0.tgz -> {Name: "@scope/package", Version: "1.0.0", IsTarball: true, IsScoped: true}
25-
func parseNpmRegistryURL(urlPath string) (*npmPackageInfo, error) {
33+
func (n npmParser) ParseURL(urlPath string) (*npmPackageInfo, error) {
2634
// Remove leading and trailing slashes
2735
urlPath = strings.Trim(urlPath, "/")
2836

@@ -43,6 +51,32 @@ func parseNpmRegistryURL(urlPath string) (*npmPackageInfo, error) {
4351
return parseUnscopedPackageURL(segments)
4452
}
4553

54+
type githubParser struct{}
55+
56+
// ParseURL implements RegistryURLParser for GitHub npm registry
57+
func (g githubParser) ParseURL(urlPath string) (*npmPackageInfo, error) {
58+
// For now, just allow all GitHub npm registry requests through without analysis
59+
// TODO: Implement proper GitHub npm registry URL parsing when analysis is enabled
60+
// GitHub URLs follow patterns:
61+
// - /download/@owner/package/version/hash.tgz -> {Name: "package", Version: "1.0.0", IsTarball: true}
62+
// - /@owner/package (metadata requests)
63+
return &npmPackageInfo{
64+
IsTarball: false, // Mark as non-tarball to skip analysis
65+
}, nil
66+
}
67+
68+
type githubBlobParser struct{}
69+
70+
// ParseURL implements RegistryURLParser for GitHub blob storage
71+
func (g githubBlobParser) ParseURL(urlPath string) (*npmPackageInfo, error) {
72+
// For now, just allow all GitHub blob storage requests through without analysis
73+
// TODO: Implement proper GitHub blob storage URL parsing when analysis is enabled
74+
// Pattern: /npmregistryv2prod/blobs/{blob_id}/{package_name}/{version}/***
75+
return &npmPackageInfo{
76+
IsTarball: false, // Mark as non-tarball to skip analysis
77+
}, nil
78+
}
79+
4680
// parseScopedPackageURL parses a scoped package URL
4781
// Patterns:
4882
// - [@scope, package] -> @scope/package

proxy/interceptors/npm_url_parser_test.go

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@ func TestParseNpmRegistryURL(t *testing.T) {
265265

266266
for _, tt := range tests {
267267
t.Run(tt.name, func(t *testing.T) {
268-
got, err := parseNpmRegistryURL(tt.urlPath)
268+
parser := npmParser{}
269+
got, err := parser.ParseURL(tt.urlPath)
269270

270271
if tt.wantErr {
271272
assert.Error(t, err)
@@ -280,3 +281,71 @@ func TestParseNpmRegistryURL(t *testing.T) {
280281
})
281282
}
282283
}
284+
285+
func TestGithubParser_ParseURL(t *testing.T) {
286+
tests := []struct {
287+
name string
288+
urlPath string
289+
wantIsTarball bool
290+
wantErr bool
291+
}{
292+
{
293+
name: "github metadata request",
294+
urlPath: "/@owner/package",
295+
wantIsTarball: false,
296+
wantErr: false,
297+
},
298+
{
299+
name: "github download request",
300+
urlPath: "/download/@owner/package/1.0.0/abc123.tgz",
301+
wantIsTarball: false,
302+
wantErr: false,
303+
},
304+
}
305+
306+
for _, tt := range tests {
307+
t.Run(tt.name, func(t *testing.T) {
308+
parser := githubParser{}
309+
got, err := parser.ParseURL(tt.urlPath)
310+
311+
if tt.wantErr {
312+
assert.Error(t, err)
313+
return
314+
}
315+
316+
assert.NoError(t, err)
317+
assert.Equal(t, tt.wantIsTarball, got.IsTarball)
318+
})
319+
}
320+
}
321+
322+
func TestGithubBlobParser_ParseURL(t *testing.T) {
323+
tests := []struct {
324+
name string
325+
urlPath string
326+
wantIsTarball bool
327+
wantErr bool
328+
}{
329+
{
330+
name: "github blob storage request",
331+
urlPath: "/npmregistryv2prod/blobs/132160241/gh-npm-pkg/1.0.0/abc123",
332+
wantIsTarball: false,
333+
wantErr: false,
334+
},
335+
}
336+
337+
for _, tt := range tests {
338+
t.Run(tt.name, func(t *testing.T) {
339+
parser := githubBlobParser{}
340+
got, err := parser.ParseURL(tt.urlPath)
341+
342+
if tt.wantErr {
343+
assert.Error(t, err)
344+
return
345+
}
346+
347+
assert.NoError(t, err)
348+
assert.Equal(t, tt.wantIsTarball, got.IsTarball)
349+
})
350+
}
351+
}

0 commit comments

Comments
 (0)