File tree Expand file tree Collapse file tree 4 files changed +26
-20
lines changed
Expand file tree Collapse file tree 4 files changed +26
-20
lines changed Original file line number Diff line number Diff line change 11package httpx
22
33import (
4+ "context"
45 "crypto/tls"
56 "fmt"
67 "io"
@@ -25,7 +26,6 @@ import (
2526 pdhttputil "github.com/projectdiscovery/utils/http"
2627 stringsutil "github.com/projectdiscovery/utils/strings"
2728 urlutil "github.com/projectdiscovery/utils/url"
28- "golang.org/x/net/context"
2929 "golang.org/x/net/http2"
3030)
3131
Original file line number Diff line number Diff line change @@ -14,26 +14,23 @@ type PageTypeClassifier struct {
1414 classifier * naive_bayes.NaiveBayesClassifier
1515}
1616
17- func New () * PageTypeClassifier {
17+ func New () ( * PageTypeClassifier , error ) {
1818 classifier , err := naive_bayes .NewClassifierFromFileData (classifierData )
1919 if err != nil {
20- panic ( err )
20+ return nil , err
2121 }
22- return & PageTypeClassifier {classifier : classifier }
22+ return & PageTypeClassifier {classifier : classifier }, nil
2323}
2424
2525func (n * PageTypeClassifier ) Classify (html string ) string {
26- text := htmlToText (html )
27- if text == "" {
26+ text , err := htmlToText (html )
27+ if err != nil || text == "" {
2828 return "other"
2929 }
3030 return n .classifier .Classify (text )
3131}
3232
33- func htmlToText (html string ) string {
34- text , err := htmltomarkdown .ConvertString (html )
35- if err != nil {
36- panic (err )
37- }
38- return text
33+ // htmlToText converts HTML to text and returns any conversion error to the caller instead of panicking on it.
34+ func htmlToText (html string ) (string , error ) {
35+ return htmltomarkdown .ConvertString (html )
3936}
Original file line number Diff line number Diff line change @@ -3,19 +3,22 @@ package pagetypeclassifier
33import (
44 "testing"
55
6- "github.com/stretchr/testify/assert "
6+ "github.com/stretchr/testify/require "
77)
88
99func TestPageTypeClassifier (t * testing.T ) {
1010
1111 t .Run ("test creation of new PageTypeClassifier" , func (t * testing.T ) {
12- epc := New ()
13- assert .NotNil (t , epc )
12+ epc , err := New ()
13+ require .NoError (t , err )
14+ require .NotNil (t , epc )
1415 })
1516
1617 t .Run ("test classification non error page text" , func (t * testing.T ) {
17- epc := New ()
18- assert .Equal (t , "nonerror" , epc .Classify (`<!DOCTYPE html>
18+ epc , err := New ()
19+ require .NoError (t , err )
20+ require .NotNil (t , epc )
21+ require .Equal (t , "nonerror" , epc .Classify (`<!DOCTYPE html>
1922 <html lang="en">
2023 <head>
2124 <meta charset="UTF-8">
@@ -30,8 +33,10 @@ func TestPageTypeClassifier(t *testing.T) {
3033 })
3134
3235 t .Run ("test classification on error page text" , func (t * testing.T ) {
33- epc := New ()
34- assert .Equal (t , "error" , epc .Classify (`<!DOCTYPE html>
36+ epc , err := New ()
37+ require .NoError (t , err )
38+ require .NotNil (t , epc )
39+ require .Equal (t , "error" , epc .Classify (`<!DOCTYPE html>
3540 <html>
3641 <head>
3742 <title>Error 403: Forbidden</title>
Original file line number Diff line number Diff line change @@ -385,7 +385,11 @@ func New(options *Options) (*Runner, error) {
385385 }
386386
387387 runner .simHashes = gcache.New [uint64 , struct {}](1000 ).ARC ().Build ()
388- runner .pageTypeClassifier = pagetypeclassifier .New ()
388+ pageTypeClassifier , err := pagetypeclassifier .New ()
389+ if err != nil {
390+ return nil , err
391+ }
392+ runner .pageTypeClassifier = pageTypeClassifier
389393
390394 if options .HttpApiEndpoint != "" {
391395 apiServer := NewServer (options .HttpApiEndpoint , options )
You can’t perform that action at this time.
0 commit comments