@@ -4,23 +4,32 @@ import (
44 "archive/tar"
55 "bytes"
66 "encoding/csv"
7+ "fmt"
78 "index/suffixarray"
89 "io"
910 "log"
1011 "os"
12+ paths "path"
1113 "regexp"
1214 "sort"
1315 "strings"
1416
1517 "github.com/ekzhu/minhash-lsh"
1618 "github.com/sergi/go-diff/diffmatchpatch"
1719
20+ "gopkg.in/src-d/go-license-detector.v2/licensedb/filer"
1821 "gopkg.in/src-d/go-license-detector.v2/licensedb/internal/assets"
1922 "gopkg.in/src-d/go-license-detector.v2/licensedb/internal/fastlog"
2023 "gopkg.in/src-d/go-license-detector.v2/licensedb/internal/normalize"
2124 "gopkg.in/src-d/go-license-detector.v2/licensedb/internal/wmh"
2225)
2326
27+ var (
28+ licenseReadmeMentionRe = regexp .MustCompile (
29+ fmt .Sprintf ("(?i)[^\\ s]+/[^/\\ s]*(%s)[^\\ s]*" ,
30+ strings .Join (licenseFileNames , "|" )))
31+ )
32+
2433// database holds the license texts, their hashes and the hashtables to query for nearest
2534// neighbors.
2635type database struct {
@@ -411,18 +420,29 @@ func (db *database) scanForURLs(text string) map[string]bool {
411420}
412421
413422// QueryReadmeText tries to detect licenses mentioned in the README.
414- func (db * database ) QueryReadmeText (text string ) map [string ]float32 {
415- candidates1 := investigateReadmeFile (text , db .nameSubstrings , db .nameSubstringSizes )
416- candidates2 := investigateReadmeFile (text , db .nameShortSubstrings , db .nameShortSubstringSizes )
423+ func (db * database ) QueryReadmeText (text string , fs filer.Filer ) map [string ]float32 {
417424 candidates := map [string ]float32 {}
418- for key , val := range candidates1 {
419- candidates [key ] = val
425+ append := func (others map [string ]float32 ) {
426+ for key , val := range others {
427+ if candidates [key ] < val {
428+ candidates [key ] = val
429+ }
430+ }
420431 }
421- for key , val := range candidates2 {
422- if candidates [key ] < val {
423- candidates [key ] = val
432+ for _ , match := range licenseReadmeMentionRe .FindAllString (text , - 1 ) {
433+ match = strings .TrimRight (match , ".,:;-" )
434+ content , err := fs .ReadFile (match )
435+ if err == nil {
436+ if preprocessor , exists := filePreprocessors [paths .Ext (match )]; exists {
437+ content = preprocessor (content )
438+ }
439+ append (db .QueryLicenseText (string (content )))
424440 }
425441 }
442+ if len (candidates ) == 0 {
443+ append (investigateReadmeFile (text , db .nameSubstrings , db .nameSubstringSizes ))
444+ append (investigateReadmeFile (text , db .nameShortSubstrings , db .nameShortSubstringSizes ))
445+ }
426446 if db .debug {
427447 for key , val := range candidates {
428448 println ("NLP" , key , val )
0 commit comments