Skip to content

Commit a9a3d96

Browse files
authored
Merge pull request #17 from rzhade3/rzhade3/refactor-matching
Output Shortlinks and refactor searching code
2 parents b988e80 + d2afdc3 commit a9a3d96

File tree

4 files changed

+69
-63
lines changed

4 files changed

+69
-63
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ You can download the pre-built binaries from the [releases](https://github.com/u
3737

3838
urlhunter requires 3 parameters to run: `-keywords`, `-date` and `-o`.
3939

40-
For example: `urlhunter -keywords keywords.txt -date 2020-11-20 -o out.txt`
40+
For example: `urlhunter --keywords keywords.txt --date 2020-11-20 --o out.txt`
4141

4242
### --keywords
4343

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ go 1.15
44

55
require (
66
github.com/fatih/color v1.10.0
7+
github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3
78
github.com/schollz/progressbar/v3 v3.7.1
89
)

go.sum

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
23
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
34
github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg=
45
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
@@ -11,11 +12,14 @@ github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/Qd
1112
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
1213
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
1314
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
15+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
1416
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
15-
github.com/schollz/progressbar v1.0.0 h1:gbyFReLHDkZo8mxy/dLWMr+Mpb1MokGJ1FqCiqacjZM=
17+
github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3 h1:2YkbhM98YoshI0K0BD95IoCFx+KNN1L/G0P5WzY2kac=
18+
github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3/go.mod h1:iTcJ+0KrnJXKBZvYH/Q6GKLhFuiXzD3z2PRae7xWqpY=
1619
github.com/schollz/progressbar/v3 v3.7.1 h1:aQR/t6d+1nURSdoMn6c7n0vJi5xQ3KndpF0n7R5wrik=
1720
github.com/schollz/progressbar/v3 v3.7.1/go.mod h1:CG/f0JmacksUc6TkZToO7tVq4t03zIQSQUtTd7F9GR4=
1821
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
22+
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
1923
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
2024
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
2125
golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9 h1:umElSU9WZirRdgu2yFHY0ayQkEnKiOC1TtM3fWXFnoU=

main.go

Lines changed: 62 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"time"
2020

2121
"github.com/fatih/color"
22+
"github.com/rzhade3/beaconspec"
2223
"github.com/schollz/progressbar/v3"
2324
)
2425

@@ -62,7 +63,6 @@ var err error
6263
var archivesPath string
6364

6465
func main() {
65-
6666
var keywordFile string
6767
var dateParam string
6868
var outFile string
@@ -80,7 +80,6 @@ func main() {
8080
flag.Usage = func() { fmt.Print(usage) }
8181
flag.Parse()
8282

83-
8483
if keywordFile == "" || dateParam == "" || outFile == "" {
8584
crash("You must specify all arguments.", err)
8685
return
@@ -191,7 +190,7 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) {
191190
_, err := Unzip(filepath.Join(archivesPath, fullname, item.Name), filepath.Join(archivesPath, fullname))
192191
if err != nil {
193192
os.Remove(filepath.Join(archivesPath, fullname, item.Name))
194-
crash(item.Name + " looks damaged. It's removed now. Run the program again to re-download.", err)
193+
crash(item.Name+" looks damaged. It's removed now. Run the program again to re-download.", err)
195194
}
196195
}
197196

@@ -227,10 +226,9 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) {
227226
}
228227

229228
func searchFile(fileLocation string, keyword string, outfile string) {
230-
231229
var path string
232230

233-
if strings.HasPrefix(fileLocation, "archives"){
231+
if strings.HasPrefix(fileLocation, "archives") {
234232
path_parts := strings.Split(fileLocation, string(os.PathSeparator))
235233
path = filepath.Join(path_parts[1], path_parts[2])
236234
} else {
@@ -250,54 +248,71 @@ func searchFile(fileLocation string, keyword string, outfile string) {
250248
panic(err)
251249
}
252250
defer f.Close()
251+
252+
metadata, err := beaconspec.ReadMetadata(fileLocation)
253+
if err != nil {
254+
warning(err.Error())
255+
return
256+
}
257+
258+
var matcher func([]byte) bool
253259
if strings.HasPrefix(keyword, "regex") {
254-
regexValue := strings.Split(keyword, " ")[1]
255-
r, err := regexp.Compile(regexValue)
256-
if err != nil {
257-
warning("Invalid Regex!")
258-
return
259-
}
260-
for scanner.Scan() {
261-
if r.MatchString(scanner.Text()) {
262-
textToWrite := strings.Split(scanner.Text(), "|")[1]
263-
if _, err := f.WriteString(textToWrite + "\n"); err != nil {
264-
panic(err)
265-
}
266-
}
267-
}
260+
matcher, err = regexMatch(keyword)
261+
} else if strings.Contains(keyword, ",") {
262+
matcher, err = multiKeywordMatcher(keyword)
268263
} else {
269-
if strings.Contains(keyword, ",") {
270-
keywords := strings.Split(keyword, ",")
271-
for scanner.Scan() {
272-
foundFlag := true
273-
for i := 0; i < len(keywords); i++ {
274-
if bytes.Contains(scanner.Bytes(), []byte(keywords[i])) {
275-
continue
276-
} else {
277-
foundFlag = false
278-
}
279-
}
280-
if foundFlag {
281-
textToWrite := strings.Split(scanner.Text(), "|")[1]
282-
if _, err := f.WriteString(textToWrite + "\n"); err != nil {
283-
panic(err)
284-
}
285-
}
286-
}
264+
matcher, err = stringMatch(keyword)
265+
}
266+
if err != nil {
267+
warning(err.Error())
268+
return
269+
}
270+
271+
for scanner.Scan() {
272+
if matcher(scanner.Bytes()) {
287273

288-
} else {
289-
toFind := []byte(keyword)
290-
for scanner.Scan() {
291-
if bytes.Contains(scanner.Bytes(), toFind) {
292-
textToWrite := strings.Split(scanner.Text(), "|")[1]
293-
if _, err := f.WriteString(textToWrite + "\n"); err != nil {
294-
panic(err)
295-
}
296-
}
274+
line, err := beaconspec.ParseLine(scanner.Text(), metadata)
275+
if err != nil {
276+
panic(err)
277+
}
278+
textToWrite := fmt.Sprintf("%s,%s\n", line.Source, line.Target)
279+
if _, err := f.WriteString(textToWrite); err != nil {
280+
panic(err)
297281
}
298282
}
299283
}
284+
}
285+
286+
// regexMatch builds a line matcher from a keyword of the form
// "regex <pattern>".
//
// Everything after the first space is used as the pattern, so patterns that
// contain spaces themselves are kept intact instead of being cut at the next
// space. A non-nil error (and a nil matcher) is returned when the pattern is
// missing/empty or does not compile; the caller is expected to report the
// error and skip the keyword.
func regexMatch(keyword string) (func([]byte) bool, error) {
	// SplitN with a limit of 2 never yields more than {"regex", "<pattern>"},
	// and lets us detect a bare "regex" keyword without indexing out of range.
	parts := strings.SplitN(keyword, " ", 2)
	if len(parts) < 2 || parts[1] == "" {
		return nil, fmt.Errorf("invalid regex keyword %q: expected \"regex <pattern>\"", keyword)
	}

	r, err := regexp.Compile(parts[1])
	if err != nil {
		// Do not hand back a closure over a nil *regexp.Regexp.
		return nil, fmt.Errorf("invalid regex %q: %w", parts[1], err)
	}

	// Match directly on the scanned bytes; no per-line string conversion.
	return r.Match, nil
}
300294

295+
// multiKeywordMatcher builds a matcher from a comma-separated keyword list.
// The returned function reports true only when the given text contains every
// one of the comma-separated parts. The error result is always nil.
func multiKeywordMatcher(keyword string) (func([]byte) bool, error) {
	// Convert each part to bytes once, up front, rather than per scanned line.
	var needles [][]byte
	for _, part := range strings.Split(keyword, ",") {
		needles = append(needles, []byte(part))
	}

	matchAll := func(line []byte) bool {
		for i := 0; i < len(needles); i++ {
			if bytes.Contains(line, needles[i]) {
				continue
			}
			// One missing keyword is enough to reject the line.
			return false
		}
		return true
	}
	return matchAll, nil
}
310+
311+
// stringMatch builds a matcher that reports whether a line contains keyword
// as a plain byte substring. The error result is always nil.
func stringMatch(keyword string) (func([]byte) bool, error) {
	needle := []byte(keyword)
	contains := func(line []byte) bool {
		return bytes.Index(line, needle) >= 0
	}
	return contains, nil
}
302317

303318
func ifArchiveExists(fullname string) bool {
@@ -367,20 +382,6 @@ func downloadFile(url string) {
367382
color.Green("Download Finished!")
368383
}
369384

370-
func ByteCountSI(b int64) string {
371-
const unit = 1000
372-
if b < unit {
373-
return fmt.Sprintf("%d B", b)
374-
}
375-
div, exp := int64(unit), 0
376-
for n := b / unit; n >= unit; n /= unit {
377-
div *= unit
378-
exp++
379-
}
380-
return fmt.Sprintf("%.1f %cB",
381-
float64(b)/float64(div), "kMGTPE"[exp])
382-
}
383-
384385
func Unzip(src string, dest string) ([]string, error) {
385386
var filenames []string
386387
r, err := zip.OpenReader(src)
@@ -447,4 +448,4 @@ func crash(message string, err error) {
447448

448449
func warning(message string) {
449450
color.Yellow("[WARNING]: " + message + "\n")
450-
}
451+
}

0 commit comments

Comments
 (0)