@@ -7,11 +7,11 @@ package soup
77import (
88 "errors"
99 "io/ioutil"
10+ "log"
1011 "net/http"
1112 "regexp"
1213 "strings"
1314
14- "github.com/anaskhan96/soup/fetch"
1515 "golang.org/x/net/html"
1616)
1717
@@ -41,7 +41,7 @@ func Header(n string, v string) {
4141
4242// Get returns the HTML returned by the url in string
4343func Get (url string ) (string , error ) {
44- defer fetch . CatchPanic ("Get()" )
44+ defer catchPanic ("Get()" )
4545 // Init a new HTTP client
4646 client := & http.Client {}
4747 req , err := http .NewRequest ("GET" , url , nil )
@@ -76,7 +76,7 @@ func Get(url string) (string, error) {
7676
7777// HTMLParse parses the HTML returning a start pointer to the DOM
7878func HTMLParse (s string ) Root {
79- defer fetch . CatchPanic ("HTMLParse()" )
79+ defer catchPanic ("HTMLParse()" )
8080 r , err := html .Parse (strings .NewReader (s ))
8181 if err != nil {
8282 if debug {
@@ -101,8 +101,8 @@ func HTMLParse(s string) Root {
101101// with or without attribute key and value specified,
102102// and returns a struct with a pointer to it
103103func (r Root ) Find (args ... string ) Root {
104- defer fetch . CatchPanic ("Find()" )
105- temp , ok := fetch . FindOnce (r .Pointer , args , false )
104+ defer catchPanic ("Find()" )
105+ temp , ok := findOnce (r .Pointer , args , false )
106106 if ok == false {
107107 if debug {
108108 panic ("Element `" + args [0 ] + "` with attributes `" + strings .Join (args [1 :], " " ) + "` not found" )
@@ -117,8 +117,8 @@ func (r Root) Find(args ...string) Root {
117117// and returns an array of structs, each having
118118// the respective pointers
119119func (r Root ) FindAll (args ... string ) []Root {
120- defer fetch . CatchPanic ("FindAll()" )
121- temp := fetch . FindAllofem (r .Pointer , args )
120+ defer catchPanic ("FindAll()" )
121+ temp := findAllofem (r .Pointer , args )
122122 if len (temp ) == 0 {
123123 if debug {
124124 panic ("Element `" + args [0 ] + "` with attributes `" + strings .Join (args [1 :], " " ) + "` not found" )
@@ -135,7 +135,7 @@ func (r Root) FindAll(args ...string) []Root {
135135// FindNextSibling finds the next sibling of the pointer in the DOM
136136// returning a struct with a pointer to it
137137func (r Root ) FindNextSibling () Root {
138- defer fetch . CatchPanic ("FindNextSibling()" )
138+ defer catchPanic ("FindNextSibling()" )
139139 nextSibling := r .Pointer .NextSibling
140140 if nextSibling == nil {
141141 if debug {
@@ -149,7 +149,7 @@ func (r Root) FindNextSibling() Root {
149149// FindPrevSibling finds the previous sibling of the pointer in the DOM
150150// returning a struct with a pointer to it
151151func (r Root ) FindPrevSibling () Root {
152- defer fetch . CatchPanic ("FindPrevSibling()" )
152+ defer catchPanic ("FindPrevSibling()" )
153153 prevSibling := r .Pointer .PrevSibling
154154 if prevSibling == nil {
155155 if debug {
@@ -163,7 +163,7 @@ func (r Root) FindPrevSibling() Root {
163163// FindNextElementSibling finds the next element sibling of the pointer in the DOM
164164// returning a struct with a pointer to it
165165func (r Root ) FindNextElementSibling () Root {
166- defer fetch . CatchPanic ("FindNextElementSibling()" )
166+ defer catchPanic ("FindNextElementSibling()" )
167167 nextSibling := r .Pointer .NextSibling
168168 if nextSibling == nil {
169169 if debug {
@@ -181,7 +181,7 @@ func (r Root) FindNextElementSibling() Root {
181181// FindPrevElementSibling finds the previous element sibling of the pointer in the DOM
182182// returning a struct with a pointer to it
183183func (r Root ) FindPrevElementSibling () Root {
184- defer fetch . CatchPanic ("FindPrevElementSibling()" )
184+ defer catchPanic ("FindPrevElementSibling()" )
185185 prevSibling := r .Pointer .PrevSibling
186186 if prevSibling == nil {
187187 if debug {
@@ -198,7 +198,7 @@ func (r Root) FindPrevElementSibling() Root {
198198
199199// Attrs returns a map containing all attributes
200200func (r Root ) Attrs () map [string ]string {
201- defer fetch . CatchPanic ("Attrs()" )
201+ defer catchPanic ("Attrs()" )
202202 if r .Pointer .Type != html .ElementNode {
203203 if debug {
204204 panic ("Not an ElementNode" )
@@ -208,12 +208,12 @@ func (r Root) Attrs() map[string]string {
208208 if len (r .Pointer .Attr ) == 0 {
209209 return nil
210210 }
211- return fetch . GetKeyValue (r .Pointer .Attr )
211+ return getKeyValue (r .Pointer .Attr )
212212}
213213
214214// Text returns the string inside a non-nested element
215215func (r Root ) Text () string {
216- defer fetch . CatchPanic ("Text()" )
216+ defer catchPanic ("Text()" )
217217 k := r .Pointer .FirstChild
218218checkNode:
219219 if k .Type != html .TextNode {
@@ -242,3 +242,74 @@ checkNode:
242242 }
243243 return ""
244244}
245+
246+ // Using depth first search to find the first occurrence and return
247+ func findOnce (n * html.Node , args []string , uni bool ) (* html.Node , bool ) {
248+ if uni == true {
249+ if n .Type == html .ElementNode && n .Data == args [0 ] {
250+ if len (args ) > 1 && len (args ) < 4 {
251+ for i := 0 ; i < len (n .Attr ); i ++ {
252+ if n .Attr [i ].Key == args [1 ] && n .Attr [i ].Val == args [2 ] {
253+ return n , true
254+ }
255+ }
256+ } else if len (args ) == 1 {
257+ return n , true
258+ }
259+ }
260+ }
261+ uni = true
262+ for c := n .FirstChild ; c != nil ; c = c .NextSibling {
263+ p , q := findOnce (c , args , true )
264+ if q != false {
265+ return p , q
266+ }
267+ }
268+ return nil , false
269+ }
270+
271+ // Using depth first search to find all occurrences and return
272+ func findAllofem (n * html.Node , args []string ) []* html.Node {
273+ var nodeLinks = make ([]* html.Node , 0 , 10 )
274+ var f func (* html.Node , []string , bool )
275+ f = func (n * html.Node , args []string , uni bool ) {
276+ if uni == true {
277+ if n .Data == args [0 ] {
278+ if len (args ) > 1 && len (args ) < 4 {
279+ for i := 0 ; i < len (n .Attr ); i ++ {
280+ if n .Attr [i ].Key == args [1 ] && n .Attr [i ].Val == args [2 ] {
281+ nodeLinks = append (nodeLinks , n )
282+ }
283+ }
284+ } else if len (args ) == 1 {
285+ nodeLinks = append (nodeLinks , n )
286+ }
287+ }
288+ }
289+ uni = true
290+ for c := n .FirstChild ; c != nil ; c = c .NextSibling {
291+ f (c , args , true )
292+ }
293+ }
294+ f (n , args , false )
295+ return nodeLinks
296+ }
297+
298+ // Returns a key pair value (like a dictionary) for each attribute
299+ func getKeyValue (attributes []html.Attribute ) map [string ]string {
300+ var keyvalues = make (map [string ]string )
301+ for i := 0 ; i < len (attributes ); i ++ {
302+ _ , exists := keyvalues [attributes [i ].Key ]
303+ if exists == false {
304+ keyvalues [attributes [i ].Key ] = attributes [i ].Val
305+ }
306+ }
307+ return keyvalues
308+ }
309+
// catchPanic recovers from a panic in the calling function and logs it along
// with fnName. It must be invoked directly via defer (defer catchPanic("X"))
// for recover to take effect; when no panic is in flight it does nothing.
func catchPanic(fnName string) {
	cause := recover()
	if cause == nil {
		return
	}
	log.Println("Error occurred in", fnName, ":", cause)
}
0 commit comments