@@ -2,63 +2,38 @@ package main
22
33import (
44 "encoding/csv"
5- "encoding/json "
5+ "fmt "
66 "log"
77 "math/rand"
88 "net/http"
99 "os"
10- "sort"
1110 "strconv"
12- "strings"
1311 "time"
1412
13+ "github.com/algo7/TripAdvisor-Review-Scraper/scraper/internal/config"
1514 "github.com/algo7/TripAdvisor-Review-Scraper/scraper/pkg/tripadvisor"
16- )
17-
18- var (
19- // LANGUAGES is a slice of languages to be used for scraping, default is English
20- // var LANGUAGES = []string{"en", "fr", "pt", "es", "de", "it", "ru", "ja", "zh", "ko", "nl", "sv", "da", "fi", "no", "pl", "hu", "cs", "el", "tr", "th", "ar", "he", "id", "ms", "vi", "tl", "uk", "ro", "bg", "hr", "sr", "sk", "sl", "et", "lv", "lt", "sq", "mk", "hi", "bn", "pa", "gu", "ta", "te", "kn", "ml", "mr", "ur", "fa", "ne", "si", "my", "km", "lo", "am", "ka", "hy", "az", "uz", "tk", "ky", "tg", "mn", "bo", "sd", "ps", "ku", "gl", "eu", "ca", "is", "af", "xh", "zu", "ny", "st", "tn", "sn", "sw", "rw", "so", "mg", "eo", "cy", "gd", "gv", "ga", "mi", "sm", "to", "haw", "id", "jw"}
21- LANGUAGES = []string {"en" }
22-
23- // FILETYPE is the type of file to be saved, default is csv
24- FILETYPE = "csv"
15+ "github.com/algo7/TripAdvisor-Review-Scraper/scraper/pkg/utils"
2516)
2617
2718func main () {
2819 // Scraper variables
2920 var allReviews []tripadvisor.Review
3021 var location tripadvisor.Location
3122
32- // Get the location URL from the environment variable
33- locationURL := os .Getenv ("LOCATION_URL" )
34- log .Printf ("Location URL: %s" , locationURL )
35-
36- // Get the languages from the environment variable of use "en" as default
37- languages := LANGUAGES
38- if os .Getenv ("LANGUAGES" ) != "" {
39- languages = strings .Split (os .Getenv ("LANGUAGES" ), "|" )
40- }
41- log .Printf ("Languages: %v" , languages )
42-
43- // Get the file type from the environment variable or use "csv" as default
44- fileType := FILETYPE
45- if os .Getenv ("FILETYPE" ) != "" {
46- fileType = os .Getenv ("FILETYPE" )
47- }
48- if fileType != "csv" && fileType != "json" {
49- log .Fatal ("Invalid file type. Use csv or json" )
23+ config , err := config .NewConfig ()
24+ if err != nil {
25+ log .Fatalf ("Error creating scrape config: %v" , err )
5026 }
51- log .Printf ("File Type: %s" , fileType )
5227
5328 // Get the query type from the URL
54- queryType := tripadvisor .GetURLType (locationURL )
29+ queryType := tripadvisor .GetURLType (config . LocationURL )
5530 if queryType == "" {
5631 log .Fatal ("Invalid URL" )
5732 }
5833 log .Printf ("Location Type: %s" , queryType )
5934
6035 // Parse the location ID and location name from the URL
61- locationID , locationName , err := tripadvisor .ParseURL (locationURL , queryType )
36+ locationID , locationName , err := tripadvisor .ParseURL (config . LocationURL , queryType )
6237 if err != nil {
6338 log .Fatalf ("Error parsing URL: %v" , err )
6439 }
@@ -68,34 +43,31 @@ func main() {
6843 // Get the query ID for the given query type.
6944 queryID := tripadvisor .GetQueryID (queryType )
7045 if err != nil {
71- log .Fatal ("The location ID must be an positive integer" )
46+ log .Fatal ("The location ID must be a positive integer" )
7247 }
7348
74- // Get the proxy host if set
75- proxyHost := os .Getenv ("PROXY_HOST" )
76-
7749 // The default HTTP client
7850 client := & http.Client {}
7951
8052 // If the proxy host is set, use the proxy client
81- if proxyHost != "" {
53+ if config . ProxyHost != "" {
8254
8355 // Get the HTTP client with the proxy
84- client , err = tripadvisor .GetHTTPClientWithProxy (proxyHost )
56+ client , err = tripadvisor .GetHTTPClientWithProxy (config . ProxyHost )
8557 if err != nil {
86- log .Fatalf ("Error creating HTTP client with the give proxy %s: %v" , proxyHost , err )
58+ log .Fatalf ("Error creating HTTP client with the give proxy %s: %v" , config . ProxyHost , err )
8759 }
8860
8961 // Check IP
90- ip , err := tripadvisor .CheckIP (client )
62+ ip , err := utils .CheckIP (client )
9163 if err != nil {
9264 log .Fatalf ("Error checking IP: %v" , err )
9365 }
9466 log .Printf ("Proxy IP: %s" , ip )
9567 }
9668
9769 // Fetch the review count for the given location ID
98- reviewCount , err := tripadvisor .FetchReviewCount (client , locationID , queryType , languages )
70+ reviewCount , err := tripadvisor .FetchReviewCount (client , locationID , queryType , config . Languages )
9971 if err != nil {
10072 log .Fatalf ("Error fetching review count: %v" , err )
10173 }
@@ -105,7 +77,7 @@ func main() {
10577 log .Printf ("Review count: %d" , reviewCount )
10678
10779 // Create a file to save the reviews data
108- fileName := "reviews." + fileType
80+ fileName := fmt . Sprintf ( "reviews.%s" , config . FileType )
10981 fileHandle , err := os .Create (fileName )
11082 if err != nil {
11183 log .Fatalf ("Error creating file %s: %v" , fileName , err )
@@ -131,7 +103,7 @@ func main() {
131103 offset := tripadvisor .CalculateOffset (i )
132104
133105 // Make the request to the TripAdvisor GraphQL endpoint
134- resp , err := tripadvisor .MakeRequest (client , queryID , languages , locationID , offset , 20 )
106+ resp , err := tripadvisor .MakeRequest (client , queryID , config . Languages , locationID , offset , 20 )
135107 if err != nil {
136108 log .Fatalf ("Error making request at iteration %d: %v" , i , err )
137109 }
@@ -156,7 +128,7 @@ func main() {
156128 // Store the location data
157129 location = response [0 ].Data .Locations [0 ].Location
158130
159- if fileType == "csv" {
131+ if config . FileType == "csv" {
160132 // Iterating over the reviews
161133 for _ , row := range reviews {
162134 row := []string {
@@ -177,7 +149,8 @@ func main() {
177149 }
178150
179151 }
180- if fileType == "csv" {
152+
153+ if config .FileType == "csv" {
181154 // Create a new csv writer. We are using writeAll so defer writer.Flush() is not required
182155 writer := csv .NewWriter (fileHandle )
183156
@@ -192,45 +165,19 @@ func main() {
192165 if err != nil {
193166 log .Fatalf ("Error writing data to csv: %v" , err )
194167 }
195- } else {
196- // Write the data to the JSON file
197- const layout = "2006-01-02"
198-
199- sort .Slice (allReviews , func (i , j int ) bool {
200- iTime , err := time .Parse (layout , allReviews [i ].CreatedDate )
201- if err != nil {
202- log .Fatalf ("Error parsing time: %v" , err )
203- }
204-
205- jTime , err := time .Parse (layout , allReviews [j ].CreatedDate )
206- if err != nil {
207- log .Fatalf ("Error parsing time: %v" , err )
208- }
168+ }
209169
210- return jTime .After (iTime )
211- })
170+ // If the file type is JSON, write the data to the file
171+ if config .FileType == "json" {
172+ // Sort the reviews by date
173+ tripadvisor .SortReviewsByDate (allReviews )
212174
213- feedback := tripadvisor.Feedback {
214- Location : location ,
215- Reviews : allReviews ,
216- }
217- data , err := json .Marshal (feedback )
218- if err != nil {
219- log .Fatalf ("Could not marshal data: %v" , err )
220- }
221- _ , err = fileHandle .Write (data )
175+ // Write the data to the JSON file
176+ err := tripadvisor .WriteReviewsToJSONFile (allReviews , location , fileHandle )
222177 if err != nil {
223- log .Fatalf ("Could not write data: %v" , err )
178+ log .Fatalf ("Error writing data to JSON file : %v" , err )
224179 }
225180 }
226-
227181 log .Printf ("Data written to %s" , fileName )
228182 log .Println ("Scrapping completed" )
229183}
230-
231- func init () {
232- // Check if the environment variables are set
233- if os .Getenv ("LOCATION_URL" ) == "" {
234- log .Fatal ("LOCATION_URL not set" )
235- }
236- }
0 commit comments