forked from hbollon/IGopher
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrapping.go
More file actions
141 lines (128 loc) · 4.39 KB
/
scrapping.go
File metadata and controls
141 lines (128 loc) · 4.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package igopher
import (
"errors"
"fmt"
"time"
"github.com/sirupsen/logrus"
"github.com/tebeka/selenium"
"github.com/vbauerster/mpb/v6"
"github.com/vbauerster/mpb/v6/decor"
)
// ScrapperConfig stores the scrapper configuration used for user fetching.
// It also stores the fetched usernames.
type ScrapperConfig struct {
// SrcAccounts lists the source account usernames, loaded from the
// "src_accounts" YAML key.
// NOTE(review): presumably these are the accounts whose followers get
// scrapped by FetchUsersFromUserFollowers — confirm that
// IGopher.ScrapperManager is a ScrapperConfig.
SrcAccounts []string `yaml:"src_accounts"`
// FetchedAccounts holds the fetched usernames. It carries no YAML tag.
// NOTE(review): nothing in this file writes to it — confirm where it is
// populated.
FetchedAccounts []string
// Quantity is loaded from the "fetch_quantity" YAML key and carries the
// "numeric" validation tag.
// NOTE(review): presumably the number of users to fetch per source
// account — verify against the scrapping loop.
Quantity int `yaml:"fetch_quantity" validate:"numeric"`
}
// FetchUsersFromUserFollowers scraps a username list from the followers of the
// source accounts set in sc.ScrapperManager.SrcAccounts.
//
// For every source account it opens the followers list, then repeatedly
// collects the follower link elements, filters them through sc.Blacklist and
// scrolls the page until at least sc.ScrapperManager.Quantity elements are
// gathered. Scraping of an account stops early when the scroll or the element
// lookup fails, or when several consecutive rounds bring no additional
// follower (e.g. the account has fewer followers than requested); whatever was
// collected so far is kept. Source accounts whose followers list cannot be
// opened are skipped.
//
// It returns the usernames collected across all source accounts. An error is
// returned when no source account is configured, when the configured quantity
// is not strictly positive, or when no username at all could be collected.
func (sc *IGopher) FetchUsersFromUserFollowers() ([]string, error) {
	// maxStalledRounds bounds how many consecutive scraping rounds may yield no
	// additional follower before the current source account is abandoned.
	// Without it, an account with too few followers would loop forever since
	// scrolling the page keeps succeeding.
	const maxStalledRounds = 5

	logrus.Info("Fetching users from users followers...")
	var igUsers []string

	// Valid configuration checking before fetching process.
	// len() of a nil slice is 0, so no separate nil check is required.
	if len(sc.ScrapperManager.SrcAccounts) == 0 {
		return nil, errors.New("No source users are set, please check your scrapper settings and retry")
	}
	if sc.ScrapperManager.Quantity <= 0 {
		return nil, errors.New("Scrapping quantity is null or negative, please check your scrapper settings and retry")
	}

	p := mpb.New(
		mpb.WithWidth(60),
		mpb.WithRefreshRate(180*time.Millisecond),
	)

	// Global progress bar: one step per source account.
	totalBar := p.Add(int64(len(sc.ScrapperManager.SrcAccounts)),
		mpb.NewBarFiller("[=>-|"),
		mpb.BarRemoveOnComplete(),
		mpb.PrependDecorators(
			decor.CountersNoUnit("%d / %d"),
		),
		mpb.AppendDecorators(
			decor.Percentage(),
		),
	)

	for _, srcUsername := range sc.ScrapperManager.SrcAccounts {
		logrus.Debugf("Fetch from '%s' user", srcUsername)
		finded, err := sc.navigateUserFollowersList(srcUsername)
		if !finded || err != nil {
			if err != nil {
				logrus.Warnf("Unable to open followers list of '%s' user, skip it: %v", srcUsername, err)
			}
			// Still count the skipped account, otherwise p.Wait() never returns.
			totalBar.IncrBy(1)
			continue
		}

		// Per-account progress bar: one step per scrapped follower.
		userBar := p.Add(int64(sc.ScrapperManager.Quantity),
			mpb.NewBarFiller("[=>-|"),
			mpb.BarRemoveOnComplete(),
			mpb.PrependDecorators(
				decor.Name(fmt.Sprintf("Scrapping users from %s account: ", srcUsername)),
				decor.CountersNoUnit("%d / %d"),
			),
			mpb.AppendDecorators(
				decor.Percentage(),
			),
		)

		// Scrap users until it has the right amount defined in ScrapperManager.Quantity by the user
		var scrappedUsers []selenium.WebElement
		stalledRounds := 0
		for len(scrappedUsers) < sc.ScrapperManager.Quantity {
			previousCount := len(scrappedUsers)
			if previousCount != 0 {
				// Scroll to the end of the list to gather more followers from ig
				_, err = sc.SeleniumStruct.WebDriver.ExecuteScript("window.scrollTo(0, document.body.scrollHeight);", nil)
				if err != nil {
					logrus.Warnf(
						"Error during followers dialog box scroll for '%s' user. The user certainly did not have enough followers for the request",
						srcUsername,
					)
					userBar.Abort(true)
					break
				}
			}
			randomSleepCustom(3, 4)

			scrappedUsers, err = sc.SeleniumStruct.GetElements("//*/li/div/div/div/div/a", "xpath")
			if err != nil {
				logrus.Errorf(
					"Error during users scrapping from followers dialog box for '%s' user",
					srcUsername,
				)
				userBar.Abort(true)
				break
			}
			scrappedUsers = sc.Blacklist.FilterScrappedUsers(scrappedUsers)
			userBar.SetCurrent(int64(len(scrappedUsers)))
			logrus.Debugf("Users count finded: %d", len(scrappedUsers))

			// Progress made: reset the stall counter and keep going.
			if len(scrappedUsers) > previousCount {
				stalledRounds = 0
				continue
			}

			// No additional follower showed up during this round.
			stalledRounds++
			if stalledRounds >= maxStalledRounds {
				logrus.Warnf(
					"No new followers found for '%s' user after %d attempts, stopping with %d of %d requested users",
					srcUsername, maxStalledRounds, len(scrappedUsers), sc.ScrapperManager.Quantity,
				)
				break
			}
		}

		for _, user := range scrappedUsers {
			username, err := user.Text()
			if err != nil {
				logrus.Debugf("Unable to read the username of a follower scrapped from '%s' user: %v", srcUsername, err)
				continue
			}
			igUsers = append(igUsers, username)
		}
		logrus.Debugf("Scrapped users: %v\n", igUsers)

		// Drop the bar if it did not reach its total so p.Wait() is not blocked by it.
		if !userBar.Completed() {
			userBar.Abort(true)
		}
		totalBar.IncrBy(1)
	}
	p.Wait()

	if len(igUsers) == 0 {
		return nil, errors.New("Empty users result")
	}
	return igUsers, nil
}
// navigateUserFollowersList drives the webdriver to the Instagram profile page
// of srcUsername and clicks the link that opens its followers list.
//
// The boolean result is false only when the profile page itself could not be
// loaded; it stays true when the page was loaded but the followers link could
// not be found or clicked. The error is non-nil whenever the followers list
// was not opened, so callers must check both values.
func (sc *IGopher) navigateUserFollowersList(srcUsername string) (bool, error) {
	// XPath of the followers link on the profile page.
	const followersLinkXPath = "//*[@id=\"react-root\"]/div/div/section/main/div/ul/li[2]/a"

	// Navigate to Instagram user page
	if err := sc.SeleniumStruct.WebDriver.Get(fmt.Sprintf("https://www.instagram.com/%s/?hl=en", srcUsername)); err != nil {
		logrus.Warnf("Requested user '%s' doesn't exist, skip it", srcUsername)
		logrus.Debugf("Navigation to '%s' user page failed: %v", srcUsername, err)
		return false, errors.New("Error during access to requested user")
	}
	randomSleepCustom(1, 3)

	// Access to followers list view.
	// NOTE(review): the last argument (10) is presumably a timeout in seconds — confirm against WaitForElement.
	find, err := sc.SeleniumStruct.WaitForElement(followersLinkXPath, "xpath", 10)
	if err != nil || !find {
		return true, errors.New("Error during access to user followers list")
	}

	// The element may have vanished between the wait and the lookup: never
	// click on a missing element, it would panic.
	elem, err := sc.SeleniumStruct.GetElement(followersLinkXPath, "xpath")
	if err != nil || elem == nil {
		logrus.Debugf("Followers link lookup failed for '%s' user: %v", srcUsername, err)
		return true, errors.New("Error during access to user followers list")
	}

	// A failed click means the followers list is not displayed; report it
	// instead of letting the caller scrap the wrong page.
	if err := elem.Click(); err != nil {
		logrus.Debugf("Click on followers link failed for '%s' user: %v", srcUsername, err)
		return true, errors.New("Error during access to user followers list")
	}
	logrus.Debug("Clicked on user followers list")

	return true, nil
}