Commit 0158200

Merge pull request #49 from OSLL/developers-google-com-badges-updates
google_export updates
2 parents: d0b0726 + fb434e0

6 files changed: +238 −32 lines changed

google_export/Dockerfile

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+RUN apt update \
+    && apt upgrade -y\
+    && apt install -y curl
+
+COPY requirements.txt requirements.txt
+
+RUN pip3 install -r requirements.txt
+
+COPY . .

google_export/README.md

Lines changed: 23 additions & 0 deletions
@@ -7,7 +7,30 @@
 - -k | --key - key for exporting badges (taken from the browser dev console, from a request filtered by "badges" and GET requests; the key parameter at the end of the URL)
 - -c | --curl_args - key for resolving the user id when it is given as a string (obtained the same way: filter by GetProfile, a POST request; the key is in the 'X-Goog-Api-Key' header).
 - -t | --timeout - timeout for a single request
+- -r | --repeat - number of retries per request (default: 10)
+
+### By analogy with the neighboring exporters:
+- --google_token - path to the token used to upload to Google Sheets
+- --table_id - id of the Google spreadsheet
+- --sheet_id - id of the sheet within the Google spreadsheet
+
+- --yandex_token - token for Yandex Disk
+- --yandex_path - path on Yandex Disk
+
+### Instead of loading ids from files, you can import them from a Google spreadsheet
+- --input_sheet_id - id of the input sheet within the Google spreadsheet
+- --input_column_number - column number (e.g. 'B' is 2)
+- --input_column_skip - number of leading entries to skip
+
+Example:
+```bash
+python3 main.py -o 'fname.csv' -k 'key1' -c 'key2' --google_token "token" --table_id table_id --sheet_id=out --input_sheet_id=in --input_column_number=2 --input_column_skip=3 -t 2
+```
 
 ```bash
 python3 main.py -i 'ids' -o 'fname.csv' -k 'key1' -c 'key2' -t 0.1
 ```
+### Running with Docker
+```bash
+docker run -it --entrypoint python3 <container name> main.py -i 'ids' -o 'fname.csv' -k 'key1' -c 'key2' -t 0.1
+```
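
As the note on `--input_column_number` says, the flag takes the 1-based numeric index of a column, not the letter shown in the Sheets UI. A throwaway conversion helper, illustrative only and not part of the commit:

```python
def column_letter_to_number(letters: str) -> int:
    """Convert a spreadsheet column letter to its 1-based index: 'B' -> 2, 'AA' -> 27."""
    number = 0
    for ch in letters.upper():
        number = number * 26 + (ord(ch) - ord('A') + 1)
    return number

assert column_letter_to_number('B') == 2
assert column_letter_to_number('AA') == 27
```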

google_export/main.py

Lines changed: 82 additions & 29 deletions
@@ -1,12 +1,16 @@
-import httpx
-import json
-import csv
 import argparse
-import os
+import csv
+import json
 import subprocess
+import time
 
+import httpx
+import pandas
+
+import sheets
 
-def get_name(user_id: str, curl_args):
+
+def get_name(user_id: str, curl_args, rec_limit=10):
     try:
         if type(user_id) != str or user_id.isdigit():
             status, output = subprocess.getstatusoutput(
@@ -22,10 +26,14 @@ def get_name(user_id: str, curl_args):
             user_id = user_id[1][4][0]
         return user_id
     except httpx.ConnectError:
-        print('ConnectError')
+        if rec_limit == 0:
+            print('ConnectError')
+            return ""
+        time.sleep(0.1)
+        return get_name(user_id, curl_args, rec_limit-1)
     return ""
 
-def get_link(user_id: str, curl_args):
+def get_link(user_id: str, curl_args, rec_limit=10):
     try:
         if type(user_id) != str or user_id.isdigit():
             status, output = subprocess.getstatusoutput(
@@ -41,7 +49,10 @@ def get_link(user_id: str, curl_args):
             user_id = user_id[-1][-1]
         return user_id
     except httpx.ConnectError:
-        print('ConnectError')
+        if rec_limit == 0:
+            print('ConnectError')
+            return ""
+        time.sleep(0.1)
     return ""
 
 def get_id_by_name(user_id: str, curl_args):
@@ -56,12 +67,12 @@ def get_id_by_name(user_id: str, curl_args):
             user_id = user_id[1][31]
         return user_id
 
-def get_awards_by_id(user_id: str | int, key: str, curl_args, timeout) -> dict:
+def get_awards_by_id(user_id: str | int, key: str, curl_args, timeout, rec_limit=10) -> dict:
     print(f'Processing id {user_id}')
     try:
         if not (type(user_id) != str or user_id.isdigit()):
             user_id = get_id_by_name(user_id, curl_args)
-
+
         c = httpx.get(f'https://developerprofiles-pa.clients6.google.com/v1/awards?access_token&locale&obfuscatedProfileId={user_id}&useBadges=true&key={key}',
             headers={
                 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0',
@@ -96,28 +107,31 @@ def get_awards_by_id(user_id: str | int, key: str, curl_args, timeout) -> dict:
         }
         return award_titles
     except httpx.ConnectError:
-        print('ConnectError')
-        return {}
+        if rec_limit == 0:
+            print('ConnectError')
+            return {}
+        time.sleep(0.1)
+        return get_awards_by_id(user_id, key, curl_args, timeout, rec_limit-1)
 
 
-def get_awards(ids: [str | int], key: str, curl_args, timeout) -> dict[set]:
-    awards = {user_id: get_awards_by_id(user_id, key, curl_args, timeout) for user_id in ids}
+def get_awards(ids: [str | int], key: str, curl_args, timeout, rec_limit=10) -> dict[set]:
+    awards = {user_id: get_awards_by_id(user_id, key, curl_args, timeout, rec_limit) for user_id in ids}
     return awards
 
 
-def write_to_local_csv(awards: dict[set], curl_args, fname: str = 'result.csv') -> None:
+def write_to_local_csv(awards: dict[set], curl_args, fname: str = 'result.csv', rec_limit=10) -> None:
     column_names = set()
     default_columns = [
-        'id',
-        'name',
+        'id',
+        'name',
         'link',
-        'public_profile',
-        'profile created',
+        'public_profile',
+        'profile created',
     ]
 
     for user_awards in awards.values():
         column_names.update(user_awards)
-    column_names = default_columns + list(column_names)
+    column_names = default_columns + sorted(list(column_names))
     with open(fname, 'w', newline='') as csvfile:
         award_writer = csv.writer(csvfile)
         award_writer.writerow(
@@ -127,10 +141,10 @@ def write_to_local_csv(awards: dict[set], curl_args, fname: str = 'result.csv')
         for user_awards in awards.items():
             row = [
                 get_id_by_name(user_awards[0], curl_args),
-                get_name(user_awards[0], curl_args),
-                get_link(user_awards[0], curl_args),
-                1 if len(user_awards[1]) else 0,
-                user_awards[1].get('Joined the Google Developer Program'),
+                get_name(user_awards[0], curl_args, rec_limit),
+                get_link(user_awards[0], curl_args, rec_limit),
+                1 if len(user_awards[1]) else 0,
+                user_awards[1].get('Joined the Google Developer Program'),
             ]
             for award_name in column_names[len(default_columns):]:
                 row.append(user_awards[1][award_name] if award_name in user_awards[1] else 'No')
@@ -150,10 +164,49 @@ def write_to_local_csv(awards: dict[set], curl_args, fname: str = 'result.csv')
 parser.add_argument('-k', '--key')
 parser.add_argument('-c', '--curl_args')
 parser.add_argument('-t', '--timeout', type=float, default=1)
-args = parser.parse_args()
+parser.add_argument('-r', '--repeat', type=int, default=10)
+
+parser.add_argument('--google_token', type=str, required=False, help='Specify path to google token file')
+parser.add_argument('--table_id', type=str, required=False)
+parser.add_argument('--sheet_id', type=str, required=False)
+parser.add_argument('--input_sheet_id', type=str, required=False)
+parser.add_argument('--input_column_number', type=int, required=False)
+parser.add_argument('--input_column_skip', type=int, required=False, default=0)
 
-with open(args.ids_file) as file:
-    lines = [line.rstrip() for line in file]
+parser.add_argument('--yandex_token', type=str, required=False)
+parser.add_argument('--yandex_path', type=str, required=False)
+
+args = parser.parse_args()
+lines = None
+if args.ids_file:
+    with open(args.ids_file) as file:
+        lines = [line.rstrip() for line in file]
 # ids = lines
-q = get_awards(lines, args.key, args.curl_args, args.timeout)
-write_to_local_csv(q, args.curl_args, args.output)
+elif args.google_token and args.table_id and args.input_sheet_id:
+    lines = sheets.read_ids_from_table(
+        args.google_token,
+        args.table_id,
+        args.input_sheet_id,
+        args.input_column_number
+    )
+    lines = sheets.cut_lines(lines, args.input_column_skip)
+else:
+    print('set ids file or google table input')
+q = get_awards(lines, args.key, args.curl_args, args.timeout, args.repeat)
+write_to_local_csv(q, args.curl_args, args.output, args.repeat)
+if args.google_token and args.table_id and args.sheet_id:
+    sheets.write_data_to_table(
+        pandas.read_csv(args.output),
+        args.google_token,
+        args.table_id,
+        args.sheet_id
+    )
+
+if args.yandex_token and args.yandex_path:
+    import yandex_disk
+    yandex_disk.DiskManager(
+        yatoken=args.yandex_token
+    ).upload(
+        args.output,
+        args.yandex_path
+    )
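
The common thread in these hunks is a retry-on-`httpx.ConnectError` pattern: each network helper gains a `rec_limit` parameter (fed from the new `-r/--repeat` flag, default 10), sleeps 0.1 s on a connection error, and recurses with `rec_limit-1` until the budget is exhausted. Note that the `get_link` hunk sleeps but never re-calls itself, so it still gives up after one failure. A minimal standalone sketch of the pattern; the function name and URL here are illustrative, not from the commit:

```python
import time

import httpx


def fetch_with_retry(url: str, rec_limit: int = 10) -> str:
    """Sketch of the commit's retry pattern; not part of the repository."""
    try:
        return httpx.get(url).text
    except httpx.ConnectError:
        if rec_limit == 0:
            # Same give-up behavior as get_name/get_awards_by_id.
            print('ConnectError')
            return ""
        time.sleep(0.1)  # brief pause, then retry with a smaller budget
        return fetch_with_retry(url, rec_limit - 1)
```

With the default budget of 10 the recursion stays shallow, far below Python's default recursion limit, so the recursive formulation is safe here.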

google_export/requirements.txt

Lines changed: 33 additions & 3 deletions
@@ -1,13 +1,43 @@
+annotated-types==0.7.0
 anyio==4.6.0
+cachetools==5.5.1
 certifi==2024.8.30
+charset-normalizer==3.4.1
 distlib==0.3.8
 exceptiongroup==1.2.2
-filelock==3.15.1
+filelock==3.16.1
+google-api-core==2.24.1
+google-api-python-client==2.160.0
+google-auth==2.38.0
+google-auth-httplib2==0.2.0
+google-auth-oauthlib==1.2.1
+googleapis-common-protos==1.66.0
 h11==0.14.0
 httpcore==1.0.5
+httplib2==0.22.0
 httpx==0.27.2
 idna==3.10
-platformdirs==4.2.2
+numpy==2.2.2
+oauthlib==3.2.2
+pandas==2.2.3
+platformdirs==4.3.6
+proto-plus==1.26.0
+protobuf==5.29.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pydantic_core==2.27.2
+pygsheets==2.0.6
+pyparsing==3.2.1
+python-dateutil==2.9.0.post0
+pytz==2025.1
+requests==2.32.3
+requests-oauthlib==2.0.0
+rsa==4.9
+six==1.17.0
 sniffio==1.3.1
 typing_extensions==4.12.2
-virtualenv==20.26.2
+tzdata==2025.1
+uritemplate==4.1.1
+urllib3==2.3.0
+virtualenv==20.26.6
+yadisk==3.2.0

google_export/sheets.py

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+import pygsheets
+
+def read_ids_from_table(google_token, table_id, sheet_id, column_number):
+    if google_token and sheet_id and table_id:
+        gc = pygsheets.authorize(service_file=google_token)
+        sh = gc.open_by_key(table_id)
+
+        try:
+            sh.worksheets('title', sheet_id)
+        except:
+            sh.add_worksheet(sheet_id)
+
+        wk_content = sh.worksheet_by_title(sheet_id)
+
+        return wk_content.get_col(column_number, include_tailing_empty=False)
+
+def cut_lines(lines: [str], skip=0):
+    prefixes = (
+        'https://g.dev/',
+        'https://developers.google.com/profile/u/'
+    )
+    new_lines = lines[skip:]
+    for i, line in enumerate(new_lines):
+        for prefix in prefixes:
+            if line.startswith(prefix):
+                new_lines[i] = line[len(prefix):]
+    return new_lines
+
+
+def write_data_to_table(df_data, google_token, table_id, sheet_id):
+    df_data = df_data.sort_values(by='Joined the Google Developer Program')
+
+    if google_token and sheet_id and table_id:
+        gc = pygsheets.authorize(service_file=google_token)
+        sh = gc.open_by_key(table_id)
+
+        try:
+            sh.worksheets('title', sheet_id)
+        except:
+            sh.add_worksheet(sheet_id)
+
+        wk_content = sh.worksheet_by_title(sheet_id)
+
+        wk_content.set_dataframe(df_data, 'A1', copy_head=True)
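
For a sense of what `cut_lines` does with real input: it drops the first `skip` rows (typically a header) and strips the two supported profile-URL prefixes, leaving bare ids. A quick usage sketch, assuming `sheets.py` is importable; the sample values are made up:

```python
import sheets

rows = [
    'profile link',  # header row, dropped by skip=1
    'https://g.dev/some_user',
    'https://developers.google.com/profile/u/123456789',
    'already_bare_id',
]
print(sheets.cut_lines(rows, skip=1))
# -> ['some_user', '123456789', 'already_bare_id']
```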

google_export/yandex_disk.py

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+"""Script with DiskManager class and functions for moodle backup
+duplicate moodle_export/*
+"""
+from datetime import datetime
+from os import environ, path
+from logging import getLogger
+
+import yadisk
+
+
+logger = getLogger()
+
+
+class DiskManager():
+    """Light YaDisk manager
+    """
+
+    def __init__(self, yatoken, download_path='./'):
+        self.client = yadisk.Client(token=yatoken or environ.get('YADISK_TOKEN'))
+        self.download_path = download_path
+
+    def upload(self, local_path: str, disk_path: str, overwrite=True):
+        """upload from local_path to disk_path
+
+        Args:
+            local_path (str): path to local file
+            disk_path (str): full path to file on yadisk
+            overwrite (bool): overwrite file. Defaults to true
+        """
+        logger.info("Uploading %s to %s", *(local_path, disk_path))
+        self.client.upload(local_path, disk_path, overwrite=overwrite)
+
+    def download_file_from_disk(self, remote_path: str):
+        """_summary_
+
+        Args:
+            remote_path (str): full path to file on yadisk
+
+        Returns:
+            str: path to downloaded file
+        """
+        local_path = self.download_path + path.basename(remote_path)
+        self.client.download(remote_path, local_path)
+        return local_path
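
Usage of the new `DiskManager` mirrors the call site added to `main.py`. A minimal sketch; the token and paths below are placeholders, not values from the commit:

```python
import yandex_disk

# The token may also come from the YADISK_TOKEN environment variable.
dm = yandex_disk.DiskManager(yatoken='YOUR_YADISK_TOKEN')
dm.upload('result.csv', '/exports/result.csv')  # local file -> full path on Yandex Disk
local = dm.download_file_from_disk('/exports/result.csv')
print(local)  # './result.csv' with the default download_path
```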
