|
| 1 | +import json |
1 | 2 | import os
|
2 | 3 | from typing import Dict, List, Iterator
|
3 | 4 |
|
4 | 5 | from algoliasearch import algoliasearch
|
5 | 6 | from algoliasearch.index import Index
|
6 | 7 | from bs4 import Tag
|
7 |
| -from googleapiclient.discovery import build, Resource |
8 |
| -from oauth2client.service_account import ServiceAccountCredentials |
9 | 8 |
|
10 | 9 | from src.api import get_api_page
|
11 | 10 | from src.dist import get_dist_page_xml, dist_path
|
12 | 11 |
|
13 | 12 |
|
14 |
| -def initialize_analyticsreporting() -> Resource: |
15 |
| - credentials = ServiceAccountCredentials.from_json_keyfile_name( |
16 |
| - os.environ['KEY_FILE_LOCATION'], scopes='https://www.googleapis.com/auth/analytics.readonly') |
17 |
| - analytics = build('analyticsreporting', 'v4', credentials=credentials) |
18 |
| - return analytics |
19 |
| - |
20 |
| - |
21 |
| -def get_report(analytics: Resource) -> Dict: |
22 |
| - return analytics.reports().batchGet( |
23 |
| - body={ |
24 |
| - "reportRequests": |
25 |
| - [ |
26 |
| - { |
27 |
| - "viewId": "85132606", |
28 |
| - "samplingLevel": "LARGE", |
29 |
| - "filtersExpression": "ga:hostname==kotlinlang.org;ga:pagepath!@?", |
30 |
| - "pageSize": 10000, |
31 |
| - "orderBys": [ |
32 |
| - { |
33 |
| - "fieldName": "ga:uniquepageviews", |
34 |
| - "sortOrder": "DESCENDING" |
35 |
| - } |
36 |
| - ], |
37 |
| - "dateRanges": |
38 |
| - [ |
39 |
| - { |
40 |
| - "startDate": "30daysAgo", |
41 |
| - "endDate": "yesterday" |
42 |
| - } |
43 |
| - ], |
44 |
| - "metrics": |
45 |
| - [ |
46 |
| - { |
47 |
| - "expression": "ga:uniquepageviews", |
48 |
| - "alias": "" |
49 |
| - } |
50 |
| - ], |
51 |
| - "dimensions": |
52 |
| - [ |
53 |
| - { |
54 |
| - "name": "ga:pagePath" |
55 |
| - } |
56 |
| - ] |
57 |
| - } |
58 |
| - ] |
59 |
| - }).execute() |
def get_page_views_statistic() -> Dict[str, int]:
    """Load the page-view statistic from ``page_views_map.json``.

    The file is looked up relative to the current working directory and
    parsed as JSON. Progress messages are printed before and after loading.

    Returns:
        The parsed JSON content. Presumably a mapping of public page URL
        (e.g. ``"https://kotlinlang.org/..."``) to its view count -- the
        schema is not validated here; verify against the file's producer.

    Raises:
        OSError: If ``page_views_map.json`` cannot be opened
            (``FileNotFoundError`` when it does not exist).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    print("Acquiring page view statistic")

    # The context manager closes the file even when json.load() raises;
    # the explicit encoding keeps decoding independent of the platform locale.
    with open("page_views_map.json", "r", encoding="utf-8") as file:
        page_views = json.load(file)

    print("Page view statistic acquired")

    return page_views
|
71 | 23 |
|
72 | 24 |
|
@@ -110,7 +62,8 @@ def get_valuable_content(page_path, content: Iterator[Tag]) -> List[str]:
|
110 | 62 | valuable_content.append(child.text)
|
111 | 63 | elif child.name in ['ul', 'ol', 'blockquote', 'div', 'section', 'dl']:
|
112 | 64 | valuable_content += get_valuable_content(page_path, child.children)
|
113 |
| - elif child.name in ['figure', 'iframe', 'pre', 'code', 'hr', 'table', 'script', 'link', 'a', 'br', 'i', 'img', 'object']: |
| 65 | + elif child.name in ['figure', 'iframe', 'pre', 'code', 'hr', 'table', 'script', 'link', 'a', 'br', 'i', 'img', |
| 66 | + 'object']: |
114 | 67 | continue
|
115 | 68 | else:
|
116 | 69 | raise Exception('Unknown tag "' + child.name + '" in ' + page_path)
|
@@ -243,8 +196,9 @@ def build_search_indices(pages):
|
243 | 196 | page_path = get_page_path_from_url(url)
|
244 | 197 | page_views = 0
|
245 | 198 |
|
246 |
| - if url in page_views_statistic: |
247 |
| - page_views = page_views_statistic[url] |
| 199 | + public_url = "https://kotlinlang.org" + url |
| 200 | + if public_url in page_views_statistic: |
| 201 | + page_views = page_views_statistic[public_url] |
248 | 202 |
|
249 | 203 | if type == 'Page_Community':
|
250 | 204 | page_type = 'Community'
|
|
0 commit comments