Skip to content

Commit 727098b

Browse files
committed
feat(search): KTL-1516: migrate analytics to an analytics-agnostic file
- Created a new `get_page_views_statistic()` that reads page view statistics from a locally saved file - Dropped the dependency on Google Analytics
1 parent 5c0f322 commit 727098b

File tree

2 files changed

+12
-59
lines changed

2 files changed

+12
-59
lines changed

requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,4 @@ git+https://github.com/pik-software/geocoder.git@yandex-api-key#egg=geocoder
1414
ruamel.yaml==0.17.21
1515
PyYAML==5.4.1
1616
algoliasearch==1.20.0
17-
google-api-python-client==1.6.2
1817
Werkzeug==2.3.8

src/search.py

Lines changed: 12 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,24 @@
1+
import json
12
import os
23
from typing import Dict, List, Iterator
34

45
from algoliasearch import algoliasearch
56
from algoliasearch.index import Index
67
from bs4 import Tag
7-
from googleapiclient.discovery import build, Resource
8-
from oauth2client.service_account import ServiceAccountCredentials
98

109
from src.api import get_api_page
1110
from src.dist import get_dist_page_xml, dist_path
1211

1312

14-
def initialize_analyticsreporting() -> Resource:
15-
credentials = ServiceAccountCredentials.from_json_keyfile_name(
16-
os.environ['KEY_FILE_LOCATION'], scopes='https://www.googleapis.com/auth/analytics.readonly')
17-
analytics = build('analyticsreporting', 'v4', credentials=credentials)
18-
return analytics
19-
20-
21-
def get_report(analytics: Resource) -> Dict:
22-
return analytics.reports().batchGet(
23-
body={
24-
"reportRequests":
25-
[
26-
{
27-
"viewId": "85132606",
28-
"samplingLevel": "LARGE",
29-
"filtersExpression": "ga:hostname==kotlinlang.org;ga:pagepath!@?",
30-
"pageSize": 10000,
31-
"orderBys": [
32-
{
33-
"fieldName": "ga:uniquepageviews",
34-
"sortOrder": "DESCENDING"
35-
}
36-
],
37-
"dateRanges":
38-
[
39-
{
40-
"startDate": "30daysAgo",
41-
"endDate": "yesterday"
42-
}
43-
],
44-
"metrics":
45-
[
46-
{
47-
"expression": "ga:uniquepageviews",
48-
"alias": ""
49-
}
50-
],
51-
"dimensions":
52-
[
53-
{
54-
"name": "ga:pagePath"
55-
}
56-
]
57-
}
58-
]
59-
}).execute()
13+
def get_page_views_statistic() -> Dict[str, int]:
    """Load page-view statistics from the local ``page_views_map.json`` file.

    The file is looked up relative to the current working directory and parsed
    as JSON; the parsed object is returned unchanged (no validation is done).
    It is expected to map page URLs to view counts, as the return annotation
    indicates.

    Returns:
        The parsed contents of ``page_views_map.json``.

    Raises:
        OSError: if the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    print("Acquiring page view statistic")

    # A context manager guarantees the handle is closed even when json.load
    # raises; the explicit encoding keeps decoding independent of the locale.
    with open("page_views_map.json", "r", encoding="utf-8") as file:
        page_views = json.load(file)

    print("Page view statistic acquired")

    return page_views
7123

7224

@@ -110,7 +62,8 @@ def get_valuable_content(page_path, content: Iterator[Tag]) -> List[str]:
11062
valuable_content.append(child.text)
11163
elif child.name in ['ul', 'ol', 'blockquote', 'div', 'section', 'dl']:
11264
valuable_content += get_valuable_content(page_path, child.children)
113-
elif child.name in ['figure', 'iframe', 'pre', 'code', 'hr', 'table', 'script', 'link', 'a', 'br', 'i', 'img', 'object']:
65+
elif child.name in ['figure', 'iframe', 'pre', 'code', 'hr', 'table', 'script', 'link', 'a', 'br', 'i', 'img',
66+
'object']:
11467
continue
11568
else:
11669
raise Exception('Unknown tag "' + child.name + '" in ' + page_path)
@@ -243,8 +196,9 @@ def build_search_indices(pages):
243196
page_path = get_page_path_from_url(url)
244197
page_views = 0
245198

246-
if url in page_views_statistic:
247-
page_views = page_views_statistic[url]
199+
public_url = "https://kotlinlang.org" + url
200+
if public_url in page_views_statistic:
201+
page_views = page_views_statistic[public_url]
248202

249203
if type == 'Page_Community':
250204
page_type = 'Community'

0 commit comments

Comments
 (0)