Skip to content

Commit 597e1fa

Browse files
authored
Actually upload the generated sitemaps (and perform logging) (#2309)
Turns out, the original code wasn't doing any uploading 🤦
1 parent 46c25da commit 597e1fa

File tree

6 files changed

+29
-7
lines changed

6 files changed

+29
-7
lines changed

deployment/clouddeploy/gke-workers/base/generate-sitemap.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ spec:
1313
- name: generate-sitemap-cron
1414
image: cron
1515
imagePullPolicy: Always
16-
command: ["/usr/local/bin/generate_sitemap/generate_sitemap.py", "--base_url", "$BASE_URL"]
16+
command: ["/usr/local/bin/generate_sitemap/generate_and_upload.sh"]
1717
resources:
1818
requests:
1919
cpu: 1

deployment/clouddeploy/gke-workers/environments/oss-vdb-test/generate-sitemap.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ spec:
1414
value: "https://test.osv.dev"
1515
- name: GOOGLE_CLOUD_PROJECT
1616
value: oss-vdb-test
17+
- name: OUTPUT_GCS_BUCKET
18+
value: test-osv-dev-sitemap

deployment/clouddeploy/gke-workers/environments/oss-vdb/generate-sitemap.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ spec:
1414
value: "https://osv.dev"
1515
- name: GOOGLE_CLOUD_PROJECT
1616
value: oss-vdb
17+
- name: OUTPUT_GCS_BUCKET
18+
value: osv-dev-sitemap
docker/cron/generate_sitemap/generate_and_upload.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
2+
3+
set -e
4+
5+
SITEMAP_OUTPUT="sitemap_output/"
6+
OUTPUT_BUCKET="${OUTPUT_GCS_BUCKET:=test-osv-dev-sitemap}"
7+
BASE_URL_PATH="${BASE_URL:=https://test.osv.dev}"
8+
9+
echo "Begin sitemap generation for $BASE_URL_PATH"
10+
11+
./generate_sitemap.py --base_url $BASE_URL_PATH
12+
13+
echo "Begin Syncing with cloud to $OUTPUT_BUCKET"
14+
15+
gsutil -m rsync -c -d $SITEMAP_OUTPUT "gs://$OUTPUT_BUCKET/"

docker/cron/generate_sitemap/generate_sitemap.py

100644 → 100755
Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
"""Generate sitemap."""
17+
import logging
1718
import sys
1819
import os
1920
import osv
@@ -59,17 +60,18 @@ def get_sitemap_url_for_ecosystem(ecosystem: str, base_url: str) -> str:
5960

6061
def generate_sitemap_for_ecosystem(ecosystem: str, base_url: str) -> None:
6162
"""Generate a sitemap for the given ecosystem."""
63+
logging.info('Generating sitemap for ecosystem "%s".', ecosystem)
6264
vulnerability_ids = fetch_vulnerability_ids(ecosystem)
6365
filename = get_sitemap_filename_for_ecosystem(ecosystem)
6466
urlset = Element(
65-
'urlset', xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
67+
'urlset', xmlns='http://www.sitemaps.org/schemas/sitemap/0.9')
6668

6769
# TODO: For large ecosystems with over 50,000 vulnerabilities, generate
6870
# multiple sitemaps.
6971
for vuln in vulnerability_ids[:_SITEMAP_URL_LIMIT]:
7072
url = SubElement(urlset, 'url')
7173
loc = SubElement(url, 'loc')
72-
loc.text = f"{base_url}/vulnerability/{vuln}"
74+
loc.text = f'{base_url}/vulnerability/{vuln}'
7375
lastmod = SubElement(url, 'lastmod')
7476
lastmod.text = datetime.datetime.now().isoformat()
7577

@@ -79,11 +81,12 @@ def generate_sitemap_for_ecosystem(ecosystem: str, base_url: str) -> None:
7981

8082
def generate_sitemap_index(ecosystems: set[str], base_url: str) -> None:
8183
"""Generate a sitemap index."""
84+
logging.info('Generating sitemap index.')
8285
sitemapindex = Element(
83-
'sitemapindex', xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
86+
'sitemapindex', xmlns='http://www.sitemaps.org/schemas/sitemap/0.9')
8487

8588
for ecosystem in ecosystems:
86-
sitemap = SubElement(sitemapindex, "sitemap")
89+
sitemap = SubElement(sitemapindex, 'sitemap')
8790
loc = SubElement(sitemap, 'loc')
8891
loc.text = get_sitemap_url_for_ecosystem(ecosystem, base_url)
8992
lastmod = SubElement(sitemap, 'lastmod')

gcp/appengine/frontend_handlers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ def index():
117117

118118
@blueprint.route('/robots.txt')
119119
def robots():
120-
response = make_response(f"Sitemap: {request.host_url}sitemap_index.xml\n")
121-
response.mimetype = "text/plain"
120+
response = make_response(f'Sitemap: {request.host_url}sitemap_index.xml\n')
121+
response.mimetype = 'text/plain'
122122
return response
123123

124124

0 commit comments

Comments
 (0)