Skip to content

Commit d94f477

Browse files
authored
feat: Much faster REST API import (#4035)
Reduces the REST API import time from 21 minutes to 2 seconds by skipping jsonschema validation, since only the (modified, id) fields need to be parsed.
1 parent c82cae6 commit d94f477

File tree

1 file changed

+14
-6
lines changed

1 file changed

+14
-6
lines changed

gcp/workers/importer/importer.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import logging
2626
import os
2727
import requests
28+
from google.protobuf import json_format
2829
from requests.adapters import HTTPAdapter
2930
import shutil
3031
import threading
@@ -42,6 +43,7 @@
4243

4344
import osv
4445
import osv.logs
46+
from osv import vulnerability_pb2
4547

4648
DEFAULT_WORK_DIR = '/work'
4749
DEFAULT_PUBLIC_LOGGING_BUCKET = 'osv-public-import-logs'
@@ -847,7 +849,7 @@ def _process_deletions_bucket(self,
847849

848850
def _process_updates_rest(self, source_repo: osv.SourceRepository):
849851
"""Process updates from REST API.
850-
852+
851853
To find new updates, first makes a HEAD request to check the 'Last-Modified'
852854
header, and skips processing if it's before the source's last_modified_date
853855
(and ignore_last_import_time isn't set).
@@ -903,11 +905,17 @@ def _process_updates_rest(self, source_repo: osv.SourceRepository):
903905
except Exception:
904906
logging.exception('Exception querying REST API:')
905907
return
906-
# Parse vulns into Vulnerability objects from the REST API request.
907-
vulns = osv.parse_vulnerabilities_from_data(
908-
request.text,
909-
source_repo.extension,
910-
strict=source_repo.strict_validation and self._strict_validation)
908+
909+
data = json.loads(request.text)
910+
vulns = []
911+
for datum in data:
912+
vulnerability = vulnerability_pb2.Vulnerability()
913+
json_format.ParseDict(datum, vulnerability, ignore_unknown_fields=True)
914+
if not vulnerability.id:
915+
raise ValueError('Missing id field. Invalid vulnerability.')
916+
if not vulnerability.modified:
917+
raise ValueError('Missing modified field. Invalid vulnerability.')
918+
vulns.append(vulnerability)
911919

912920
vulns_last_modified = last_update_date
913921
logging.info('%d records to consider', len(vulns))

0 commit comments

Comments
 (0)