|
12 | 12 | import logging |
13 | 13 | import os |
14 | 14 | import shutil |
15 | | -import tempfile |
16 | 15 | import traceback |
17 | 16 | import xml.etree.ElementTree as ET |
18 | 17 | from pathlib import Path |
|
23 | 22 | from typing import Set |
24 | 23 | from typing import Tuple |
25 | 24 |
|
26 | | -from binaryornot.helpers import is_binary_string |
27 | | -from git import DiffIndex |
28 | | -from git import Repo |
| 25 | +from fetchcode.vcs import fetch_via_vcs |
29 | 26 | from license_expression import Licensing |
30 | 27 | from packageurl import PackageURL |
31 | 28 | from univers.version_range import VersionRange |
@@ -312,193 +309,37 @@ def advisory_data(self) -> Iterable[AdvisoryData]: |
312 | 309 | raise NotImplementedError |
313 | 310 |
|
314 | 311 |
|
315 | | -# TODO: Needs rewrite |
316 | | -class GitImporter(Importer): |
317 | | - def validate_configuration(self) -> None: |
class ForkError(Exception):
    """
    Raised when a remote repository could not be fetched through the VCS layer.
    """
318 | 314 |
|
319 | | - if not self.config.create_working_directory and self.config.working_directory is None: |
320 | | - self.error( |
321 | | - '"create_working_directory" is not set but "working_directory" is set to ' |
322 | | - "the default, which calls tempfile.mkdtemp()" |
323 | | - ) |
324 | 315 |
|
325 | | - if not self.config.create_working_directory and not os.path.exists( |
326 | | - self.config.working_directory |
327 | | - ): |
328 | | - self.error( |
329 | | - '"working_directory" does not contain an existing directory and' |
330 | | - '"create_working_directory" is not set' |
331 | | - ) |
332 | | - |
333 | | - if not self.config.remove_working_directory and self.config.working_directory is None: |
334 | | - self.error( |
335 | | - '"remove_working_directory" is not set and "working_directory" is set to ' |
336 | | - "the default, which calls tempfile.mkdtemp()" |
337 | | - ) |
class GitImporter(Importer):
    """
    Base importer for advisory sources that are published as a VCS repository.

    Meant to be used as a context manager: entering fetches ``repo_url`` via
    ``fetch_via_vcs`` and leaving calls ``delete()`` on the fetch response.
    Subclasses must implement ``advisory_data``.
    """

    def __init__(self, repo_url):
        """
        Remember ``repo_url``; nothing is fetched until ``clone`` is called.
        """
        super().__init__()
        self.repo_url = repo_url
        # Response object returned by fetch_via_vcs(); stays None until
        # clone() has succeeded.
        self.vcs_response = None

    def __enter__(self):
        """
        Fetch the repository and return this importer.

        Raises ``ForkError`` (from ``clone``) when the fetch fails.
        """
        # NOTE(review): relies on the base Importer defining __enter__ -- the
        # base class is not visible from here, confirm.
        super().__enter__()
        self.clone()
        return self

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        """
        Dispose of the fetched repository when leaving the ``with`` block.

        The context manager protocol always passes the exception type, value
        and traceback, so they must be accepted here; they default to None so
        a direct ``importer.__exit__()`` call keeps working. Returns None, so
        an exception raised inside the ``with`` block is never suppressed.
        """
        # Nothing to clean up if no fetch ever completed.
        if self.vcs_response is not None:
            # presumably removes the local checkout -- verify against fetchcode
            self.vcs_response.delete()

    def clone(self):
        """
        Fetch ``self.repo_url`` and store the response in ``self.vcs_response``.

        Any failure is logged and re-raised as ``ForkError``, chained to the
        original exception.
        """
        try:
            self.vcs_response = fetch_via_vcs(self.repo_url)
        except Exception as e:
            msg = f"Failed to fetch {self.repo_url} via vcs: {e}"
            logger.error(msg)
            raise ForkError(msg) from e

    def advisory_data(self) -> Iterable[AdvisoryData]:
        """
        Return AdvisoryData objects corresponding to the data being imported
        """
        raise NotImplementedError
502 | 343 |
|
503 | 344 |
|
504 | 345 | # TODO: Needs rewrite |
|
0 commit comments