Skip to content

Commit eb716b9

Browse files
committed
Merge branch 'main' into 247-purl-cli-add-urls
Signed-off-by: John M. Horan <[email protected]>
2 parents 73f4599 + e359516 commit eb716b9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+3352
-237
lines changed

.github/workflows/docs-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
strategy:
1010
max-parallel: 4
1111
matrix:
12-
python-version: [3.9]
12+
python-version: ["3.10"]
1313

1414
steps:
1515
- name: Checkout code

.github/workflows/purldb-tests.yml

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
runs-on: ubuntu-20.04
1414

1515
services:
16-
postgres:
16+
postgres1:
1717
image: postgres:13
1818
env:
1919
POSTGRES_DB: ${{ env.POSTGRES_DB }}
@@ -28,10 +28,25 @@ jobs:
2828
ports:
2929
- 5432:5432
3030

31+
postgres2:
32+
image: postgres:13
33+
env:
34+
POSTGRES_DB: matchcodeio
35+
POSTGRES_USER: matchcodeio
36+
POSTGRES_PASSWORD: matchcodeio
37+
POSTGRES_INITDB_ARGS: ${{ env.POSTGRES_INITDB_ARGS }}
38+
options: >-
39+
--health-cmd pg_isready
40+
--health-interval 10s
41+
--health-timeout 5s
42+
--health-retries 5
43+
ports:
44+
- 5433:5432
45+
3146
strategy:
3247
max-parallel: 4
3348
matrix:
34-
python-version: ["3.8", "3.9", "3.10"]
49+
python-version: ["3.10", "3.11"]
3550

3651
steps:
3752
- name: Checkout code
@@ -50,5 +65,7 @@ jobs:
5065
- name: Run tests
5166
working-directory: .
5267
run: |
53-
make envfile
68+
make envfile_testing
69+
sudo mkdir /etc/scancodeio
70+
sudo cp .env /etc/scancodeio
5471
make test

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,5 @@ tcl
7474

7575
# Env Files
7676
.env
77+
78+
var/

CHANGELOG.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
Changelog
22
=========
33

4-
next-version
4+
v4.0.0
55
------------
66

7-
- TBD
7+
- Add ``/api/docs`` Swagger API documentation for API endpoints.
88

99
v3.0.0
1010
-------

Dockerfile

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright (c) nexB Inc. and others. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

4-
FROM --platform=linux/amd64 python:3.9
4+
FROM --platform=linux/amd64 python:3.11
55

66
WORKDIR /app
77

@@ -31,10 +31,9 @@ RUN apt-get update \
3131
&& apt-get clean \
3232
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
3333

34-
COPY setup.cfg setup.py /app/
35-
RUN mkdir -p /app/matchcode-toolkit/src/
36-
COPY matchcode-toolkit/setup.cfg matchcode-toolkit/setup.py /app/matchcode-toolkit/
37-
RUN pip install -e matchcode-toolkit
38-
RUN pip install -e .
34+
# Install the dependencies before the codebase COPY for proper Docker layer caching
35+
COPY setup.cfg setup.py requirements.txt /app/
36+
RUN pip install --no-cache-dir -c requirements.txt .
3937

38+
# Copy the codebase
4039
COPY . /app

MANIFEST.in

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
graft src
1+
graft clearcode
2+
graft clearindex
3+
graft matchcode
4+
graft minecode
5+
graft packagedb
6+
graft purldb
27

38
include *.LICENSE
49
include NOTICE

Makefile

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
# Python version can be specified with `$ PYTHON_EXE=python3.x make conf`
1111
PYTHON_EXE?=python3
1212
VENV=venv
13-
MANAGE=${VENV}/bin/python manage.py
13+
MANAGE=${VENV}/bin/python manage_purldb.py
14+
MATCHCODE_MANAGE=${VENV}/bin/python manage_matchcode.py
1415
ACTIVATE?=. ${VENV}/bin/activate;
1516
VIRTUALENV_PYZ=../etc/thirdparty/virtualenv.pyz
1617
# Do not depend on Python to generate the SECRET_KEY
@@ -19,6 +20,7 @@ GET_SECRET_KEY=`base64 /dev/urandom | head -c50`
1920
ENV_FILE=.env
2021
# Customize with `$ make postgres PACKAGEDB_DB_PASSWORD=YOUR_PASSWORD`
2122
PACKAGEDB_DB_PASSWORD=packagedb
23+
MATCHCODEIO_DB_PASSWORD=matchcodeio
2224

2325
# Use sudo for postgres, but only on Linux
2426
UNAME := $(shell uname)
@@ -46,6 +48,13 @@ envfile:
4648
@mkdir -p $(shell dirname ${ENV_FILE}) && touch ${ENV_FILE}
4749
@echo SECRET_KEY=\"${GET_SECRET_KEY}\" > ${ENV_FILE}
4850

51+
envfile_testing:
52+
@echo "-> Create the .env file and generate a secret key"
53+
@if test -f ${ENV_FILE}; then echo ".env file exists already"; exit 1; fi
54+
@mkdir -p $(shell dirname ${ENV_FILE}) && touch ${ENV_FILE}
55+
@echo SECRET_KEY=\"${GET_SECRET_KEY}\" >> ${ENV_FILE}
56+
@echo SCANCODEIO_DB_PORT=\"5433\" >> ${ENV_FILE}
57+
4958
isort:
5059
@echo "-> Apply isort changes to ensure proper imports ordering"
5160
${VENV}/bin/isort .
@@ -87,9 +96,23 @@ postgres:
8796
${SUDO_POSTGRES} createdb --encoding=utf-8 --owner=packagedb packagedb
8897
@$(MAKE) migrate
8998

99+
postgres_matchcodeio:
100+
@echo "-> Configure PostgreSQL database"
101+
@echo "-> Create database user 'matchcodeio'"
102+
${SUDO_POSTGRES} createuser --no-createrole --no-superuser --login --inherit --createdb matchcodeio || true
103+
${SUDO_POSTGRES} psql -c "alter user matchcodeio with encrypted password '${MATCHCODEIO_DB_PASSWORD}';" || true
104+
@echo "-> Drop 'matchcodeio' database"
105+
${SUDO_POSTGRES} dropdb matchcodeio || true
106+
@echo "-> Create 'matchcodeio' database"
107+
${SUDO_POSTGRES} createdb --encoding=utf-8 --owner=matchcodeio matchcodeio
108+
${MATCHCODE_MANAGE} migrate
109+
90110
run:
91111
${MANAGE} runserver 8001 --insecure
92112

113+
run_matchcodeio:
114+
${MATCHCODE_MANAGE} runserver 8002 --insecure
115+
93116
seed:
94117
${MANAGE} seed
95118

@@ -107,8 +130,9 @@ process_scans:
107130

108131
test:
109132
@echo "-> Run the test suite"
110-
${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb_project.settings ${PYTHON_EXE} -m pytest -vvs --ignore matchcode-toolkit --ignore purldb-toolkit --ignore packagedb/tests/test_throttling.py
133+
${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb_project.settings ${PYTHON_EXE} -m pytest -vvs --ignore matchcode-toolkit --ignore matchcode_pipeline --ignore matchcode_project --ignore purldb-toolkit --ignore packagedb/tests/test_throttling.py
111134
${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb_project.settings ${PYTHON_EXE} -m pytest -vvs packagedb/tests/test_throttling.py
135+
${ACTIVATE} DJANGO_SETTINGS_MODULE=matchcode_project.settings ${PYTHON_EXE} -m pytest -vvs matchcode_pipeline
112136
${ACTIVATE} ${PYTHON_EXE} -m pytest -vvs matchcode-toolkit --ignore matchcode-toolkit/src/matchcode_toolkit/pipelines
113137
${ACTIVATE} ${PYTHON_EXE} -m pytest -vvs purldb-toolkit
114138

README.rst

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ This repo consists of four main tools:
88
- MineCode that contains utilities to mine package repositories
99
- MatchCode that contains utilities to index package metadata and resources for
1010
matching
11+
- MatchCode.io that provides package matching functionalities for codebases
1112
- ClearCode that contains utilities to mine Clearlydefined for package data
1213

1314
These are designed to be used first for reference such that one can query for
@@ -39,6 +40,7 @@ Once the prerequisites have been installed, set up PurlDB with the following com
3940
make dev
4041
make envfile
4142
make postgres
43+
make postgres_matchcodeio
4244

4345
Once PurlDB and the database has been set up, run tests to ensure functionality:
4446
::
@@ -53,6 +55,11 @@ Start the PurlDB server by running:
5355

5456
make run
5557

58+
Start the MatchCode.io server by running:
59+
::
60+
61+
make run_matchcodeio
62+
5663
To start visiting upstream package repositories for package metadata:
5764
::
5865

@@ -69,33 +76,13 @@ Populating Package Resource Data
6976
The Resources of Packages can be collected using the scan queue. By default, a
7077
scan request will be created for each mapped Package.
7178

72-
The following environment variables will have to be set for the scan queue
73-
commands to work:
79+
Given that you have access to a ScanCode.io instance, the following environment
80+
variables will have to be set for the scan queue commands to work:
7481
::
7582

7683
SCANCODEIO_URL=<ScanCode.io API URL>
7784
SCANCODEIO_API_KEY=<ScanCode.io API Key>
7885

79-
``matchcode-toolkit`` will also have to be installed in the same environment as
80-
ScanCode.io. If running ScanCode.io in a virtual environment from a git
81-
checkout, you can install ``matchcode-toolkit`` in editable mode:
82-
::
83-
84-
pip install -e <Path to purldb/matchcode-toolkit>
85-
86-
Otherwise, you can create a wheel from ``matchcode-toolkit`` and install it in
87-
the ScanCode.io virutal environment or modify the ScanCode.io Dockerfile to
88-
install the ``matchcode-toolkit`` wheel.
89-
90-
To build the ``matchcode-toolkit`` wheel:
91-
::
92-
93-
# From the matchcode-toolkit directory
94-
python setup.py bdist_wheel
95-
96-
The wheel ``matchcode_toolkit-0.0.1-py3-none-any.whl`` will be created in the
97-
``matchcode-toolkit/dist/`` directory.
98-
9986
The scan queue is run using two commands:
10087
::
10188

@@ -136,8 +123,8 @@ matching indices from the collected Package data:
136123
make index_packages
137124

138125

139-
API Endpoints
140-
-------------
126+
PurlDB API Endpoints
127+
--------------------
141128

142129
* ``api/packages``
143130

@@ -172,6 +159,51 @@ API Endpoints
172159
* Used to check the SHA1 values of archives from a scan to determine if they are known Packages
173160

174161

162+
MatchCode.io
163+
------------
164+
165+
MatchCode.io is a Django app, based on ScanCode.io, that exposes one API
166+
endpoint, ``api/matching``, which takes a ScanCode.io codebase scan, and
167+
performs Package matching on it.
168+
169+
Currently, it performs three matching steps:
170+
171+
* Match codebase resources against the Packages in the PackageDB
172+
* Match codebase resources against the Resources in the PackageDB
173+
* Match codebase directories against the directory matching indices of
174+
MatchCode
175+
176+
177+
MatchCode.io API Endpoints
178+
--------------------------
179+
180+
* ``api/matching``
181+
182+
* Performs Package matching on an uploaded ScanCode.io scan
183+
* Intended to be used with the ``match_to_purldb`` pipeline in ScanCode.io
184+
185+
186+
Docker Setup for Local Development and Testing
187+
----------------------------------------------
188+
189+
PurlDB and MatchCode.io are two separate Django apps. In order to run both of
190+
these Django apps on the same host, we need to use Traefik.
191+
192+
Traefik is an edge router that receives requests and finds out which services
193+
are responsible for handling them. In the docker-compose.yml files for PurlDB
194+
and MatchCode.io, we have made these two services part of the same Docker
195+
network and set up the routes for each service.
196+
197+
All requests to the host go to the PurlDB service, but requests that go to the
198+
``api/matching`` endpoint are routed to the MatchCode.io service.
199+
200+
To run PurlDB and MatchCode.io with Docker:
201+
::
202+
203+
docker compose -f docker-compose_traefik.yml up -d
204+
docker compose -f docker-compose_purldb.yml up -d
205+
docker compose -f docker-compose_matchcodeio.yml up -d
206+
175207
Funding
176208
-------
177209

docker-compose_matchcodeio.yml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
version: "3"
2+
3+
services:
4+
matchcodeio_db:
5+
image: postgres:13
6+
env_file:
7+
- docker_matchcodeio.env
8+
volumes:
9+
- matchcodeio_db_data:/var/lib/postgresql/data/
10+
11+
matchcodeio_redis:
12+
image: redis
13+
# Enable redis data persistence using the "Append Only File" with the
14+
# default policy of fsync every second. See https://redis.io/topics/persistence
15+
command: redis-server --appendonly yes
16+
volumes:
17+
- redis_data:/data
18+
restart: always
19+
20+
matchcodeio_web:
21+
build: .
22+
# Wait for this stack's own database service (matchcodeio_db, defined above) before running migrations; no service named "db" is defined in this compose file.
command: wait-for-it --strict --timeout=60 matchcodeio_db:5432 -- sh -c "
23+
python manage_matchcode.py migrate --database default &&
24+
python manage_matchcode.py collectstatic --no-input --verbosity 0 --clear &&
25+
gunicorn matchcode_project.wsgi:application --bind :8001 --timeout 600 --workers 8"
26+
environment:
27+
- DJANGO_SETTINGS_MODULE=matchcode_project.settings
28+
env_file:
29+
- docker_matchcodeio.env
30+
expose:
31+
- 8001
32+
volumes:
33+
- .env:/opt/scancodeio/.env
34+
- /etc/scancodeio/:/etc/scancodeio/
35+
- workspace:/var/scancodeio/workspace/
36+
- static:/var/scancodeio/static/
37+
38+
matchcodeio_worker:
39+
build: .
40+
# Ensure that potential db migrations run first by waiting until "web" is up
41+
command: wait-for-it --strict --timeout=120 matchcodeio_web:8001 -- sh -c "
42+
python manage_matchcode.py rqworker --worker-class scancodeio.worker.ScanCodeIOWorker
43+
--queue-class scancodeio.worker.ScanCodeIOQueue
44+
--verbosity 1"
45+
environment:
46+
- DJANGO_SETTINGS_MODULE=matchcode_project.settings
47+
env_file:
48+
- docker_matchcodeio.env
49+
volumes:
50+
- .env:/opt/scancodeio/.env
51+
- /etc/scancodeio/:/etc/scancodeio/
52+
- workspace:/var/scancodeio/workspace/
53+
depends_on:
54+
- matchcodeio_redis
55+
- matchcodeio_web
56+
57+
matchcodeio_nginx:
58+
image: nginx:alpine
59+
volumes:
60+
- ./etc/nginx/matchcodeio-conf.d/:/etc/nginx/conf.d/
61+
- /var/www/html:/var/www/html
62+
- static:/var/scancodeio/static/
63+
depends_on:
64+
- matchcodeio_web
65+
restart: always
66+
labels:
67+
- "traefik.enable=true"
68+
- "traefik.http.routers.matchcodeio.rule=
69+
Host(`127.0.0.1`) && PathPrefix(`/api/matching`)
70+
|| Host(`127.0.0.1`) && PathPrefix(`/api/runs`)
71+
|| Host(`127.0.0.1`) && PathPrefix(`/project`)
72+
|| Host(`127.0.0.1`) && PathPrefix(`/runs`)
73+
|| Host(`localhost`) && PathPrefix(`/api/matching`)
74+
|| Host(`localhost`) && PathPrefix(`/api/runs`)
75+
|| Host(`localhost`) && PathPrefix(`/project`)
76+
|| Host(`localhost`) && PathPrefix(`/runs`)"
77+
- "traefik.http.routers.matchcodeio.entrypoints=web"
78+
79+
networks:
80+
default:
81+
name: purldb
82+
external: true
83+
84+
volumes:
85+
redis_data:
86+
static:
87+
workspace:
88+
matchcodeio_db_data:

0 commit comments

Comments
 (0)