Skip to content

Commit 0a67a1f

Browse files
GlassOfWhiskeymr-c
authored and committed
Better identify repo licenses
This commit introduces calls to GitHub and GitLab APIs to better identify licenses in the Git repos. In particular, whenever the Git provider service is able to identify the license, the license_link field is populated with the SPDX URL of the corresponding license.
1 parent 76da3f2 commit 0a67a1f

File tree

13 files changed

+575
-11
lines changed

13 files changed

+575
-11
lines changed

.github/workflows/ci-build.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ jobs:
3232
distribution: 'adopt'
3333
java-version: '17'
3434

35+
- name: Set up Ruby
36+
uses: ruby/setup-ruby@v1
37+
with:
38+
ruby-version: '3.1'
39+
40+
- name: Install Licensee
41+
run: gem install licensee
42+
3543
- name: Install system packages
3644
run: |
3745
sudo apt-get -qq update

CONTRIBUTING.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ which allows them to be displayed in browsers as static prototypes.
8181
PostgreSQL is used to store information about `Workflow` and `QueuedWorkflow`
8282
objects using [Spring Data JPA](https://docs.spring.io/spring-data/jpa/docs/current/reference/html/).
8383

84+
The [Licensee](https://github.com/licensee/licensee) Ruby Gem is used to automatically infer license information from
85+
Git repositories.
86+
8487
The application also uses a triple store to keep the RDF representing
8588
workflows (gathered from [cwltool](https://github.com/common-workflow-language/cwltool)'s
8689
`--print-rdf` functionality).

Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
FROM maven:3-eclipse-temurin-17-alpine AS build-licensee
2+
3+
RUN apk add --update \
4+
alpine-sdk \
5+
cmake \
6+
heimdal-dev \
7+
ruby-dev \
8+
&& rm -rf /var/cache/apk/*
9+
10+
RUN gem install licensee
11+
12+
113
FROM maven:3-eclipse-temurin-17-alpine
214
MAINTAINER Stian Soiland-Reyes <[email protected]>
315

@@ -27,6 +39,8 @@ RUN apk add --update \
2739
libxml2-dev \
2840
libxml2-utils \
2941
libxslt-dev \
42+
ruby \
43+
heimdal \
3044
&& rm -rf /var/cache/apk/*
3145

3246
#wheel needed by ruamel.yaml for some reason
@@ -40,6 +54,9 @@ RUN mkdir /usr/share/maven/ref/repository
4054
RUN mkdir -p /usr/src/app
4155
WORKDIR /usr/src/app
4256

57+
COPY --from=build-licensee /usr/lib/ruby/gems/ /usr/lib/ruby/gems/
58+
COPY --from=build-licensee /usr/bin/licensee /usr/bin/
59+
4360
# Top-level files (ignoring .git etc)
4461
ADD pom.xml LICENSE.md NOTICE.md README.md /usr/src/app/
4562

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,16 @@ system properties like `-Dspring.datasource.url=jdbc:postgresql://localhost:5432
113113
You will also need to have a SPARQL server such as [Apache Jena Fuseki](https://jena.apache.org/documentation/fuseki2/) running,
114114
by default on `localhost:3030`
115115
116+
#### Ruby and Licensee
117+
118+
To retrieve license information, CWL Viewer uses the [Licensee](https://github.com/licensee/licensee) Ruby Gem. To install it,
119+
[configure Ruby](https://www.ruby-lang.org/en/documentation/installation/) on your environment and then run
120+
121+
```bash
122+
gem install licensee
123+
```
124+
125+
116126
## Compiling and Running
117127

118128
To compile you will need [Java 17](https://www.oracle.com/java/technologies/downloads/) or a compatible distribution

src/main/java/org/commonwl/view/cwl/CWLService.java

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.jena.riot.RiotException;
4848
import org.commonwl.view.docker.DockerService;
4949
import org.commonwl.view.git.GitDetails;
50+
import org.commonwl.view.git.GitLicenseException;
5051
import org.commonwl.view.graphviz.ModelDotWriter;
5152
import org.commonwl.view.graphviz.RDFDotWriter;
5253
import org.commonwl.view.workflow.Workflow;
@@ -270,7 +271,7 @@ public Workflow parseWorkflowNative(Path workflowFile, String packedWorkflowId)
270271
* @return The constructed workflow object
271272
*/
272273
public Workflow parseWorkflowWithCwltool(Workflow basicModel, Path workflowFile, Path workTree)
273-
throws CWLValidationException {
274+
throws CWLValidationException, GitLicenseException {
274275
GitDetails gitDetails = basicModel.getRetrievedFrom();
275276
String latestCommit = basicModel.getLastCommit();
276277
String packedWorkflowID = gitDetails.getPackedId();
@@ -452,15 +453,7 @@ public Workflow parseWorkflowWithCwltool(Workflow basicModel, Path workflowFile,
452453
licenseLink = licenseResult.next().get("license").toString();
453454
} else {
454455
// Check for "LICENSE"-like files in root of git repo
455-
for (String licenseCandidate : new String[] {"LICENSE", "LICENSE.txt", "LICENSE.md"}) {
456-
// FIXME: This might wrongly match lower-case "license.txt" in case-insensitive
457-
// file systems
458-
// but the URL would not work
459-
if (Files.isRegularFile(workTree.resolve(licenseCandidate))) {
460-
// Link to it by raw URL
461-
licenseLink = basicModel.getRetrievedFrom().getRawUrl(null, licenseCandidate);
462-
}
463-
}
456+
licenseLink = basicModel.getRetrievedFrom().getLicense(workTree);
464457
}
465458

466459
// Docker link

src/main/java/org/commonwl/view/cwl/CWLToolRunner.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.Date;
2525
import org.apache.jena.query.QueryException;
2626
import org.commonwl.view.git.GitDetails;
27+
import org.commonwl.view.git.GitLicenseException;
2728
import org.commonwl.view.git.GitSemaphore;
2829
import org.commonwl.view.git.GitService;
2930
import org.commonwl.view.researchobject.ROBundleFactory;
@@ -111,7 +112,7 @@ public void createWorkflowFromQueued(QueuedWorkflow queuedWorkflow)
111112
queuedWorkflow.setCwltoolStatus(CWLToolStatus.ERROR);
112113
queuedWorkflow.setMessage("An error occurred when executing a query on the SPARQL store");
113114
FileUtils.deleteGitRepository(repo);
114-
} catch (CWLValidationException ex) {
115+
} catch (CWLValidationException | GitLicenseException ex) {
115116
String message = ex.getMessage();
116117
logger.error(
117118
"Workflow " + queuedWorkflow.getId() + " from " + gitInfo.toSummary() + " : " + message,

src/main/java/org/commonwl/view/git/GitDetails.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,27 @@
2020
package org.commonwl.view.git;
2121

2222
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
23+
import com.fasterxml.jackson.databind.JsonNode;
24+
import com.fasterxml.jackson.databind.ObjectMapper;
25+
import java.io.IOException;
2326
import java.io.Serializable;
2427
import java.net.URI;
2528
import java.net.URISyntaxException;
29+
import java.nio.file.Path;
2630
import java.util.Objects;
31+
import org.slf4j.Logger;
32+
import org.slf4j.LoggerFactory;
2733

2834
/** Represents all the parameters necessary to access a file/directory with Git */
2935
@JsonIgnoreProperties(
3036
value = {"internalUrl"},
3137
ignoreUnknown = true)
3238
public class GitDetails implements Serializable {
3339

40+
private final Logger logger = LoggerFactory.getLogger(this.getClass());
41+
42+
private static final String SPDX_PREFIX = "https://spdx.org/licenses/";
43+
3444
private String repoUrl;
3545
private String branch;
3646
private String path;
@@ -276,4 +286,53 @@ public String toSummary() {
276286
return String.format(
277287
"repoUrl: %s branch: %s path: %s packedId: %s", repoUrl, branch, path, packedId);
278288
}
289+
290+
/**
291+
* Retrieves license details from the repo, if present.
292+
*
293+
* @param workTree the path to the locally cloned repo
294+
* @return The license URI
295+
*/
296+
public String getLicense(Path workTree) throws GitLicenseException {
297+
try {
298+
String[] command = {"licensee", "detect", "--json", workTree.toString()};
299+
if (logger.isTraceEnabled()) {
300+
logger.trace("Calling " + String.join(" ", command));
301+
}
302+
Process process = Runtime.getRuntime().exec(command, null);
303+
ObjectMapper mapper = new ObjectMapper();
304+
JsonNode jsonLicenses = mapper.readTree(process.getInputStream());
305+
if (logger.isTraceEnabled()) {
306+
logger.trace(
307+
"Licensee retrieved the following licenses:\n" + jsonLicenses.toPrettyString());
308+
}
309+
int size = jsonLicenses.withArray("licenses").size();
310+
if (size > 0) {
311+
String licenseCandidate =
312+
jsonLicenses.withArray("matched_files").get(0).get("filename").asText();
313+
String licenseLink = getRawUrl(null, licenseCandidate);
314+
if (logger.isWarnEnabled() && size > 1) {
315+
logger.warn(
316+
"There are "
317+
+ size
318+
+ " identified license files in the "
319+
+ repoUrl
320+
+ " repository. "
321+
+ "Taking the first one: "
322+
+ licenseLink);
323+
}
324+
String key = jsonLicenses.withArray("licenses").get(0).get("key").asText();
325+
if (!"other".equals(key)) {
326+
return SPDX_PREFIX + jsonLicenses.withArray("licenses").get(0).get("spdx_id").asText();
327+
} else {
328+
return licenseLink;
329+
}
330+
} else {
331+
return null;
332+
}
333+
} catch (IOException e) {
334+
throw new GitLicenseException(
335+
"While attempting to detect license for " + workTree + ": " + e.getMessage(), e);
336+
}
337+
}
279338
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package org.commonwl.view.git;
2+
3+
import javax.validation.ValidationException;
4+
5+
public class GitLicenseException extends ValidationException {
6+
7+
public GitLicenseException(String message) {
8+
super(message);
9+
}
10+
11+
public GitLicenseException(Throwable throwable) {
12+
super(throwable);
13+
}
14+
15+
public GitLicenseException(String message, Throwable throwable) {
16+
super(message, throwable);
17+
}
18+
}

src/test/java/org/commonwl/view/git/GitDetailsTest.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,13 @@
2121

2222
import static org.commonwl.view.git.GitDetails.normaliseUrl;
2323
import static org.junit.jupiter.api.Assertions.assertEquals;
24+
import static org.junit.jupiter.api.Assertions.assertNull;
25+
import static org.mockito.Mockito.when;
2426

27+
import java.io.File;
28+
import org.eclipse.jgit.lib.Repository;
2529
import org.junit.jupiter.api.Test;
30+
import org.mockito.Mockito;
2631

2732
public class GitDetailsTest {
2833

@@ -141,4 +146,28 @@ public void getNormaliseUrl() throws Exception {
141146
assertEquals("github.com/test/url/here", normaliseUrl("http://www.github.com/test/url/here"));
142147
assertEquals("github.com/test/url/here", normaliseUrl("http://github.com/test/url/here"));
143148
}
149+
150+
/** Retrieves license details from the repo, if present. */
151+
@Test
152+
public void getLicense() throws Exception {
153+
Repository mockRepo = Mockito.mock(Repository.class);
154+
155+
when(mockRepo.getWorkTree()).thenReturn(new File("src/test/resources/cwl/licenses/apache/"));
156+
assertEquals(
157+
"https://spdx.org/licenses/Apache-2.0",
158+
GENERIC_DETAILS.getLicense(mockRepo.getWorkTree().toPath()));
159+
160+
when(mockRepo.getWorkTree()).thenReturn(new File("src/test/resources/cwl/licenses/multiple/"));
161+
assertEquals(
162+
"https://spdx.org/licenses/Apache-2.0",
163+
GENERIC_DETAILS.getLicense(mockRepo.getWorkTree().toPath()));
164+
165+
when(mockRepo.getWorkTree()).thenReturn(new File("src/test/resources/cwl/licenses/other/"));
166+
assertEquals(
167+
"https://could.com/be/anything/src/test/resources/cwl/licenses/other/LICENSE",
168+
GENERIC_DETAILS.getLicense(mockRepo.getWorkTree().toPath()));
169+
170+
when(mockRepo.getWorkTree()).thenReturn(new File("src/test/resources/cwl/licenses/other/"));
171+
assertNull(GENERIC_DETAILS.getLicense(mockRepo.getWorkTree().toPath()));
172+
}
144173
}

0 commit comments

Comments
 (0)