Skip to content

Commit 9eb32e6

Browse files
authored
Merge pull request #92 from P2GX/lc/obo2json
Fix the release to zenodo process
2 parents 4958e2c + c195df1 commit 9eb32e6

File tree

15 files changed

+240
-53
lines changed

15 files changed

+240
-53
lines changed

.github/workflows/create_prompts_and_upload.yml

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,12 @@ jobs:
1515
uses: actions/setup-java@v3
1616
with:
1717
java-version: '22.0.1'
18-
distribution: 'temurin'
19-
18+
distribution: 'temurin'
19+
cache: maven
20+
21+
- name: Build JAR with Maven
22+
run: mvn clean package -DskipTests
23+
2024
- name: Download latest HPO
2125
run: |
2226
java -jar target/phenopacket2prompt.jar download -w
@@ -32,19 +36,12 @@ jobs:
3236
3337
- name: Run batch processing
3438
run: |
35-
java -jar target/phenopacket2prompt.jar batch -d phenopackets-data/${{steps.dirout.outputs.DIR_NAME}} -p --full-translations=true
36-
37-
#- name: Check generated files # For debugging purposes
38-
# run: |
39-
# ls prompts/en | head
40-
# ls prompts/it | head
41-
# cat $(ls -p prompts/en | grep -v / | head -n 2 | sed 's|^|prompts/en/|')
39+
java -jar target/phenopacket2prompt.jar batch -d phenopackets-data/${{steps.dirout.outputs.DIR_NAME}} -p --full-translations -j
4240
43-
- name: Set up zipped structure for upload
41+
- name: Zip original phenopackets for upload
4442
run: |
45-
for dir in prompts/*/; do
46-
zip -r "${dir%/}.zip" "$dir" && rm -r "$dir"
47-
done
43+
zip -r prompts/original_phenopackets.zip prompts/original_phenopackets
44+
rm -r prompts/original_phenopackets
4845
4946
- name: Set up Python for subsequent upload to zenodo
5047
uses: actions/setup-python@v4
@@ -58,6 +55,8 @@ jobs:
5855
env:
5956
ZENODO_ACCESS_TOKEN: ${{ secrets.ZENODO_ACCESS_TOKEN }}
6057
ZENODO_DEPOSITION_ID: ${{ secrets.ZENODO_DEPOSITION_ID }}
58+
LATEST_STORE_TAG: ${{ inputs.LATEST_STORE }}
59+
LATEST_HPO_TAG: ${{ inputs.LATEST_HPO }}
6160
run: python scripts/update_and_release_to_zenodo.py prompts/
6261

6362

.github/workflows/look4updates.yml

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ jobs:
1111
permissions:
1212
contents: read
1313
actions: write
14+
outputs:
15+
latest_store_tag: ${{ steps.check.outputs.latest_store_tag }}
16+
latest_hpo_tag: ${{ steps.check.outputs.latest_hpo_tag }}
17+
new_release: ${{ steps.check.outputs.new_release }}
18+
1419

1520
steps:
1621
- uses: actions/checkout@v4
@@ -28,6 +33,13 @@ jobs:
2833
GH_TOKEN: ${{ secrets.GH_PAT_LEO }}
2934
run: python scripts/check_ppktstore_version.py
3035

31-
- name: Run follow-up action
32-
if: steps.check.outputs.new_release == 'true'
33-
uses: ./.github/workflows/create_prompts_and_upload.yml
36+
follow_up_prompt_creation:
37+
needs: check_release
38+
if: needs.check_release.outputs.new_release == 'true'
39+
uses: ./.github/workflows/create_prompts_and_upload.yml
40+
with:
41+
LATEST_STORE: ${{ needs.check_release.outputs.latest_store_tag }}
42+
LATEST_HPO: ${{ needs.check_release.outputs.latest_hpo_tag }}
43+
secrets:
44+
ZENODO_ACCESS_TOKEN: ${{ secrets.ZENODO_ACCESS_TOKEN }}
45+
ZENODO_DEPOSITION_ID: ${{ secrets.ZENODO_DEPOSITION_ID }}

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ hs_err_pid*
2424
replay_pid*
2525
/.idea/
2626
/data/
27+
target/
2728
/prompts/
2829
/fenominal-mined.txt
2930
/p2p_test.txt

scripts/check_ppktstore_version.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@ def fetch_with_retry(url, headers, max_retries=3, timeout=10):
3434

3535

3636
ppktstore_repo = "monarch-initiative/phenopacket-store"
37+
hpo_repo = "obophenotype/human-phenotype-ontology"
3738
this_repo = os.environ["GITHUB_REPOSITORY"]
3839
token = os.environ["GH_TOKEN"]
39-
var_name = "LAST_RUN_RELEASE"
40+
ppkt_last_run_version = "LAST_RUN_RELEASE"
4041

4142
# Get phenopacket-store latest version
4243
latest = fetch_with_retry(
@@ -45,41 +46,54 @@ def fetch_with_retry(url, headers, max_retries=3, timeout=10):
4546
).json().get("tag_name")
4647

4748
if not latest:
48-
print("Error: Could not fetch latest release tag!")
49+
print("Error: Could not fetch latest phenopacket-store release tag!")
4950
sys.exit(1)
50-
51+
52+
# Get HPO latest version
53+
latest_hpo = fetch_with_retry(
54+
f"https://api.github.com/repos/{hpo_repo}/releases/latest",
55+
headers={"Accept": "application/vnd.github+json"}
56+
).json().get("tag_name")
57+
58+
if not latest_hpo:
59+
print("Error: Could not fetch latest HPO release tag!")
60+
sys.exit(1)
61+
5162
# Get last version of phenopacket-store that ppkt2prompt ran
5263
r = fetch_with_retry(
53-
f"https://api.github.com/repos/{this_repo}/actions/variables/{var_name}",
64+
f"https://api.github.com/repos/{this_repo}/actions/variables/{ppkt_last_run_version}",
5465
headers={
5566
"Authorization": f"token {token}",
5667
"Accept": "application/vnd.github+json"
5768
}
5869
)
5970

6071
if r.status_code != 200:
61-
print(f"Error: Could not fetch repository variable '{var_name}'! Status code: {r.status_code}")
72+
print(f"Error: Could not fetch repository variable '{ppkt_last_run_version}'! Status code: {r.status_code}")
6273
sys.exit(1)
6374

6475
stored = r.json().get("value")
6576
if stored is None:
66-
print(f"Error: Repository variable '{var_name}' returned no value.")
77+
print(f"Error: Repository variable '{ppkt_last_run_version}' returned no value.")
6778
sys.exit(1)
6879

6980
latest = latest.strip()
7081
stored = stored.strip()
82+
latest_hpo = latest_hpo.strip()
83+
7184
new_release = (latest != stored)
7285

7386
with open(os.environ["GITHUB_OUTPUT"], "a") as gh_out:
74-
gh_out.write(f"latest_tag={latest}\n")
7587
gh_out.write(f"new_release={str(new_release).lower()}\n")
88+
gh_out.write(f"latest_store_tag={latest}\n")
89+
gh_out.write(f"latest_hpo_tag={latest_hpo}\n")
7690

7791

7892
# Update variable if needed
7993
if new_release:
80-
payload = {"name": var_name, "value": latest}
94+
payload = {"name": ppkt_last_run_version, "value": latest}
8195
res = requests.patch(
82-
f"https://api.github.com/repos/{this_repo}/actions/variables/{var_name}",
96+
f"https://api.github.com/repos/{this_repo}/actions/variables/{ppkt_last_run_version}",
8397
headers={"Authorization": f"token {token}",
8498
"Accept": "application/vnd.github+json"},
8599
json=payload,

scripts/update_and_release_to_zenodo.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
ZENODO_API_BASE = "https://zenodo.org/api/deposit/depositions"
99
ACCESS_TOKEN = os.getenv("ZENODO_ACCESS_TOKEN")
1010
DEPOSITION_ID = os.getenv("ZENODO_DEPOSITION_ID")
11+
LATEST_PPKT_STORE = os.getenv("LATEST_STORE_TAG")
12+
LATEST_HPO = os.getenv("LATEST_HPO_TAG")
1113

1214
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
1315

@@ -43,7 +45,9 @@ def create_new_version():
4345

4446
today_date = datetime.today().strftime("%Y-%m-%d")
4547
metadata["publication_date"] = today_date
48+
metadata["notes"] = f"Used phenopacket-store version {LATEST_PPKT_STORE} and HPO version {LATEST_HPO}. Beyond this record, please also cite https://doi.org/10.1016/j.ebiom.2025.105957"
4649
metadata_update = {"metadata": metadata}
50+
# TODO: consider also recording the number of prompts per language in the metadata; this should be easy to derive from the JSONL output.
4751

4852
response = requests.put(
4953
f"{ZENODO_API_BASE}/{new_id}",

scripts/zenodo_metadata.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@
2121
"custom": {
2222
"code:codeRepository": "https://github.com/monarch-initiative/phenopacket2prompt",
2323
"code:programmingLanguage": [{"id": "java", "title": {"en": "Java"}}]
24-
}
24+
},
25+
"references": "https://doi.org/10.1016/j.ebiom.2025.105957"
2526
}

src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/DownloadCommand.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,9 @@ public Integer call() throws FileDownloadException, MalformedURLException, URISy
4343
BioDownloaderBuilder builder = BioDownloader.builder(destination);
4444
builder.hpoJson();
4545
builder.overwrite(overwrite);
46-
// TODO there might be a bug in newer hp-international.obo, revert to latest after it is fixed.
47-
//String hpoInternational = "https://github.com/obophenotype/human-phenotype-ontology/releases/latest/download/hp-international.obo";
48-
String hpoInternational = "https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2025-05-06/hp-international.obo";
46+
String hpoInternational = "http://purl.obolibrary.org/obo/hp/translations/hp-all.babelon.json";
4947
URL hpoInternationalUrl = new URI(hpoInternational).toURL() ;
50-
builder.custom("hp-international.obo", hpoInternationalUrl);
48+
builder.custom("hp-all.babelon.json", hpoInternationalUrl);
5149
BioDownloader downloader = builder.build();
5250
List<File> files = downloader.download();
5351
return 0;

src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import picocli.CommandLine;
1414

1515
import java.io.File;
16+
import java.io.IOException;
1617
import java.util.List;
1718
import java.util.concurrent.Callable;
1819

@@ -28,7 +29,7 @@ public class GbtTranslateBatchCommand implements Callable<Integer> {
2829

2930
@CommandLine.Option(names = {"--translations"},
3031
description = "path to translations file")
31-
private String translationsPath = "data/hp-international.obo";
32+
private String translationsPath = "data/hp-all.babelon.json";
3233

3334
@CommandLine.Option(names = {"-o", "--outdir"},
3435
description = "path to outdir")
@@ -47,6 +48,10 @@ public class GbtTranslateBatchCommand implements Callable<Integer> {
4748
defaultValue = "false")
4849
private boolean onlyPatient;
4950

51+
@CommandLine.Option(names = {"-j", "--jsonl-output"},
52+
description = "Write output in JSONL format")
53+
private boolean jsonOutput;
54+
5055
public boolean getPatientFlag() {
5156
return onlyPatient;
5257
}
@@ -55,10 +60,11 @@ public boolean getPatientFlag() {
5560

5661

5762
@Override
58-
public Integer call() {
63+
public Integer call() throws IOException {
5964
File hpJsonFile = new File(hpoJsonPath);
6065
Context.getInstance().setFullTranslations(fullTransl);
6166
Context.getInstance().setOnlyPatient(onlyPatient);
67+
Context.getInstance().setJsonOutput(jsonOutput);
6268

6369
boolean useExactMatching = true;
6470
if (! hpJsonFile.isFile()) {
@@ -142,7 +148,9 @@ public Integer call() {
142148
pcopy.copyFile(file);
143149
}
144150
// output file with correct diagnosis list
145-
Utility.outputCorrectPPKt(correctResultList);
151+
if(!Context.getInstance().isJsonOutput()) {
152+
Utility.outputCorrectPPKt(correctResultList);
153+
}
146154
return 0;
147155
}
148156

src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/TestDriveCommand.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ private static List<PpktIndividual> getPpktIndividuals() {
306306
}
307307

308308
@Override
309-
public Integer call() {
309+
public Integer call() throws IOException {
310310
java.io.File hpJsonFile = new java.io.File(hpoJsonPath);
311311
boolean useExactMatching = true;
312312
if (! hpJsonFile.isFile()) {

0 commit comments

Comments
 (0)