Skip to content

Commit 7fd6982

Browse files
RohanBhattaraiNP, tmorrell, AbakahAlexander
authored
Add automated tests
Co-authored-by: Tom Morrell <[email protected]> Co-authored-by: Alexander Abeiku Abakah <[email protected]> Co-authored-by: RohanBhattaraiNP <[email protected]>
1 parent 00b39bd commit 7fd6982

File tree

23 files changed

+325
-4001
lines changed

23 files changed

+325
-4001
lines changed

.github/workflows/bot.yaml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
name: Bot validation

# Trigger only when the CLI, the schema/validation code, the write/edit
# helpers, or the README change, on both direct pushes and pull requests.
on:
  push:
    paths:
      - 'caltechdata_api/cli.py'
      - 'caltechdata_api/customize_schema.py'
      - 'caltechdata_api/caltechdata_write.py'
      - 'caltechdata_api/caltechdata_edit.py'
      - 'README.md'
  pull_request:
    paths:
      - 'caltechdata_api/cli.py'
      - 'caltechdata_api/customize_schema.py'
      - 'caltechdata_api/caltechdata_write.py'
      - 'caltechdata_api/caltechdata_edit.py'
      - 'README.md'

jobs:
  validate-metadata:
    runs-on: ubuntu-latest

    steps:
      # fetch-depth: 0 checks out the full history instead of a shallow clone.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      # Fail fast with a readable message when the API token secret is absent.
      # NOTE(review): repository secrets are normally not exposed to workflows
      # triggered by pull requests from forks, so this step will fail for such
      # PRs - confirm that is the intended behaviour.
      - name: Check for Required Environment Variables
        env:
          CALTECHDATA_TOKEN: ${{ secrets.CALTECHDATA_TOKEN }}
        run: |
          if [ -z "$CALTECHDATA_TOKEN" ]; then
            echo "Error: CALTECHDATA_TOKEN environment variable is not set"
            exit 1
          fi

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest requests s3fs cryptography
          pip install .

      # Drives the interactive CLI end to end with scripted answers.
      - name: Run CaltechDATA Metadata Validation
        env:
          CALTECHDATA_TOKEN: ${{ secrets.CALTECHDATA_TOKEN }}
        run: |
          python tests/bot_yaml.py

      # NOTE(review): CALTECHDATA_TOKEN is not passed to this step; confirm
      # that test_unit.py and test_rdm.py do not need it.
      - name: Run Unit Tests
        run: |
          cd tests
          pytest test_unit.py
          pytest test_rdm.py

CITATION.cff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@ keywords:
2222
- metadata
2323
- software
2424
- InvenioRDM
25-
date-released: 2024-12-18
25+

caltechdata_api/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
caltechdata_unembargo,
1010
caltechdata_accept,
1111
)
12-
from .customize_schema import customize_schema
12+
from .customize_schema import customize_schema, validate_metadata
1313
from .get_metadata import get_metadata
1414
from .download_file import download_file, download_url
1515
from .utils import humanbytes
16+
from .md_to_json import parse_readme_to_json

caltechdata_api/cli.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import argparse
22
import requests
33
import s3fs
4-
from caltechdata_api import caltechdata_write, caltechdata_edit
5-
from md_to_json import parse_readme_to_json
4+
from caltechdata_api import caltechdata_write, caltechdata_edit, parse_readme_to_json
65
import json
76
import os
87
from cryptography.fernet import Fernet
@@ -34,7 +33,7 @@ def generate_key():
3433
return Fernet.generate_key()
3534

3635

37-
# Load the key from a file or generate a new one if not present
36+
# Load the key from a file or generate a new one if not present.
3837
def load_or_generate_key():
3938
key_file = os.path.join(caltechdata_directory, "key.key")
4039
if os.path.exists(key_file):
@@ -59,11 +58,17 @@ def decrypt_token(encrypted_token, key):
5958
return f.decrypt(encrypted_token).decode()
6059

6160

62-
# Function to get or set token with support for test system
61+
# Function to get or set token with support for test systems
6362
def get_or_set_token(production=True):
63+
# First check for environment variable
64+
env_token = os.environ.get("CALTECHDATA_TOKEN")
65+
if env_token:
66+
print("Using token from environment variable")
67+
return env_token
68+
6469
key = load_or_generate_key()
6570

66-
# Use different token files for production and test environments
71+
# Use different token files for production and test environments
6772
token_filename = "token.txt" if production else "token_test.txt"
6873
token_file = os.path.join(caltechdata_directory, token_filename)
6974

caltechdata_api/customize_schema.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -480,18 +480,17 @@ def validate_metadata(json_record):
480480
errors.append("Each 'subject' must have a 'subject' key.")
481481

482482
# Check for 'dates'
483-
if "dates" not in json_record:
484-
errors.append("'dates' field is missing.")
485-
elif not isinstance(json_record["dates"], list) or len(json_record["dates"]) == 0:
486-
errors.append("'dates' should be a non-empty list.")
487-
else:
488-
for date in json_record["dates"]:
489-
if (
490-
not isinstance(date, dict)
491-
or "date" not in date
492-
or "dateType" not in date
493-
):
494-
errors.append("Each 'date' must have 'date' and 'dateType'.")
483+
if "dates" in json_record:
484+
if not isinstance(json_record["dates"], list) or len(json_record["dates"]) == 0:
485+
errors.append("'dates' should be a non-empty list.")
486+
else:
487+
for date in json_record["dates"]:
488+
if (
489+
not isinstance(date, dict)
490+
or "date" not in date
491+
or "dateType" not in date
492+
):
493+
errors.append("Each 'date' must have 'date' and 'dateType'.")
495494

496495
# Check for 'creators'
497496
if "creators" not in json_record:
@@ -601,10 +600,9 @@ def validate_metadata(json_record):
601600
errors.append("'publisher' should be a string.")
602601

603602
# Check for 'publicationYear'
604-
if "publicationYear" not in json_record:
605-
errors.append("'publicationYear' field is missing.")
606-
elif not isinstance(json_record["publicationYear"], str):
607-
errors.append("'publicationYear' should be a string.")
603+
if "publicationYear" in json_record:
604+
if not isinstance(json_record["publicationYear"], str):
605+
errors.append("'publicationYear' should be a string.")
608606

609607
# Check for 'types'
610608
if "types" not in json_record:

tests/10.22002-D1.1098

-15.6 MB
Binary file not shown.

tests/bot_yaml.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
import subprocess
2+
import time
3+
from unittest.mock import patch
4+
import sys
5+
import os
6+
import json
7+
import requests
8+
from datetime import datetime
9+
import pytest
10+
import importlib.util
11+
import traceback
12+
13+
14+
class CaltechDataTester:
    """Drive the interactive CaltechDATA CLI with scripted answers.

    Each instance creates a timestamped ``test_run_<timestamp>`` directory
    (under ``$GITHUB_WORKSPACE`` when that variable is set, otherwise under
    ``./caltech_test_data``) that holds the generated CSV fixture and a log
    file recording every prompt/response pair.
    """

    class _TeeStdout:
        """File-like object that records writes and forwards them to a stream."""

        def __init__(self, stream):
            self.output = []
            self._stream = stream

        def write(self, text):
            self.output.append(text)
            self._stream.write(text)

        def flush(self):
            self._stream.flush()

        def get_output(self):
            return "".join(self.output)

    def __init__(self):
        # Use GitHub Actions environment or create a local test directory
        self.test_dir = os.environ.get(
            "GITHUB_WORKSPACE", os.path.join(os.getcwd(), "caltech_test_data")
        )
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Ensure test directory exists
        os.makedirs(self.test_dir, exist_ok=True)

        # Create test run directory
        self.test_run_dir = os.path.join(self.test_dir, f"test_run_{self.timestamp}")
        os.makedirs(self.test_run_dir, exist_ok=True)

        # Initialize logging
        self.log_file = os.path.join(self.test_run_dir, "test_log.txt")

    def log(self, message):
        """Print ``message`` and append it, timestamped, to the run's log file."""
        print(message)
        # Explicit UTF-8: callers log non-ASCII text (emoji), which would raise
        # UnicodeEncodeError under a non-UTF-8 default locale encoding.
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(f"{datetime.now()}: {message}\n")

    def create_test_files(self):
        """Write a small CSV fixture into the run directory and return its path."""
        csv_path = os.path.join(self.test_run_dir, "test_data.csv")
        rows = (
            "date,temperature,humidity\n",
            "2023-01-01,25.5,60\n",
            "2023-01-02,26.0,62\n",
            "2023-01-03,24.8,65\n",
        )
        with open(csv_path, "w") as f:
            f.writelines(rows)

        self.log(f"Created test CSV file: {csv_path}")
        return csv_path

    def import_cli_module(self):
        """Load ``caltechdata_api/cli.py`` from the workspace (or cwd) by path."""
        cli_path = os.path.join(
            os.environ.get("GITHUB_WORKSPACE", os.getcwd()), "caltechdata_api", "cli.py"
        )
        spec = importlib.util.spec_from_file_location("cli", cli_path)
        cli_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(cli_module)
        return cli_module

    def generate_test_responses(self):
        """Return a mapping of exact CLI prompt text to the scripted answer."""
        return {
            "Do you want to create or edit a CaltechDATA record? (create/edit): ": "create",
            "Do you want to use metadata from an existing file or create new metadata? (existing/create): ": "create",
            "Enter the title of the dataset: ": f"Test Dataset {self.timestamp}",
            "Enter the abstract or description of the dataset: ": "This is an automated test dataset containing sample climate data for validation purposes.",
            "Enter the number corresponding to the desired license: ": "1",
            "Enter your ORCID identifier: ": os.environ.get(
                "TEST_ORCID", "0000-0002-1825-0097"
            ),
            "How many funding entries do you want to provide? ": "1",
            "Enter the award number for funding: ": "NSF-1234567",
            "Enter the award title for funding: ": "Automated Testing Grant",
            "Enter the funder ROR (https://ror.org): ": "021nxhr62",
            "Do you want to upload or link data files? (upload/link/n): ": "upload",
            # NOTE(review): this is a bare filename while the fixture is written
            # into the run directory - confirm how the CLI resolves it.
            "Enter the filename to upload as a supporting file (or 'n' to finish): ": "test_data.csv",
            "Do you want to add more files? (y/n): ": "n",
            "Do you want to send this record to CaltechDATA? (y/n): ": "y",
        }

    def run_test_submission(self):
        """Run the CLI's ``main()`` in ``-test`` mode with scripted input.

        Returns:
            True when ``main()`` returns without raising, False when it raises
            an ``Exception`` (the error is logged and the traceback printed).

        ``sys.stdout`` and ``sys.argv`` are replaced only for the duration of
        the CLI call and are always restored, even when the CLI raises or exits.
        """
        test_csv = None
        try:
            self.log("Starting test submission process...")

            # Create test files
            test_csv = self.create_test_files()

            # Dynamically import cli module
            cli_module = self.import_cli_module()

            # Generate responses
            responses = self.generate_test_responses()

            # Mock input: answer known prompts, fall back to an empty string
            def mock_input(prompt):
                self.log(f"Prompt: {prompt}")
                if prompt in responses:
                    response = responses[prompt]
                    self.log(f"Response: {response}")
                    return response
                return ""

            saved_stdout = sys.stdout
            saved_argv = sys.argv
            sys.stdout = self._TeeStdout(saved_stdout)
            try:
                with patch("builtins.input", side_effect=mock_input):
                    # Use -test flag to use test mode
                    sys.argv = [saved_argv[0], "-test"]
                    cli_module.main()
            finally:
                # Restore process-wide state no matter how the CLI finished.
                sys.stdout = saved_stdout
                sys.argv = saved_argv

            return True

        except Exception as e:
            self.log(f"Error in test submission: {e}")
            traceback.print_exc()
            return False
        finally:
            # Cleanup
            if test_csv is not None and os.path.exists(test_csv):
                os.remove(test_csv)
                self.log("Test files cleaned up")
141+
142+
143+
def main():
    """Run one scripted CLI submission and exit 0 on success, 1 on failure."""
    tester = CaltechDataTester()

    # Guard clause: report the failure and stop with a non-zero status.
    if not tester.run_test_submission():
        tester.log("\n❌ Test submission failed - check logs for details")
        sys.exit(1)

    tester.log("\n🎉 Test submission completed successfully!")
    sys.exit(0)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)