1
+ import hashlib
2
+ import os
3
+ from tempfile import TemporaryDirectory
4
+
5
+ import pytest
6
+
7
+ from repo2docker .contentproviders import Dataverse
8
+
9
# Module-level provider instance, used here to look up known host entries by name.
test_dv = Dataverse()
harvard_dv = next(
    host for host in test_dv.hosts if host["name"] == "Harvard Dataverse"
)
cimmyt_dv = next(
    host for host in test_dv.hosts if host["name"] == "CIMMYT Research Data"
)
12
+
13
@pytest.mark.parametrize(
    ("doi", "resolved"),
    [
        (
            "doi:10.7910/DVN/6ZXAGT/3YRRYJ",
            {"host": harvard_dv, "record": "doi:10.7910/DVN/6ZXAGT"},
        ),
        (
            "10.7910/DVN/6ZXAGT/3YRRYJ",
            {"host": harvard_dv, "record": "doi:10.7910/DVN/6ZXAGT"},
        ),
        (
            "https://dataverse.harvard.edu/api/access/datafile/3323458",
            {"host": harvard_dv, "record": "doi:10.7910/DVN/3MJ7IR"},
        ),
        (
            "https://data.cimmyt.org/dataset.xhtml?persistentId=hdl:11529/10016",
            {"host": cimmyt_dv, "record": "hdl:11529/10016"},
        ),
        # Inputs that are expected not to be detected at all
        ("/some/random/string", None),
        ("https://example.com/path/here", None),
        # Non dataverse DOIs
        ("https://doi.org/10.21105/joss.01277", None),
    ],
)
def test_detect(doi, resolved):
    """Check that ``Dataverse.detect`` returns the expected spec for each input.

    ``resolved`` is either the expected ``{"host": ..., "record": ...}`` dict
    or ``None`` when the input should not be recognised.
    """
    detected = Dataverse().detect(doi)
    assert detected == resolved
28
+
29
+
30
def test_dataverse_fetch():
    """Fetch a Harvard Dataverse record and verify what lands on disk.

    Runs ``Dataverse.fetch`` for record ``doi:10.7910/DVN/TJCLKP`` into a
    temporary directory, then checks the top-level directory names and the
    SHA-256 digest of three of the fetched files.
    """
    spec = {"host": harvard_dv, "record": "doi:10.7910/DVN/TJCLKP"}

    dv = Dataverse()

    with TemporaryDirectory() as d:
        # fetch() is iterated to completion for its side effects; the values
        # it yields are not inspected by this test.
        for _ in dv.fetch(spec, d):
            pass

        # Verify two directories
        assert set(os.listdir(d)) == {"data", "code"}

        # Verify sha256sum of three files
        expected_shas = {
            "data/primary/primary-data.zip": "880f99a1e1d54a2553be61301f92e06b29236785b8d4d1b7ad0b4595d9d7512b",
            "data/2023-01-03.tsv": "cc9759e8e6bc076dd7c1a8eb53a7ea3d38e8697fa9f544d15768db308516cc5f",
            "code/language.py": "1ffb3b3cdc9de01279779f3fc88824672c8ec3ab1c41ecdd5c1b59a9b0202215",
        }

        # The dict and the per-file digest use distinct names so the loop
        # does not rebind the mapping it is iterating over.
        for subpath, expected_sha in expected_shas.items():
            with open(os.path.join(d, subpath), "rb") as f:
                actual_sha = hashlib.sha256(f.read()).hexdigest()
            # Include the path so a failure names the offending file.
            assert actual_sha == expected_sha, subpath
0 commit comments