File tree Expand file tree Collapse file tree 6 files changed +49
-1052
lines changed
gnd/crisscross-connections
src/main/antlr3/org/culturegraph/mf/flux/parser Expand file tree Collapse file tree 6 files changed +49
-1052
lines changed Original file line number Diff line number Diff line change 1
1
//creates a beacon file based on a pica+ dump of the DNB CBS data.
2
2
3
- default out = dump + ".beacon";
3
+ default out = dump + "-" + type + " .beacon";
4
4
default header = FLUX_DIR + "header.txt";
5
+ default type = "ALL";
5
6
6
7
//read header
7
8
"reading header " + header | write("stdout");
@@ -16,12 +17,12 @@ as-lines|
16
17
catch-object-exception|
17
18
decode-pica|
18
19
batch-log(batchsize="100000")|
19
- morph(FLUX_DIR + "extract.xml")|
20
+ morph(FLUX_DIR + "extract.xml", * )|
20
21
stream-to-triples(redirect="true")|
21
22
sort-triples(by="subject")|
22
23
collect-triples|
23
24
morph(FLUX_DIR + "output.xml")|
24
- batch-log("merged ${totalRecords}",batchsize="100000")|
25
+ batch-log("merged ${totalRecords}", batchsize="100000")|
25
26
stream-to-triples|
26
27
template("${s}")|
27
28
@Y;
Original file line number Diff line number Diff line change 9
9
<equals string =" Tp" />
10
10
</data >
11
11
12
- <combine name =" {to:${ref}}refed" value =" " >
13
- <data source =" 041A*.9" name =" ref" >
12
+ <combine name =" {to:${ref}}refed" value =" ${v}" >
13
+ <data
14
+ source=" 041A*.9|028A.9|029B*.9|028C*.9|028Q*.9|028P*.9|028F*.9|028M*.9|028D*.9|028E*.9"
15
+ name=" ref" >
14
16
<trim />
15
17
<unique />
16
18
</data >
17
- </combine >
18
-
19
-
20
19
20
+ <choose name =" v" >
21
+ <!-- DBSM: -->
22
+ <!-- (006U $0 “04p01*”) or (017A $a “yy”) -->
23
+ <data source =" 006U.0" >
24
+ <regexp match =" 04p01.*" />
25
+ <constant value =" DBSM|ALL" />
26
+ </data >
27
+ <data source =" 017A.a" >
28
+ <equals string =" yy" />
29
+ <constant value =" DBSM|ALL" />
30
+ </data >
31
+ <!-- DEA: -->
32
+ <!-- (001@ $a “2”) or (209A $f “Exilarchiv” or 209A $f “HB/EB”) -->
33
+ <data source =" 001@.a" >
34
+ <equals string =" 2" />
35
+ <constant value =" DEA|ALL" />
36
+ </data >
37
+ <data source =" 209A.f" >
38
+ <equals string =" HB/EB" />
39
+ <constant value =" DEA|ALL" />
40
+ </data >
41
+ <!-- DMA: -->
42
+ <!-- (002@ $0 “G*” or 002@ $0 “M*”) or (006U $0 “10,P01*”) -->
43
+ <data source =" 002@.0" >
44
+ <regexp match =" ^[GM].*" />
45
+ <constant value =" DMA|ALL" />
46
+ </data >
47
+ <data source =" 006U.0" >
48
+ <regexp match =" ^10,P01.*" />
49
+ <constant value =" DMA|ALL" />
50
+ </data >
51
+ <data source =" _id" >
52
+ <constant value =" TITLE|ALL" />
53
+ </data >
54
+ <postprocess >
55
+ <regexp match =" $[type]" />
56
+ </postprocess >
57
+ </choose >
58
+
59
+ </combine >
21
60
22
61
</rules >
23
62
</metamorph >
Original file line number Diff line number Diff line change 221
221
</choose >
222
222
223
223
<choose name =" ent" >
224
- <concat delimiter =" ; " name =" ent" >
224
+ <concat delimiter =" | " name =" ent" >
225
225
<data source =" 004B.a" />
226
226
</concat >
227
227
<data source =" _id" >
You can’t perform that action at this time.
0 commit comments