Skip to content

Commit 353241e

Browse files
author
mgeipel
committed
fixed #63
1 parent 83ea97d commit 353241e

File tree

6 files changed

+334
-36
lines changed

6 files changed

+334
-36
lines changed

src/main/java/org/culturegraph/mf/formeta/parser/PartialRecordEmitter.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,15 @@ public final class PartialRecordEmitter implements Emitter {
2424

2525
private StreamReceiver receiver;
2626
private String defaultName;
27-
28-
public PartialRecordEmitter() {
29-
this(null);
27+
28+
29+
30+
public void setDefaultName(final String defaultName) {
31+
this.defaultName = defaultName;
3032
}
3133

32-
public PartialRecordEmitter(final String defaultName) {
33-
this.defaultName = defaultName;
34+
public String getDefaultName() {
35+
return defaultName;
3436
}
3537

3638
@Override
@@ -40,7 +42,7 @@ public void setReceiver(final StreamReceiver receiver) {
4042

4143
@Override
4244
public void startGroup(final String name, final int nestingLevel) {
43-
if (defaultName != null && name.isEmpty()) {
45+
if (nestingLevel == 0 && defaultName != null && name.isEmpty()) {
4446
receiver.startEntity(defaultName);
4547
} else {
4648
receiver.startEntity(name);
@@ -54,7 +56,7 @@ public void endGroup(final int nestingLevel) {
5456

5557
@Override
5658
public void literal(final String name, final String value, final int nestingLevel) {
57-
if (defaultName != null && name.isEmpty()) {
59+
if (nestingLevel == 0 && defaultName != null && name.isEmpty()) {
5860
receiver.literal(defaultName, value);
5961
} else {
6062
receiver.literal(name, value);

src/main/java/org/culturegraph/mf/stream/converter/StreamToTriples.java

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,17 @@
2020
import java.util.regex.Matcher;
2121
import java.util.regex.Pattern;
2222

23+
import org.culturegraph.mf.formeta.formatter.ConciseFormatter;
24+
import org.culturegraph.mf.formeta.formatter.Formatter;
2325
import org.culturegraph.mf.framework.DefaultStreamPipe;
2426
import org.culturegraph.mf.framework.ObjectReceiver;
2527
import org.culturegraph.mf.framework.StreamReceiver;
2628
import org.culturegraph.mf.framework.annotations.Description;
2729
import org.culturegraph.mf.framework.annotations.In;
2830
import org.culturegraph.mf.framework.annotations.Out;
2931
import org.culturegraph.mf.types.Triple;
32+
import org.culturegraph.mf.types.Triple.ObjectType;
33+
import org.culturegraph.mf.util.StreamConstants;
3034

3135
/**
3236
*
@@ -42,12 +46,17 @@ public final class StreamToTriples extends DefaultStreamPipe<ObjectReceiver<Trip
4246
public static final char SEPARATOR = '\u001e';
4347

4448
private static final Pattern REDIRECT_PATTERN = Pattern.compile("^\\{to:(.+)}(.+)$");
45-
private static final String ID = "_id";
4649

4750
private final List<String> nameBuffer = new ArrayList<String>();
4851
private final List<String> valueBuffer = new ArrayList<String>();
4952
private String currentId;
5053
private boolean redirect;
54+
private final Formatter formatter = new ConciseFormatter();
55+
56+
private int entityDepth;
57+
private String currentEntityName;
58+
59+
5160

5261
public void setRedirect(final boolean redirect) {
5362
this.redirect = redirect;
@@ -56,26 +65,60 @@ public void setRedirect(final boolean redirect) {
5665
@Override
5766
public void startRecord(final String identifier) {
5867
assert !isClosed();
68+
entityDepth = 0;
5969
this.currentId = identifier;
6070
}
6171

72+
@Override
73+
public void startEntity(final String name) {
74+
if (entityDepth == 0) {
75+
currentEntityName = name;
76+
formatter.startGroup("");
77+
} else {
78+
formatter.startGroup(name);
79+
}
80+
++entityDepth;
81+
82+
}
83+
84+
@Override
85+
public void endEntity() {
86+
--entityDepth;
87+
if (entityDepth == 0) {
88+
formatter.endGroup();
89+
dispatch(currentEntityName, formatter.toString(), ObjectType.ENTITY);
90+
formatter.reset();
91+
} else {
92+
formatter.endGroup();
93+
}
94+
}
95+
6296
@Override
6397
public void literal(final String name, final String value) {
6498
assert !isClosed();
99+
if (entityDepth == 0) {
100+
dispatch(name, value, ObjectType.STRING);
101+
} else {
102+
formatter.literal(name, value);
103+
}
104+
105+
}
106+
107+
private void dispatch(final String name, final String value, final ObjectType type) {
65108
if (redirect) {
66-
if (ID.equals(name)) {
109+
if (StreamConstants.ID.equals(name)) {
67110
currentId = value;
68111
} else {
69112
final Matcher matcher = REDIRECT_PATTERN.matcher(name);
70113
if (matcher.find()) {
71-
getReceiver().process(new Triple(matcher.group(1), matcher.group(2), value));
114+
getReceiver().process(new Triple(matcher.group(1), matcher.group(2), value, type));
72115
} else {
73116
nameBuffer.add(name);
74117
valueBuffer.add(value);
75118
}
76119
}
77120
} else {
78-
getReceiver().process(new Triple(currentId, name, value));
121+
getReceiver().process(new Triple(currentId, name, value, type));
79122
}
80123
}
81124

src/main/java/org/culturegraph/mf/stream/pipe/sort/TripleCollect.java

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,15 @@
1515
*/
1616
package org.culturegraph.mf.stream.pipe.sort;
1717

18+
import org.culturegraph.mf.formeta.parser.FormetaParser;
19+
import org.culturegraph.mf.formeta.parser.PartialRecordEmitter;
1820
import org.culturegraph.mf.framework.DefaultObjectPipe;
1921
import org.culturegraph.mf.framework.StreamReceiver;
2022
import org.culturegraph.mf.framework.annotations.Description;
2123
import org.culturegraph.mf.framework.annotations.In;
2224
import org.culturegraph.mf.framework.annotations.Out;
2325
import org.culturegraph.mf.types.Triple;
26+
import org.culturegraph.mf.types.Triple.ObjectType;
2427

2528
/**
2629
* Collects named values to form records.
@@ -32,23 +35,40 @@
3235
@In(Triple.class)
3336
@Out(StreamReceiver.class)
3437
public final class TripleCollect extends DefaultObjectPipe<Triple, StreamReceiver> {
35-
38+
private final FormetaParser parser = new FormetaParser();
39+
private final PartialRecordEmitter emitter = new PartialRecordEmitter();
40+
3641
private String currentSubject;
3742

43+
public TripleCollect() {
44+
parser.setEmitter(emitter);
45+
}
46+
3847
@Override
3948
public void process(final Triple triple) {
4049
if (currentSubject == null) {
4150
currentSubject = triple.getSubject();
4251
getReceiver().startRecord(currentSubject);
4352
}
44-
53+
4554
if (currentSubject.equals(triple.getSubject())) {
46-
getReceiver().literal(triple.getPredicate(), triple.getObject());
55+
decodeTriple(triple);
4756
} else {
4857
getReceiver().endRecord();
4958
currentSubject = triple.getSubject();
5059
getReceiver().startRecord(currentSubject);
60+
decodeTriple(triple);
61+
}
62+
}
63+
64+
public void decodeTriple(final Triple triple) {
65+
if(triple.getObjectType() == ObjectType.STRING){
5166
getReceiver().literal(triple.getPredicate(), triple.getObject());
67+
}else if (triple.getObjectType() == ObjectType.ENTITY){
68+
emitter.setDefaultName(triple.getPredicate());
69+
parser.parse(triple.getObject());
70+
}else{
71+
throw new UnsupportedOperationException(triple.getObjectType() + " can not yet be decoded");
5272
}
5373
}
5474

@@ -63,5 +83,10 @@ protected void onCloseStream() {
6383
currentSubject = null;
6484
getReceiver().endRecord();
6585
}
86+
87+
@Override
88+
protected void onSetReceiver() {
89+
emitter.setReceiver(getReceiver());
90+
}
6691

6792
}

src/main/java/org/culturegraph/mf/types/Triple.java

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,59 +20,94 @@
2020
import java.io.ObjectOutputStream;
2121

2222
/**
23-
* Stores an immutable name-value-pair. The hash code is
24-
* precomputed during instantiation.
23+
* Stores an immutable subject-predicate-object triple together with the type
24+
* of its object. The hash code is precomputed during instantiation.
2525
*
2626
* @author Markus Michael Geipel
2727
*/
28-
public final class Triple implements Comparable<Triple> {
29-
28+
public final class Triple implements Comparable<Triple> {
29+
30+
/**
31+
* Content type of triple object
32+
*/
33+
public enum ObjectType {
34+
STRING, ENTITY
35+
}
36+
3037
private static final int MAGIC1 = 23;
3138
private static final int MAGIC2 = 31;
3239
private static final int MAGIC3 = 17;
3340
private final String subject;
3441
private final String predicate;
3542
private final String object;
43+
private final ObjectType objectType;
44+
3645
private final int preCompHashCode;
37-
46+
3847
public Triple(final String subject, final String predicate, final String object) {
3948
this.subject = subject;
4049
this.predicate = predicate;
4150
this.object = object;
51+
objectType = ObjectType.STRING;
4252
int result = MAGIC1;
4353
result = MAGIC2 * result + predicate.hashCode();
4454
result = MAGIC2 * result + object.hashCode();
4555
result = MAGIC3 * result + subject.hashCode();
56+
result = MAGIC3 * result + objectType.hashCode();
4657
preCompHashCode = result;
58+
59+
4760
}
4861

49-
62+
public Triple(final String subject, final String predicate, final String object, final ObjectType objectType) {
63+
this.subject = subject;
64+
this.predicate = predicate;
65+
this.object = object;
66+
this.objectType = objectType;
67+
int result = MAGIC1;
68+
result = MAGIC2 * result + predicate.hashCode();
69+
result = MAGIC2 * result + object.hashCode();
70+
result = MAGIC3 * result + subject.hashCode();
71+
result = MAGIC3 * result + objectType.hashCode();
72+
preCompHashCode = result;
73+
74+
75+
}
76+
77+
5078
/**
5179
* @return object
5280
*/
5381
public String getObject() {
5482
return object;
5583
}
56-
84+
5785
/**
5886
* @return predicate
5987
*/
6088
public String getPredicate() {
6189
return predicate;
6290
}
6391

92+
/**
93+
* @return object type
94+
*/
95+
public ObjectType getObjectType() {
96+
return objectType;
97+
}
98+
6499
/**
65100
* @return subject
66101
*/
67102
public String getSubject() {
68103
return subject;
69104
}
70-
71-
public static Triple read(final ObjectInputStream in) throws IOException{
105+
106+
public static Triple read(final ObjectInputStream in) throws IOException {
72107
return new Triple(in.readUTF(), in.readUTF(), in.readUTF());
73108
}
74-
75-
public void write(final ObjectOutputStream out) throws IOException{
109+
110+
public void write(final ObjectOutputStream out) throws IOException {
76111
out.writeUTF(subject);
77112
out.writeUTF(predicate);
78113
out.writeUTF(object);
@@ -87,28 +122,29 @@ public int hashCode() {
87122
public boolean equals(final Object obj) {
88123
if (obj instanceof Triple) {
89124
final Triple triple = (Triple) obj;
90-
return triple.preCompHashCode == preCompHashCode
91-
&& triple.predicate.equals(predicate)
92-
&& triple.object.equals(object)
93-
&& triple.subject.equals(subject);
125+
return triple.preCompHashCode == preCompHashCode && triple.predicate.equals(predicate)
126+
&& triple.object.equals(object) && triple.subject.equals(subject) && triple.objectType == objectType;
94127
}
95128
return false;
96129
}
97130

98131
@Override
99-
public int compareTo(final Triple namedValue) {
100-
int result = subject.compareTo(namedValue.subject);
132+
public int compareTo(final Triple triple) {
133+
int result = subject.compareTo(triple.subject);
101134
if (result == 0) {
102-
result = predicate.compareTo(namedValue.predicate);
103-
if(result == 0){
104-
return object.compareTo(namedValue.object);
135+
result = predicate.compareTo(triple.predicate);
136+
if (result == 0) {
137+
result = object.compareTo(triple.object);
138+
if(result == 0){
139+
return objectType.compareTo(triple.objectType);
140+
}
105141
}
106142
}
107143
return result;
108144
}
109-
145+
110146
@Override
111147
public String toString() {
112-
return subject + ":" + predicate + "=" + object;
148+
return subject + ":" + predicate + "=" + object + " (" + objectType + ")";
113149
}
114150
}

0 commit comments

Comments
 (0)