Skip to content

Commit 1ec7509

Browse files
committed
Merge pull request #66 from cboehme/records-to-triples
Added support for encoding complete records in a single triple
2 parents e9e43ab + 08929fb commit 1ec7509

File tree

2 files changed

155 additions & 63 deletions

src/main/java/org/culturegraph/mf/stream/converter/StreamToTriples.java

Lines changed: 83 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -38,56 +38,98 @@
3838
*
3939
*/
4040
@Description("Takes literals from a stream and emits them as triples such "
41-
+ "that the name and value become predicate and object and the record id the subject."
42-
+ "If 'redirect' is true, use '_id' to change the id, or '{to:ID}NAME' to change the id of a single literal.")
41+
+ "that the name and value become predicate and object and the record id the subject. "
42+
+ "If 'redirect' is true, use '_id' to change the id, or '{to:ID}NAME' to change the id of a single literal. "
43+
+ "Set 'recordPredicate' to encode a complete record in one triple. The value of 'recordPredicate' is used "
44+
+ "as the predicate of the triple. If 'recordPredicate' is set, no {to:ID}NAME-style redirects are possible.")
4345
@In(StreamReceiver.class)
4446
@Out(Triple.class)
4547
public final class StreamToTriples extends DefaultStreamPipe<ObjectReceiver<Triple>> {
46-
public static final char SEPARATOR = '\u001e';
4748

4849
private static final Pattern REDIRECT_PATTERN = Pattern.compile("^\\{to:(.+)}(.+)$");
4950

5051
private final List<String> nameBuffer = new ArrayList<String>();
5152
private final List<String> valueBuffer = new ArrayList<String>();
52-
private String currentId;
53-
private boolean redirect;
5453
private final Formatter formatter = new ConciseFormatter();
5554

56-
private int entityDepth;
57-
private String currentEntityName;
58-
59-
55+
private boolean redirect;
56+
private String recordPredicate;
57+
58+
private int nestingLevel;
59+
private int encodeLevel;
60+
private String predicateName;
61+
private String currentId;
6062

63+
public boolean isRedirect() {
64+
return redirect;
65+
}
66+
6167
public void setRedirect(final boolean redirect) {
6268
this.redirect = redirect;
6369
}
70+
71+
public String getRecordPredicate() {
72+
return recordPredicate;
73+
}
74+
75+
public void setRecordPredicate(final String recordPredicate) {
76+
this.recordPredicate = recordPredicate;
77+
}
6478

6579
@Override
6680
public void startRecord(final String identifier) {
6781
assert !isClosed();
68-
entityDepth = 0;
69-
this.currentId = identifier;
82+
83+
currentId = identifier;
84+
85+
if (recordPredicate != null) {
86+
encodeLevel = 0;
87+
startEncode(recordPredicate);
88+
} else {
89+
encodeLevel = 1;
90+
}
91+
92+
nestingLevel = 1;
93+
}
94+
95+
@Override
96+
public void endRecord() {
97+
assert !isClosed();
98+
99+
nestingLevel = 0;
100+
101+
if (nestingLevel == encodeLevel) {
102+
endEncode();
103+
}
104+
105+
if (redirect) {
106+
for (int i = 0; i < nameBuffer.size(); ++i) {
107+
getReceiver().process(new Triple(currentId, nameBuffer.get(i), valueBuffer.get(i)));
108+
}
109+
nameBuffer.clear();
110+
valueBuffer.clear();
111+
}
70112
}
71113

72114
@Override
73115
public void startEntity(final String name) {
74-
if (entityDepth == 0) {
75-
currentEntityName = name;
76-
formatter.startGroup("");
77-
} else {
116+
assert !isClosed();
117+
118+
if (nestingLevel > encodeLevel) {
78119
formatter.startGroup(name);
120+
} else {
121+
startEncode(name);
79122
}
80-
++entityDepth;
81-
123+
++nestingLevel;
82124
}
83125

84126
@Override
85127
public void endEntity() {
86-
--entityDepth;
87-
if (entityDepth == 0) {
88-
formatter.endGroup();
89-
dispatch(currentEntityName, formatter.toString(), ObjectType.ENTITY);
90-
formatter.reset();
128+
assert !isClosed();
129+
130+
--nestingLevel;
131+
if (nestingLevel == encodeLevel) {
132+
endEncode();
91133
} else {
92134
formatter.endGroup();
93135
}
@@ -96,12 +138,27 @@ public void endEntity() {
96138
@Override
97139
public void literal(final String name, final String value) {
98140
assert !isClosed();
99-
if (entityDepth == 0) {
100-
dispatch(name, value, ObjectType.STRING);
141+
142+
if (nestingLevel > encodeLevel) {
143+
if (nestingLevel == 1 && redirect && StreamConstants.ID.equals(name)) {
144+
currentId = value;
145+
} else {
146+
formatter.literal(name, value);
147+
}
101148
} else {
102-
formatter.literal(name, value);
149+
dispatch(name, value, ObjectType.STRING);
103150
}
104-
151+
}
152+
153+
private void startEncode(final String predicate) {
154+
predicateName = predicate;
155+
formatter.reset();
156+
formatter.startGroup("");
157+
}
158+
159+
private void endEncode() {
160+
formatter.endGroup();
161+
dispatch(predicateName, formatter.toString(), ObjectType.ENTITY);
105162
}
106163

107164
private void dispatch(final String name, final String value, final ObjectType type) {
@@ -122,15 +179,4 @@ private void dispatch(final String name, final String value, final ObjectType ty
122179
}
123180
}
124181

125-
@Override
126-
public void endRecord() {
127-
assert !isClosed();
128-
if (redirect) {
129-
for (int i = 0; i < nameBuffer.size(); ++i) {
130-
getReceiver().process(new Triple(currentId, nameBuffer.get(i), valueBuffer.get(i)));
131-
}
132-
nameBuffer.clear();
133-
valueBuffer.clear();
134-
}
135-
}
136182
}

src/test/java/org/culturegraph/mf/stream/converter/StreamToTriplesTest.java

Lines changed: 72 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@
2020
import org.culturegraph.mf.types.Triple;
2121
import org.culturegraph.mf.types.Triple.ObjectType;
2222
import org.culturegraph.mf.util.StreamConstants;
23+
import org.junit.After;
24+
import org.junit.Before;
2325
import org.junit.Test;
26+
import org.mockito.Mock;
2427
import org.mockito.Mockito;
28+
import org.mockito.MockitoAnnotations;
2529

2630
/**
2731
* Tests {@link StreamToTriples}
@@ -36,15 +40,27 @@ public final class StreamToTriplesTest {
3640
private static final String ENTITY_NAME = "ename";
3741
private static final String REC_ID = "id";
3842
private static final String REC_ALT_ID = "altid";
39-
43+
private static final String RECORD_PREDICATE = "rec_pred";
44+
45+
private StreamToTriples toTriples;
46+
47+
@Mock
48+
private ObjectReceiver<Triple> receiver;
49+
50+
@Before
51+
public void setup() {
52+
MockitoAnnotations.initMocks(this);
53+
toTriples = new StreamToTriples();
54+
toTriples.setReceiver(receiver);
55+
}
56+
57+
@After
58+
public void cleanup() {
59+
toTriples.closeStream();
60+
}
61+
4062
@Test
4163
public void testShouldBuildTripleFromLiteral() {
42-
final StreamToTriples toTriples = new StreamToTriples();
43-
@SuppressWarnings("unchecked")
44-
final ObjectReceiver<Triple> receiver = Mockito.mock(ObjectReceiver.class);
45-
46-
toTriples.setReceiver(receiver);
47-
4864
toTriples.startRecord(REC_ID);
4965
toTriples.literal(NAME, VALUE);
5066
toTriples.endRecord();
@@ -54,12 +70,6 @@ public void testShouldBuildTripleFromLiteral() {
5470

5571
@Test
5672
public void testShouldEncodeEntities() {
57-
final StreamToTriples toTriples = new StreamToTriples();
58-
@SuppressWarnings("unchecked")
59-
final ObjectReceiver<Triple> receiver = Mockito.mock(ObjectReceiver.class);
60-
61-
toTriples.setReceiver(receiver);
62-
6373
toTriples.startRecord(REC_ID);
6474
toTriples.startEntity(ENTITY_NAME);
6575
toTriples.literal(NAME, VALUE);
@@ -69,20 +79,19 @@ public void testShouldEncodeEntities() {
6979
toTriples.endEntity();
7080
toTriples.endRecord();
7181

72-
Mockito.verify(receiver).process(
73-
new Triple(REC_ID, ENTITY_NAME, Formeta.GROUP_START +NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE
74-
+ Formeta.ITEM_SEPARATOR + ENTITY_NAME + Formeta.GROUP_START + NAME
75-
+ Formeta.NAME_VALUE_SEPARATOR + VALUE + Formeta.GROUP_END + Formeta.GROUP_END,
76-
ObjectType.ENTITY));
82+
final String objectValue =
83+
Formeta.GROUP_START +
84+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE + Formeta.ITEM_SEPARATOR +
85+
ENTITY_NAME + Formeta.GROUP_START +
86+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE +
87+
Formeta.GROUP_END +
88+
Formeta.GROUP_END;
89+
Mockito.verify(receiver).process(new Triple(REC_ID, ENTITY_NAME, objectValue, ObjectType.ENTITY));
7790
}
7891

7992
@Test
8093
public void testShouldRedirectOnMoveToInName() {
81-
final StreamToTriples toTriples = new StreamToTriples();
8294
toTriples.setRedirect(true);
83-
@SuppressWarnings("unchecked")
84-
final ObjectReceiver<Triple> receiver = Mockito.mock(ObjectReceiver.class);
85-
toTriples.setReceiver(receiver);
8695

8796
toTriples.startRecord(REC_ID);
8897
toTriples.literal("{to:" + REC_ALT_ID + "}" + NAME, VALUE);
@@ -93,11 +102,7 @@ public void testShouldRedirectOnMoveToInName() {
93102

94103
@Test
95104
public void testShouldRedirectIfAltIdGiven() {
96-
final StreamToTriples toTriples = new StreamToTriples();
97105
toTriples.setRedirect(true);
98-
@SuppressWarnings("unchecked")
99-
final ObjectReceiver<Triple> receiver = Mockito.mock(ObjectReceiver.class);
100-
toTriples.setReceiver(receiver);
101106

102107
toTriples.startRecord(REC_ID);
103108
toTriples.literal(StreamConstants.ID, REC_ALT_ID);
@@ -107,4 +112,45 @@ public void testShouldRedirectIfAltIdGiven() {
107112
Mockito.verify(receiver).process(new Triple(REC_ALT_ID, NAME, VALUE));
108113
}
109114

115+
@Test
116+
public void testShouldEncodeWholeRecordsIfRecordPredicateIsGiven() {
117+
toTriples.setRecordPredicate(RECORD_PREDICATE);
118+
119+
toTriples.startRecord(REC_ID);
120+
toTriples.startEntity(ENTITY_NAME);
121+
toTriples.literal(NAME, VALUE);
122+
toTriples.endEntity();
123+
toTriples.startEntity(ENTITY_NAME);
124+
toTriples.literal(NAME, VALUE);
125+
toTriples.endEntity();
126+
toTriples.endRecord();
127+
128+
final String objectValue =
129+
Formeta.GROUP_START +
130+
ENTITY_NAME + Formeta.GROUP_START +
131+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE +
132+
Formeta.GROUP_END +
133+
ENTITY_NAME + Formeta.GROUP_START +
134+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE +
135+
Formeta.GROUP_END +
136+
Formeta.GROUP_END;
137+
Mockito.verify(receiver).process(new Triple(REC_ID, RECORD_PREDICATE, objectValue, ObjectType.ENTITY));
138+
}
139+
140+
@Test
141+
public void testShouldRedirectEvenIfRecordPredicateIsGiven() {
142+
toTriples.setRecordPredicate(RECORD_PREDICATE);
143+
toTriples.setRedirect(true);
144+
145+
toTriples.startRecord(REC_ID);
146+
toTriples.literal(StreamConstants.ID, REC_ALT_ID);
147+
toTriples.literal(NAME, VALUE);
148+
toTriples.endRecord();
149+
150+
final String objectValue =
151+
Formeta.GROUP_START +
152+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE +
153+
Formeta.GROUP_END;
154+
Mockito.verify(receiver).process(new Triple(REC_ALT_ID, RECORD_PREDICATE, objectValue));
155+
}
110156
}

0 commit comments

Comments
 (0)