Skip to content

Commit e0d830c

Browse files
committed
Added support for encoding complete records in a single triple.
1 parent f4a73ff commit e0d830c

File tree

2 files changed

+152
-62
lines changed

2 files changed

+152
-62
lines changed

src/main/java/org/culturegraph/mf/stream/converter/StreamToTriples.java

Lines changed: 79 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,10 @@
3838
*
3939
*/
4040
@Description("Takes literals from a stream and emits them as triples such "
41-
+ "that the name and value become predicate and object and the record id the subject."
42-
+ "If 'redirect' is true, use '_id' to change the id, or '{to:ID}NAME' to change the id of a single literal.")
41+
+ "that the name and value become predicate and object and the record id the subject. "
42+
+ "If 'redirect' is true, use '_id' to change the id, or '{to:ID}NAME' to change the id of a single literal. "
43+
+ "Set 'recordPredicate' to encode a complete record in one triple. The value of 'recordPredicate' is used "
44+
+ "as the predicate of the triple. If 'recordPredicate' is set, no redirections will be made.")
4345
@In(StreamReceiver.class)
4446
@Out(Triple.class)
4547
public final class StreamToTriples extends DefaultStreamPipe<ObjectReceiver<Triple>> {
@@ -48,45 +50,86 @@ public final class StreamToTriples extends DefaultStreamPipe<ObjectReceiver<Trip
4850

4951
private final List<String> nameBuffer = new ArrayList<String>();
5052
private final List<String> valueBuffer = new ArrayList<String>();
51-
private String currentId;
52-
private boolean redirect;
5353
private final Formatter formatter = new ConciseFormatter();
5454

55-
private int entityDepth;
56-
private String currentEntityName;
57-
58-
55+
private boolean redirect;
56+
private String recordPredicate;
57+
58+
private int nestingLevel;
59+
private int encodeLevel;
60+
private String predicateName;
61+
private String currentId;
5962

63+
public boolean isRedirect() {
64+
return redirect;
65+
}
66+
6067
public void setRedirect(final boolean redirect) {
6168
this.redirect = redirect;
6269
}
70+
71+
public String getRecordPredicate() {
72+
return recordPredicate;
73+
}
74+
75+
public void setRecordPredicate(final String recordPredicate) {
76+
this.recordPredicate = recordPredicate;
77+
}
6378

6479
@Override
6580
public void startRecord(final String identifier) {
6681
assert !isClosed();
67-
entityDepth = 0;
68-
this.currentId = identifier;
82+
83+
currentId = identifier;
84+
85+
if (recordPredicate != null) {
86+
encodeLevel = 0;
87+
startEncode(recordPredicate);
88+
} else {
89+
encodeLevel = 1;
90+
}
91+
92+
nestingLevel = 1;
93+
}
94+
95+
@Override
96+
public void endRecord() {
97+
assert !isClosed();
98+
99+
nestingLevel = 0;
100+
101+
if (nestingLevel == encodeLevel) {
102+
endEncode();
103+
}
104+
105+
if (redirect) {
106+
for (int i = 0; i < nameBuffer.size(); ++i) {
107+
getReceiver().process(new Triple(currentId, nameBuffer.get(i), valueBuffer.get(i)));
108+
}
109+
nameBuffer.clear();
110+
valueBuffer.clear();
111+
}
69112
}
70113

71114
@Override
72115
public void startEntity(final String name) {
73-
if (entityDepth == 0) {
74-
currentEntityName = name;
75-
formatter.startGroup("");
76-
} else {
116+
assert !isClosed();
117+
118+
if (nestingLevel > encodeLevel) {
77119
formatter.startGroup(name);
120+
} else {
121+
startEncode(name);
78122
}
79-
++entityDepth;
80-
123+
++nestingLevel;
81124
}
82125

83126
@Override
84127
public void endEntity() {
85-
--entityDepth;
86-
if (entityDepth == 0) {
87-
formatter.endGroup();
88-
dispatch(currentEntityName, formatter.toString(), ObjectType.ENTITY);
89-
formatter.reset();
128+
assert !isClosed();
129+
130+
--nestingLevel;
131+
if (nestingLevel == encodeLevel) {
132+
endEncode();
90133
} else {
91134
formatter.endGroup();
92135
}
@@ -95,12 +138,23 @@ public void endEntity() {
95138
@Override
96139
public void literal(final String name, final String value) {
97140
assert !isClosed();
98-
if (entityDepth == 0) {
99-
dispatch(name, value, ObjectType.STRING);
100-
} else {
141+
142+
if (nestingLevel > encodeLevel) {
101143
formatter.literal(name, value);
144+
} else {
145+
dispatch(name, value, ObjectType.STRING);
102146
}
103-
147+
}
148+
149+
private void startEncode(final String predicate) {
150+
predicateName = predicate;
151+
formatter.reset();
152+
formatter.startGroup("");
153+
}
154+
155+
private void endEncode() {
156+
formatter.endGroup();
157+
dispatch(predicateName, formatter.toString(), ObjectType.ENTITY);
104158
}
105159

106160
private void dispatch(final String name, final String value, final ObjectType type) {
@@ -121,15 +175,4 @@ private void dispatch(final String name, final String value, final ObjectType ty
121175
}
122176
}
123177

124-
@Override
125-
public void endRecord() {
126-
assert !isClosed();
127-
if (redirect) {
128-
for (int i = 0; i < nameBuffer.size(); ++i) {
129-
getReceiver().process(new Triple(currentId, nameBuffer.get(i), valueBuffer.get(i)));
130-
}
131-
nameBuffer.clear();
132-
valueBuffer.clear();
133-
}
134-
}
135178
}

src/test/java/org/culturegraph/mf/stream/converter/StreamToTriplesTest.java

Lines changed: 73 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,12 @@
2020
import org.culturegraph.mf.types.Triple;
2121
import org.culturegraph.mf.types.Triple.ObjectType;
2222
import org.culturegraph.mf.util.StreamConstants;
23+
import org.junit.After;
24+
import org.junit.Before;
2325
import org.junit.Test;
26+
import org.mockito.Mock;
2427
import org.mockito.Mockito;
28+
import org.mockito.MockitoAnnotations;
2529

2630
/**
2731
* Tests {@link StreamToTriples}
@@ -36,15 +40,27 @@ public final class StreamToTriplesTest {
3640
private static final String ENTITY_NAME = "ename";
3741
private static final String REC_ID = "id";
3842
private static final String REC_ALT_ID = "altid";
39-
43+
private static final String RECORD_PREDICATE = "rec_pred";
44+
45+
private StreamToTriples toTriples;
46+
47+
@Mock
48+
private ObjectReceiver<Triple> receiver;
49+
50+
@Before
51+
public void setup() {
52+
MockitoAnnotations.initMocks(this);
53+
toTriples = new StreamToTriples();
54+
toTriples.setReceiver(receiver);
55+
}
56+
57+
@After
58+
public void cleanup() {
59+
toTriples.closeStream();
60+
}
61+
4062
@Test
4163
public void testShouldBuildTripleFromLiteral() {
42-
final StreamToTriples toTriples = new StreamToTriples();
43-
@SuppressWarnings("unchecked")
44-
final ObjectReceiver<Triple> receiver = Mockito.mock(ObjectReceiver.class);
45-
46-
toTriples.setReceiver(receiver);
47-
4864
toTriples.startRecord(REC_ID);
4965
toTriples.literal(NAME, VALUE);
5066
toTriples.endRecord();
@@ -54,12 +70,6 @@ public void testShouldBuildTripleFromLiteral() {
5470

5571
@Test
5672
public void testShouldEncodeEntities() {
57-
final StreamToTriples toTriples = new StreamToTriples();
58-
@SuppressWarnings("unchecked")
59-
final ObjectReceiver<Triple> receiver = Mockito.mock(ObjectReceiver.class);
60-
61-
toTriples.setReceiver(receiver);
62-
6373
toTriples.startRecord(REC_ID);
6474
toTriples.startEntity(ENTITY_NAME);
6575
toTriples.literal(NAME, VALUE);
@@ -69,20 +79,19 @@ public void testShouldEncodeEntities() {
6979
toTriples.endEntity();
7080
toTriples.endRecord();
7181

72-
Mockito.verify(receiver).process(
73-
new Triple(REC_ID, ENTITY_NAME, Formeta.GROUP_START +NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE
74-
+ Formeta.ITEM_SEPARATOR + ENTITY_NAME + Formeta.GROUP_START + NAME
75-
+ Formeta.NAME_VALUE_SEPARATOR + VALUE + Formeta.GROUP_END + Formeta.GROUP_END,
76-
ObjectType.ENTITY));
82+
final String objectValue =
83+
Formeta.GROUP_START +
84+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE + Formeta.ITEM_SEPARATOR +
85+
ENTITY_NAME + Formeta.GROUP_START +
86+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE +
87+
Formeta.GROUP_END +
88+
Formeta.GROUP_END;
89+
Mockito.verify(receiver).process(new Triple(REC_ID, ENTITY_NAME, objectValue, ObjectType.ENTITY));
7790
}
7891

7992
@Test
8093
public void testShouldRedirectOnMoveToInName() {
81-
final StreamToTriples toTriples = new StreamToTriples();
8294
toTriples.setRedirect(true);
83-
@SuppressWarnings("unchecked")
84-
final ObjectReceiver<Triple> receiver = Mockito.mock(ObjectReceiver.class);
85-
toTriples.setReceiver(receiver);
8695

8796
toTriples.startRecord(REC_ID);
8897
toTriples.literal("{to:" + REC_ALT_ID + "}" + NAME, VALUE);
@@ -93,11 +102,7 @@ public void testShouldRedirectOnMoveToInName() {
93102

94103
@Test
95104
public void testShouldRedirectIfAltIdGiven() {
96-
final StreamToTriples toTriples = new StreamToTriples();
97105
toTriples.setRedirect(true);
98-
@SuppressWarnings("unchecked")
99-
final ObjectReceiver<Triple> receiver = Mockito.mock(ObjectReceiver.class);
100-
toTriples.setReceiver(receiver);
101106

102107
toTriples.startRecord(REC_ID);
103108
toTriples.literal(StreamConstants.ID, REC_ALT_ID);
@@ -107,4 +112,46 @@ public void testShouldRedirectIfAltIdGiven() {
107112
Mockito.verify(receiver).process(new Triple(REC_ALT_ID, NAME, VALUE));
108113
}
109114

115+
@Test
116+
public void testShouldEncodeWholeRecordsIfRecordPredicateIsGiven() {
117+
toTriples.setRecordPredicate(RECORD_PREDICATE);
118+
119+
toTriples.startRecord(REC_ID);
120+
toTriples.startEntity(ENTITY_NAME);
121+
toTriples.literal(NAME, VALUE);
122+
toTriples.endEntity();
123+
toTriples.startEntity(ENTITY_NAME);
124+
toTriples.literal(NAME, VALUE);
125+
toTriples.endEntity();
126+
toTriples.endRecord();
127+
128+
final String objectValue =
129+
Formeta.GROUP_START +
130+
ENTITY_NAME + Formeta.GROUP_START +
131+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE +
132+
Formeta.GROUP_END +
133+
ENTITY_NAME + Formeta.GROUP_START +
134+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE +
135+
Formeta.GROUP_END +
136+
Formeta.GROUP_END;
137+
Mockito.verify(receiver).process(new Triple(REC_ID, RECORD_PREDICATE, objectValue, ObjectType.ENTITY));
138+
}
139+
140+
@Test
141+
public void testShouldNotRedirectIfRecordPredicateIsGiven() {
142+
toTriples.setRecordPredicate(RECORD_PREDICATE);
143+
toTriples.setRedirect(true);
144+
145+
toTriples.startRecord(REC_ID);
146+
toTriples.literal(StreamConstants.ID, REC_ALT_ID);
147+
toTriples.literal(NAME, VALUE);
148+
toTriples.endRecord();
149+
150+
final String objectValue =
151+
Formeta.GROUP_START +
152+
StreamConstants.ID + Formeta.NAME_VALUE_SEPARATOR + REC_ALT_ID + Formeta.ITEM_SEPARATOR +
153+
NAME + Formeta.NAME_VALUE_SEPARATOR + VALUE +
154+
Formeta.GROUP_END;
155+
Mockito.verify(receiver).process(new Triple(REC_ID, RECORD_PREDICATE, objectValue));
156+
}
110157
}

0 commit comments

Comments
 (0)