|
1 | 1 | /*
|
2 |
| - * Copyright 2013, 2014 Deutsche Nationalbibliothek |
| 2 | + * Copyright 2016 Deutsche Nationalbibliothek |
3 | 3 | *
|
4 |
| - * Licensed under the Apache License, Version 2.0 the "License"; |
5 |
| - * you may not use this file except in compliance with the License. |
6 |
| - * You may obtain a copy of the License at |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
7 | 7 | *
|
8 |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
9 | 9 | *
|
10 |
| - * Unless required by applicable law or agreed to in writing, software |
11 |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
12 |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 |
| - * See the License for the specific language governing permissions and |
14 |
| - * limitations under the License. |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
15 | 15 | */
|
16 |
| -package org.culturegraph.mf.stream.pipe; |
17 | 16 |
|
18 |
| -import java.util.Deque; |
19 |
| -import java.util.LinkedList; |
20 |
| -import java.util.NoSuchElementException; |
| 17 | +package org.culturegraph.mf.stream.pipe; |
21 | 18 |
|
22 | 19 | import org.culturegraph.mf.framework.DefaultStreamPipe;
|
23 | 20 | import org.culturegraph.mf.framework.StreamReceiver;
|
24 | 21 | import org.culturegraph.mf.framework.annotations.Description;
|
25 | 22 | import org.culturegraph.mf.framework.annotations.In;
|
26 | 23 | import org.culturegraph.mf.framework.annotations.Out;
|
27 |
| - |
| 24 | +import org.culturegraph.mf.stream.sink.EntityPathTracker; |
28 | 25 |
|
29 | 26 | /**
|
30 |
| - * flattens out entities in a stream by introducing dots in literal names. |
| 27 | + * Flattens all entities in a stream by prefixing the literals with the entity |
| 28 | + * paths. The stream emitted by this module is guaranteed to not contain any |
| 29 | + * <i>start-entity</i> and <i>end-entity</i> events. |
| 30 | + * |
| 31 | + * <p>For example, take the following sequence of events: |
| 32 | + * <pre>{@literal |
| 33 | + * start-record "1" |
| 34 | + * literal "toplevel": literal-value |
| 35 | + * start-entity "entity" |
| 36 | + * literal "nested": literal-value |
| 37 | + * end-entity |
| 38 | + * end-record |
| 39 | + * }</pre> |
| 40 | + * |
| 41 | + * <p>These events are transformed by the {@code StreamFlattener} into the |
| 42 | + * following sequence of events: |
| 43 | + * <pre>{@literal |
| 44 | + * start-record "1" |
| 45 | + * literal "toplevel": literal-value |
| 46 | + * literal "entity.nested": literal-value |
| 47 | + * end-record |
| 48 | + * }</pre> |
| 49 | + * |
| 50 | + * @author Christoph Böhme (rewrite) |
31 | 51 | * @author Markus Michael Geipel
|
32 |
| - * |
| 52 | + * @see EntityPathTracker |
33 | 53 | */
|
34 |
| - |
35 | 54 | @Description("flattens out entities in a stream by introducing dots in literal names")
|
36 | 55 | @In(StreamReceiver.class)
|
37 | 56 | @Out(StreamReceiver.class)
|
38 | 57 | public final class StreamFlattener extends DefaultStreamPipe<StreamReceiver> {
|
39 |
| - |
| 58 | + |
40 | 59 | public static final String DEFAULT_ENTITY_MARKER = ".";
|
41 |
| - private static final String ENTITIES_NOT_BALANCED = "Entity starts and ends are not balanced"; |
42 | 60 |
|
43 |
| - private String entityMarker = DEFAULT_ENTITY_MARKER; |
44 |
| - private final Deque<String> entityStack = new LinkedList<String>(); |
45 |
| - private final StringBuilder entityPath = new StringBuilder(); |
46 |
| - private String currentEntityPath = ""; |
47 |
| - |
48 |
| - public void setEntityMarker(final String entityMarker) { |
49 |
| - this.entityMarker = entityMarker; |
| 61 | + private static final String ENTITIES_NOT_BALANCED = |
| 62 | + "Entity starts and ends are not balanced"; |
| 63 | + |
| 64 | + private final EntityPathTracker pathTracker = new EntityPathTracker(); |
| 65 | + |
| 66 | + public StreamFlattener() { |
| 67 | + setEntityMarker(DEFAULT_ENTITY_MARKER); |
50 | 68 | }
|
51 | 69 |
|
52 | 70 | public String getEntityMarker() {
|
53 |
| - return entityMarker; |
| 71 | + return pathTracker.getEntitySeparator(); |
| 72 | + } |
| 73 | + |
| 74 | + public void setEntityMarker(final String entityMarker) { |
| 75 | + pathTracker.setEntitySeparator(entityMarker); |
54 | 76 | }
|
55 | 77 |
|
56 | 78 | @Override
|
57 | 79 | public void startRecord(final String identifier) {
|
58 | 80 | assert !isClosed();
|
59 |
| - entityStack.clear(); |
60 |
| - currentEntityPath = ""; |
61 |
| - if (entityPath.length() != 0) { |
62 |
| - entityPath.delete(0, entityPath.length()); |
63 |
| - } |
| 81 | + pathTracker.startRecord(identifier); |
64 | 82 | getReceiver().startRecord(identifier);
|
65 | 83 | }
|
66 | 84 |
|
67 | 85 | @Override
|
68 | 86 | public void endRecord() {
|
69 | 87 | assert !isClosed();
|
70 |
| - currentEntityPath = ""; |
| 88 | + if (pathTracker.getCurrentEntityName() != null) { |
| 89 | + // TODO: Remove this check in 4.0.0. We assume well-formedness |
| 90 | + throw new IllegalStateException(ENTITIES_NOT_BALANCED); |
| 91 | + } |
| 92 | + pathTracker.endRecord(); |
71 | 93 | getReceiver().endRecord();
|
72 |
| - |
73 | 94 | }
|
74 | 95 |
|
75 | 96 | @Override
|
76 | 97 | public void startEntity(final String name) {
|
77 | 98 | assert !isClosed();
|
78 |
| - entityStack.push(name); |
79 |
| - entityPath.append(name); |
80 |
| - entityPath.append(entityMarker); |
81 |
| - currentEntityPath = entityPath.toString(); |
82 |
| - |
| 99 | + pathTracker.startEntity(name); |
83 | 100 | }
|
84 | 101 |
|
85 | 102 | @Override
|
86 | 103 | public void endEntity() {
|
87 | 104 | assert !isClosed();
|
88 |
| - try { |
89 |
| - final int end = entityPath.length(); |
90 |
| - final String name = entityStack.pop(); |
91 |
| - entityPath.delete(end - name.length() - entityMarker.length(), end); |
92 |
| - currentEntityPath = entityPath.toString(); |
93 |
| - } catch (NoSuchElementException exc) { |
94 |
| - throw new IllegalStateException(ENTITIES_NOT_BALANCED + ": " + exc.getMessage(), exc); |
| 105 | + if (pathTracker.getCurrentEntityName() == null) { |
| 106 | + // TODO: Remove this check in 4.0.0. We assume well-formedness |
| 107 | + throw new IllegalStateException(ENTITIES_NOT_BALANCED); |
95 | 108 | }
|
| 109 | + pathTracker.endEntity(); |
96 | 110 | }
|
97 | 111 |
|
98 | 112 | @Override
|
99 | 113 | public void literal(final String name, final String value) {
|
100 | 114 | assert !isClosed();
|
101 |
| - getReceiver().literal(currentEntityPath + name, value); |
| 115 | + getReceiver().literal(pathTracker.getCurrentPathWith(name), value); |
102 | 116 | }
|
103 | 117 |
|
104 | 118 | public String getCurrentEntityName() {
|
105 |
| - return entityStack.peek(); |
| 119 | + return pathTracker.getCurrentEntityName(); |
106 | 120 | }
|
107 | 121 |
|
108 | 122 | public String getCurrentPath() {
|
109 |
| - if(currentEntityPath.isEmpty()){ |
110 |
| - return ""; |
111 |
| - } |
112 |
| - return currentEntityPath.substring(0, currentEntityPath.length() - entityMarker.length()); |
| 123 | + return pathTracker.getCurrentPath(); |
113 | 124 | }
|
114 |
| - |
| 125 | + |
115 | 126 | }
|
0 commit comments