|
1 | 1 | /* |
2 | | - * Copyright 2013, 2014 Deutsche Nationalbibliothek |
| 2 | + * Copyright 2016 Deutsche Nationalbibliothek |
3 | 3 | * |
4 | | - * Licensed under the Apache License, Version 2.0 the "License"; |
5 | | - * you may not use this file except in compliance with the License. |
6 | | - * You may obtain a copy of the License at |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
7 | 7 | * |
8 | | - * http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
9 | 9 | * |
10 | | - * Unless required by applicable law or agreed to in writing, software |
11 | | - * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | - * See the License for the specific language governing permissions and |
14 | | - * limitations under the License. |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
15 | 15 | */ |
16 | | -package org.culturegraph.mf.stream.pipe; |
17 | 16 |
|
18 | | -import java.util.Deque; |
19 | | -import java.util.LinkedList; |
20 | | -import java.util.NoSuchElementException; |
| 17 | +package org.culturegraph.mf.stream.pipe; |
21 | 18 |
|
22 | 19 | import org.culturegraph.mf.framework.DefaultStreamPipe; |
23 | 20 | import org.culturegraph.mf.framework.StreamReceiver; |
24 | 21 | import org.culturegraph.mf.framework.annotations.Description; |
25 | 22 | import org.culturegraph.mf.framework.annotations.In; |
26 | 23 | import org.culturegraph.mf.framework.annotations.Out; |
27 | | - |
| 24 | +import org.culturegraph.mf.stream.sink.EntityPathTracker; |
28 | 25 |
|
29 | 26 | /** |
30 | | - * flattens out entities in a stream by introducing dots in literal names. |
| 27 | + * Flattens all entities in a stream by prefixing the literals with the entity |
| 28 | + * paths. The stream emitted by this module is guaranteed to not contain any |
| 29 | + * <i>start-entity</i> and <i>end-entity</i> events. |
| 30 | + * |
| 31 | + * <p>For example, take the following sequence of events: |
| 32 | + * <pre>{@literal |
| 33 | + * start-record "1" |
| 34 | + * literal "toplevel": literal-value |
| 35 | + * start-entity "entity" |
| 36 | + * literal "nested": literal-value |
| 37 | + * end-entity |
| 38 | + * end-record |
| 39 | + * }</pre> |
| 40 | + * |
| 41 | + * These events are transformed by the {@code StreamFlattener} into the |
| 42 | + * following sequence of events: |
| 43 | + * <pre>{@literal |
| 44 | + * start-record "1" |
| 45 | + * literal "toplevel": literal-value |
| 46 | + * literal "entity.nested": literal-value |
| 47 | + * end-record |
| 48 | + * }</pre> |
| 49 | + * |
| 50 | + * @author Christoph Böhme (rewrite) |
31 | 51 | * @author Markus Michael Geipel |
32 | | - * |
| 52 | + * @see EntityPathTracker |
33 | 53 | */ |
34 | | - |
35 | 54 | @Description("flattens out entities in a stream by introducing dots in literal names") |
36 | 55 | @In(StreamReceiver.class) |
37 | 56 | @Out(StreamReceiver.class) |
38 | 57 | public final class StreamFlattener extends DefaultStreamPipe<StreamReceiver> { |
39 | | - |
| 58 | + |
40 | 59 | public static final String DEFAULT_ENTITY_MARKER = "."; |
41 | | - private static final String ENTITIES_NOT_BALANCED = "Entity starts and ends are not balanced"; |
42 | 60 |
|
43 | | - private String entityMarker = DEFAULT_ENTITY_MARKER; |
44 | | - private final Deque<String> entityStack = new LinkedList<String>(); |
45 | | - private final StringBuilder entityPath = new StringBuilder(); |
46 | | - private String currentEntityPath = ""; |
47 | | - |
48 | | - public void setEntityMarker(final String entityMarker) { |
49 | | - this.entityMarker = entityMarker; |
| 61 | + private static final String ENTITIES_NOT_BALANCED = |
| 62 | + "Entity starts and ends are not balanced"; |
| 63 | + |
| 64 | + private final EntityPathTracker pathTracker = new EntityPathTracker(); |
| 65 | + |
| 66 | + public StreamFlattener() { |
| 67 | + setEntityMarker(DEFAULT_ENTITY_MARKER); |
50 | 68 | } |
51 | 69 |
|
52 | 70 | public String getEntityMarker() { |
53 | | - return entityMarker; |
| 71 | + return pathTracker.getEntitySeparator(); |
| 72 | + } |
| 73 | + |
| 74 | + public void setEntityMarker(final String entityMarker) { |
| 75 | + pathTracker.setEntitySeparator(entityMarker); |
54 | 76 | } |
55 | 77 |
|
56 | 78 | @Override |
57 | 79 | public void startRecord(final String identifier) { |
58 | 80 | assert !isClosed(); |
59 | | - entityStack.clear(); |
60 | | - currentEntityPath = ""; |
61 | | - if (entityPath.length() != 0) { |
62 | | - entityPath.delete(0, entityPath.length()); |
63 | | - } |
| 81 | + pathTracker.startRecord(identifier); |
64 | 82 | getReceiver().startRecord(identifier); |
65 | 83 | } |
66 | 84 |
|
67 | 85 | @Override |
68 | 86 | public void endRecord() { |
69 | 87 | assert !isClosed(); |
70 | | - currentEntityPath = ""; |
| 88 | + if (pathTracker.getCurrentEntityName() != null) { |
| 89 | + // TODO: Remove this check in 4.0.0. We assume well-formedness |
| 90 | + throw new IllegalStateException(ENTITIES_NOT_BALANCED); |
| 91 | + } |
| 92 | + pathTracker.endRecord(); |
71 | 93 | getReceiver().endRecord(); |
72 | | - |
73 | 94 | } |
74 | 95 |
|
75 | 96 | @Override |
76 | 97 | public void startEntity(final String name) { |
77 | 98 | assert !isClosed(); |
78 | | - entityStack.push(name); |
79 | | - entityPath.append(name); |
80 | | - entityPath.append(entityMarker); |
81 | | - currentEntityPath = entityPath.toString(); |
82 | | - |
| 99 | + pathTracker.startEntity(name); |
83 | 100 | } |
84 | 101 |
|
85 | 102 | @Override |
86 | 103 | public void endEntity() { |
87 | 104 | assert !isClosed(); |
88 | | - try { |
89 | | - final int end = entityPath.length(); |
90 | | - final String name = entityStack.pop(); |
91 | | - entityPath.delete(end - name.length() - entityMarker.length(), end); |
92 | | - currentEntityPath = entityPath.toString(); |
93 | | - } catch (NoSuchElementException exc) { |
94 | | - throw new IllegalStateException(ENTITIES_NOT_BALANCED + ": " + exc.getMessage(), exc); |
| 105 | + if (pathTracker.getCurrentEntityName() == null) { |
| 106 | + // TODO: Remove this check in 4.0.0. We assume well-formedness |
| 107 | + throw new IllegalStateException(ENTITIES_NOT_BALANCED); |
95 | 108 | } |
| 109 | + pathTracker.endEntity(); |
96 | 110 | } |
97 | 111 |
|
98 | 112 | @Override |
99 | 113 | public void literal(final String name, final String value) { |
100 | 114 | assert !isClosed(); |
101 | | - getReceiver().literal(currentEntityPath + name, value); |
| 115 | + getReceiver().literal(pathTracker.getCurrentPathWith(name), value); |
102 | 116 | } |
103 | 117 |
|
104 | 118 | public String getCurrentEntityName() { |
105 | | - return entityStack.peek(); |
| 119 | + return pathTracker.getCurrentEntityName(); |
106 | 120 | } |
107 | 121 |
|
108 | 122 | public String getCurrentPath() { |
109 | | - if(currentEntityPath.isEmpty()){ |
110 | | - return ""; |
111 | | - } |
112 | | - return currentEntityPath.substring(0, currentEntityPath.length() - entityMarker.length()); |
| 123 | + return pathTracker.getCurrentPath(); |
113 | 124 | } |
114 | | - |
| 125 | + |
115 | 126 | } |
0 commit comments