Skip to content

Commit d054d3e

Browse files
committed
Change to Lineage as a base abstract class
1 parent 2d97041 commit d054d3e

File tree

6 files changed

+178
-149
lines changed

6 files changed

+178
-149
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.beam.sdk.lineage;
19+
20+
import javax.annotation.Nullable;
21+
import org.apache.beam.sdk.metrics.Lineage;
22+
import org.apache.beam.sdk.options.PipelineOptions;
23+
24+
/**
 * Plugin hook for supplying a custom {@link Lineage} implementation.
 *
 * <p>{@code Lineage.createLineage} loads implementations of this interface through {@link
 * java.util.ServiceLoader} and uses the first non-null result of {@link #fromOptions}; when every
 * registrar returns null it falls back to the metrics-backed {@code MetricsLineage}.
 */
public interface LineageRegistrar {
25+
26+
/**
 * Creates the {@link Lineage} to use for the given lineage type.
 *
 * @param options the pipeline options the lineage is being initialized with
 * @param type which lineage is being created (the caller passes SOURCE and SINK)
 * @return a {@link Lineage} instance, or null to decline so the next registrar (or the default)
 *     is used
 */
@Nullable
27+
Lineage fromOptions(PipelineOptions options, Lineage.Type type);
28+
}
Lines changed: 53 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,53 @@
1-
package org.apache.beam.sdk.lineage;
2-
3-
import com.facebook.presto.hadoop.$internal.org.apache.avro.reflect.Nullable;
4-
5-
public interface LineageReporter {
6-
/**
7-
* Adds lineage information using pre-formatted FQN segments.
8-
*
9-
* @param rollupSegments FQN segments already escaped per Dataplex format
10-
*/
11-
void add(Iterable<String> rollupSegments);
12-
13-
/**
14-
* Adds lineage with system, optional subtype, and hierarchical segments.
15-
*
16-
* @param system The data system identifier (e.g., "bigquery", "kafka")
17-
* @param subtype Optional subtype (e.g., "table", "topic"), may be null
18-
* @param segments Hierarchical path segments
19-
* @param lastSegmentSep Separator for the last segment, may be null
20-
*/
21-
void add(
22-
String system,
23-
@Nullable String subtype,
24-
Iterable<String> segments,
25-
@Nullable String lastSegmentSep);
26-
27-
/**
28-
* Add a FQN (fully-qualified name) to Lineage.
29-
*/
30-
default void add(String system, Iterable<String> segments, @Nullable String sep) {
31-
add(system, null, segments, sep);
32-
}
33-
34-
/**
35-
* Add a FQN (fully-qualified name) to Lineage.
36-
*/
37-
default void add(String system, Iterable<String> segments) {
38-
add(system, segments, null);
39-
}
40-
}
1+
///*
2+
// * Licensed to the Apache Software Foundation (ASF) under one
3+
// * or more contributor license agreements. See the NOTICE file
4+
// * distributed with this work for additional information
5+
// * regarding copyright ownership. The ASF licenses this file
6+
// * to you under the Apache License, Version 2.0 (the
7+
// * "License"); you may not use this file except in compliance
8+
// * with the License. You may obtain a copy of the License at
9+
// *
10+
// * http://www.apache.org/licenses/LICENSE-2.0
11+
// *
12+
// * Unless required by applicable law or agreed to in writing, software
13+
// * distributed under the License is distributed on an "AS IS" BASIS,
14+
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// * See the License for the specific language governing permissions and
16+
// * limitations under the License.
17+
// */
18+
//package org.apache.beam.sdk.lineage;
19+
//
20+
//import com.facebook.presto.hadoop.$internal.org.apache.avro.reflect.Nullable;
21+
//
22+
//public interface LineageReporter {
23+
// /**
24+
// * Adds lineage information using pre-formatted FQN segments.
25+
// *
26+
// * @param rollupSegments FQN segments already escaped per Dataplex format
27+
// */
28+
// void add(Iterable<String> rollupSegments);
29+
//
30+
// /**
31+
// * Adds lineage with system, optional subtype, and hierarchical segments.
32+
// *
33+
// * @param system The data system identifier (e.g., "bigquery", "kafka")
34+
// * @param subtype Optional subtype (e.g., "table", "topic"), may be null
35+
// * @param segments Hierarchical path segments
36+
// * @param lastSegmentSep Separator for the last segment, may be null
37+
// */
38+
// void add(
39+
// String system,
40+
// @Nullable String subtype,
41+
// Iterable<String> segments,
42+
// @Nullable String lastSegmentSep);
43+
//
44+
// /** Add a FQN (fully-qualified name) to Lineage. */
45+
// default void add(String system, Iterable<String> segments, @Nullable String sep) {
46+
// add(system, null, segments, sep);
47+
// }
48+
//
49+
// /** Add a FQN (fully-qualified name) to Lineage. */
50+
// default void add(String system, Iterable<String> segments) {
51+
// add(system, segments, null);
52+
// }
53+
//}

sdks/java/core/src/main/java/org/apache/beam/sdk/lineage/LineageReporterRegistrar.java

Lines changed: 0 additions & 12 deletions
This file was deleted.

sdks/java/core/src/main/java/org/apache/beam/sdk/metrics/Lineage.java

Lines changed: 38 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,14 @@
2828
import java.util.concurrent.atomic.AtomicReference;
2929
import java.util.regex.Pattern;
3030
import org.apache.beam.sdk.annotations.Internal;
31-
import org.apache.beam.sdk.lineage.LineageReporter;
32-
import org.apache.beam.sdk.lineage.LineageReporterRegistrar;
31+
import org.apache.beam.sdk.lineage.LineageRegistrar;
3332
import org.apache.beam.sdk.metrics.Metrics.MetricsFlag;
3433
import org.apache.beam.sdk.options.PipelineOptions;
3534
import org.apache.beam.sdk.options.PipelineOptionsFactory;
3635
import org.apache.beam.sdk.util.common.ReflectHelpers;
3736
import org.apache.beam.sdk.values.KV;
3837
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting;
3938
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter;
40-
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
4139
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists;
4240
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets;
4341
import org.checkerframework.checker.nullness.qual.Nullable;
@@ -47,31 +45,19 @@
4745
/**
4846
* Standard collection of metrics used to record source and sinks information for lineage tracking.
4947
*/
50-
public class Lineage {
51-
// Namespace for lineage metrics; used to filter queries in Lineage.query() and in MetricsLineageReporter
48+
public abstract class Lineage {
5249
public static final String LINEAGE_NAMESPACE = "lineage";
5350
private static final Logger LOG = LoggerFactory.getLogger(Lineage.class);
54-
private static final AtomicReference<LineageReporter> SOURCES = new AtomicReference<>();
55-
private static final AtomicReference<LineageReporter> SINKS = new AtomicReference<>();
51+
private static final AtomicReference<Lineage> SOURCES = new AtomicReference<>();
52+
private static final AtomicReference<Lineage> SINKS = new AtomicReference<>();
5653

5754
private static final AtomicReference<KV<Long, Integer>> LINEAGE_REVISION =
5855
new AtomicReference<>();
5956

6057
// Reserved characters are backtick, colon, whitespace (space, \t, \n) and dot.
6158
private static final Pattern RESERVED_CHARS = Pattern.compile("[:\\s.`]");
6259

63-
private final Metric metric;
64-
65-
private Lineage(Type type) {
66-
if (MetricsFlag.lineageRollupEnabled()) {
67-
this.metric =
68-
Metrics.boundedTrie(
69-
LINEAGE_NAMESPACE,
70-
type == Type.SOURCE ? Type.SOURCEV2.toString() : Type.SINKV2.toString());
71-
} else {
72-
this.metric = Metrics.stringSet(LINEAGE_NAMESPACE, type.toString());
73-
}
74-
}
60+
protected Lineage() {}
7561

7662
@Internal
7763
public static void initialize(PipelineOptions options) {
@@ -82,70 +68,70 @@ public static void initialize(PipelineOptions options) {
8268
while (true) {
8369
KV<Long, Integer> currentRevision = LINEAGE_REVISION.get();
8470

85-
// Skip re-initialization if same options and revision hasn't changed
8671
if (currentRevision != null
8772
&& currentRevision.getKey().equals(optionsId)
8873
&& currentRevision.getValue() >= nextRevision) {
89-
LOG.debug("Lineage already initialized with options ID {} revision {}, skipping",
90-
optionsId, currentRevision.getValue());
74+
LOG.debug(
75+
"Lineage already initialized with options ID {} revision {}, skipping",
76+
optionsId,
77+
currentRevision.getValue());
9178
return;
9279
}
9380

9481
if (LINEAGE_REVISION.compareAndSet(currentRevision, KV.of(optionsId, nextRevision))) {
95-
LineageReporter sources = createReporter(options, Type.SOURCE);
96-
LineageReporter sinks = createReporter(options, Type.SINK);
82+
Lineage sources = createLineage(options, Type.SOURCE);
83+
Lineage sinks = createLineage(options, Type.SINK);
9784

9885
SOURCES.set(sources);
9986
SINKS.set(sinks);
10087

10188
if (currentRevision == null) {
10289
LOG.info("Lineage initialized with options ID {} revision {}", optionsId, nextRevision);
10390
} else {
104-
LOG.info("Lineage re-initialized from options ID {} to {} (revision {} -> {})",
105-
currentRevision.getKey(), optionsId,
106-
currentRevision.getValue(), nextRevision);
91+
LOG.info(
92+
"Lineage re-initialized from options ID {} to {} (revision {} -> {})",
93+
currentRevision.getKey(),
94+
optionsId,
95+
currentRevision.getValue(),
96+
nextRevision);
10797
}
10898
return;
10999
}
110100
}
111101
}
112102

113-
/// //// NEW METHOD
114-
private static LineageReporter createReporter(PipelineOptions options, Type type) {
115-
Set<LineageReporterRegistrar> registrars = Sets.newTreeSet(
116-
ReflectHelpers.ObjectsClassComparator.INSTANCE);
117-
registrars.addAll(Lists.newArrayList(
118-
ServiceLoader.load(LineageReporterRegistrar.class,
119-
ReflectHelpers.findClassLoader())));
103+
private static Lineage createLineage(PipelineOptions options, Type type) {
104+
Set<LineageRegistrar> registrars =
105+
Sets.newTreeSet(ReflectHelpers.ObjectsClassComparator.INSTANCE);
106+
registrars.addAll(
107+
Lists.newArrayList(
108+
ServiceLoader.load(LineageRegistrar.class, ReflectHelpers.findClassLoader())));
120109

121-
for (LineageReporterRegistrar registrar : registrars) {
122-
LineageReporter reporter = registrar.fromOptions(options, type);
110+
for (LineageRegistrar registrar : registrars) {
111+
Lineage reporter = registrar.fromOptions(options, type);
123112
if (reporter != null) {
124-
LOG.info("Using {} for lineage type {}",
125-
reporter.getClass().getName(), type);
113+
LOG.info("Using {} for lineage type {}", reporter.getClass().getName(), type);
126114
return reporter;
127115
}
128116
}
129117

130118
LOG.debug("Using default Metrics-based lineage for type {}", type);
131-
return new MetricsLineageReporter(type);
119+
return new MetricsLineage(type);
132120
}
133121

134-
/**
135-
* Get {@link LineageReporter} representing sources and optionally side inputs.
136-
*/
137-
public static LineageReporter getSources() {
138-
LineageReporter sources = SOURCES.get();
122+
/** Get {@link Lineage} representing sources and optionally side inputs. */
123+
public static Lineage getSources() {
124+
Lineage sources = SOURCES.get();
139125
if (sources == null) {
140126
initialize(PipelineOptionsFactory.create());
141127
sources = SOURCES.get();
142128
}
143129
return sources;
144130
}
145131

146-
/** {@link LineageReporter} representing sinks. */
147-
public static LineageReporter getSinks() {
148-
LineageReporter sinks = SINKS.get();
132+
/** {@link Lineage} representing sinks. */
133+
public static Lineage getSinks() {
134+
Lineage sinks = SINKS.get();
149135
if (sinks == null) {
150136
initialize(PipelineOptionsFactory.create());
151137
sinks = SINKS.get();
@@ -228,14 +214,7 @@ public void add(String system, Iterable<String> segments) {
228214
* which is already escaped.
229215
* <p>In particular, this means they will often have trailing delimiters.
230216
*/
231-
public void add(Iterable<String> rollupSegments) {
232-
ImmutableList<String> segments = ImmutableList.copyOf(rollupSegments);
233-
if (MetricsFlag.lineageRollupEnabled()) {
234-
((BoundedTrie) this.metric).add(segments);
235-
} else {
236-
((StringSet) this.metric).add(String.join("", segments));
237-
}
238-
}
217+
public abstract void add(Iterable<String> rollupSegments);
239218

240219
/**
241220
* Query {@link BoundedTrie} metrics from {@link MetricResults}.
@@ -245,9 +224,8 @@ public void add(Iterable<String> rollupSegments) {
245224
* @param truncatedMarker the marker to use to represent truncated FQNs.
246225
* @return A flat representation of all FQNs. If the FQN was truncated then it has a trailing
247226
* truncatedMarker.
248-
*
249-
* <p>NOTE: When using a custom LineageReporter plugin, this method
250-
* will return empty results since lineage is not stored in Metrics.
227+
* <p>NOTE: When a LineageRegistrar supplies a custom Lineage, this method will return empty
228+
* results since lineage is not stored in Metrics.
251229
*/
252230
public static Set<String> query(MetricResults results, Type type, String truncatedMarker) {
253231
MetricQueryResults lineageQueryResults = getLineageQueryResults(results, type);
@@ -276,9 +254,8 @@ public static Set<String> query(MetricResults results, Type type, String truncat
276254
* @param results FQNs from the result
277255
* @param type sources or sinks
278256
* @return A flat representation of all FQNs. If the FQN was truncated then it has a trailing '*'.
279-
*
280-
* <p>NOTE: When using a custom LineageReporter plugin, this method
281-
* will return empty results since lineage is not stored in Metrics.
257+
* <p>NOTE: When a LineageRegistrar supplies a custom Lineage, this method will return empty
258+
* results since lineage is not stored in Metrics.
282259
*/
283260
public static Set<String> query(MetricResults results, Type type) {
284261
if (MetricsFlag.lineageRollupEnabled()) {
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.beam.sdk.metrics;
19+
20+
import org.apache.beam.sdk.lineage.LineageReporter;
21+
import org.apache.beam.sdk.metrics.Metrics.MetricsFlag;
22+
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
23+
24+
/**
 * {@link Lineage} implementation that records lineage entries into Beam metrics.
 *
 * <p>Entries go to a {@link BoundedTrie} when {@code MetricsFlag.lineageRollupEnabled()} is true
 * and to a {@link StringSet} otherwise. {@code Lineage.createLineage} returns an instance of this
 * class when no {@code LineageRegistrar} supplies a {@link Lineage}.
 */
// NOTE(review): the file-level import of org.apache.beam.sdk.lineage.LineageReporter does not
// appear to be used by this class, and this commit comments that interface out - confirm and
// remove the import.
public class MetricsLineage extends Lineage {
25+
26+
// Either a BoundedTrie or a StringSet, chosen once in the constructor.
private final Metric metric;
27+
28+
/**
 * Creates the backing metric in the {@code Lineage.LINEAGE_NAMESPACE} namespace.
 *
 * <p>With rollup enabled the metric is a bounded trie named after {@code SOURCEV2} when {@code
 * type} is {@code SOURCE} and after {@code SINKV2} for any other type; with rollup disabled it is
 * a string set named after {@code type} itself.
 *
 * @param type the lineage type this instance records
 */
public MetricsLineage(final Lineage.Type type) {
29+
if (MetricsFlag.lineageRollupEnabled()) {
30+
this.metric =
31+
Metrics.boundedTrie(
32+
Lineage.LINEAGE_NAMESPACE,
33+
type == Lineage.Type.SOURCE
34+
? Lineage.Type.SOURCEV2.toString()
35+
: Lineage.Type.SINKV2.toString());
36+
} else {
37+
this.metric = Metrics.stringSet(Lineage.LINEAGE_NAMESPACE, type.toString());
38+
}
39+
}
40+
41+
/**
 * Records one lineage entry given as already-formatted segments.
 *
 * <p>With rollup enabled the copied segment list is added to the bounded trie; otherwise the
 * segments are concatenated with no separator and added to the string set.
 *
 * @param rollupSegments the segments of the entry; copied into an immutable list before use
 */
@Override
42+
public void add(final Iterable<String> rollupSegments) {
43+
ImmutableList<String> segments = ImmutableList.copyOf(rollupSegments);
44+
// NOTE(review): the flag is re-read here rather than remembered from construction; the casts
// below presumably rely on it returning the same value as it did in the constructor - confirm.
if (MetricsFlag.lineageRollupEnabled()) {
45+
((BoundedTrie) this.metric).add(segments);
46+
} else {
47+
((StringSet) this.metric).add(String.join("", segments));
48+
}
49+
}
50+
51+
/**
 * Records one lineage entry by passing the result of {@code Lineage.getFQNParts} to {@link
 * #add(Iterable)}.
 *
 * <p>NOTE(review): the removed {@code LineageReporter} interface documented {@code subtype} and
 * {@code lastSegmentSep} as nullable; no nullness annotations are declared here - confirm
 * against the signature this method overrides.
 *
 * @param system the data system identifier
 * @param subtype the subtype passed through to {@code getFQNParts}
 * @param segments the path segments passed through to {@code getFQNParts}
 * @param lastSegmentSep the last-segment separator passed through to {@code getFQNParts}
 */
@Override
52+
public void add(
53+
final String system,
54+
final String subtype,
55+
final Iterable<String> segments,
56+
final String lastSegmentSep) {
57+
add(Lineage.getFQNParts(system, subtype, segments, lastSegmentSep));
58+
}
59+
}

0 commit comments

Comments
 (0)