11package org .datacommons .pipeline .util ;
22
3+ import java .util .Arrays ;
4+ import java .util .Comparator ;
35import java .util .HashMap ;
6+ import java .util .List ;
47import java .util .Map ;
8+ import java .util .stream .Collectors ;
9+ import org .apache .beam .sdk .PipelineResult ;
10+ import org .apache .beam .sdk .extensions .protobuf .ProtoCoder ;
511import org .apache .beam .sdk .options .PipelineOptions ;
612import org .apache .beam .sdk .options .PipelineOptionsFactory ;
713import org .apache .beam .sdk .testing .PAssert ;
1218import org .datacommons .proto .Mcf .McfGraph ;
1319import org .datacommons .proto .Mcf .McfGraph .PropertyValues ;
1420import org .datacommons .proto .Mcf .McfGraph .TypedValue ;
21+ import org .datacommons .proto .Mcf .McfGraph .Values ;
22+ import org .datacommons .proto .Mcf .McfOptimizedGraph ;
23+ import org .datacommons .proto .Mcf .McfStatVarObsSeries ;
1524import org .datacommons .proto .Mcf .ValueType ;
25+ import org .junit .Assert ;
1626import org .junit .Rule ;
1727import org .junit .Test ;
1828import org .junit .runner .RunWith ;
2131@ RunWith (JUnit4 .class )
2232public class PipelineUtilsTest {
2333
24- PipelineOptions options = PipelineOptionsFactory .create ();
25-
34+ static PipelineOptions options = PipelineOptionsFactory .create ();
2635 @ Rule public TestPipeline p = TestPipeline .fromOptions (options );
2736
37+ private McfGraph createStatVarObservationGraph (
38+ String obsId , String statVar , String location , String date , String value ) {
39+ McfGraph .Builder graph = McfGraph .newBuilder ();
40+ PropertyValues .Builder pv = PropertyValues .newBuilder ();
41+ pv .putPvs (
42+ "typeOf" ,
43+ Values .newBuilder ()
44+ .addTypedValues (TypedValue .newBuilder ().setValue ("StatVarObservation" ))
45+ .build ());
46+ pv .putPvs (
47+ "variableMeasured" ,
48+ Values .newBuilder ().addTypedValues (TypedValue .newBuilder ().setValue (statVar )).build ());
49+ pv .putPvs (
50+ "observationAbout" ,
51+ Values .newBuilder ().addTypedValues (TypedValue .newBuilder ().setValue (location )).build ());
52+ pv .putPvs (
53+ "observationDate" ,
54+ Values .newBuilder ().addTypedValues (TypedValue .newBuilder ().setValue (date )).build ());
55+ pv .putPvs (
56+ "value" ,
57+ Values .newBuilder ().addTypedValues (TypedValue .newBuilder ().setValue (value )).build ());
58+ pv .putPvs (
59+ "dcid" ,
60+ Values .newBuilder ().addTypedValues (TypedValue .newBuilder ().setValue (obsId )).build ());
61+ graph .putNodes (obsId , pv .build ());
62+ return graph .build ();
63+ }
64+
65+ private McfStatVarObsSeries .StatVarObs createStatVarObs (String date , double value , String dcid ) {
66+ McfStatVarObsSeries .StatVarObs .Builder svObs = McfStatVarObsSeries .StatVarObs .newBuilder ();
67+ svObs .setDate (date );
68+ svObs .setNumber (value );
69+ svObs .setDcid (dcid );
70+ svObs .setPvs (PropertyValues .newBuilder ().build ());
71+ return svObs .build ();
72+ }
73+
74+ private McfStatVarObsSeries createMcfStatVarObsSeries (
75+ String statVar , String location , List <McfStatVarObsSeries .StatVarObs > observations ) {
76+ McfStatVarObsSeries .Key .Builder keyBuilder = McfStatVarObsSeries .Key .newBuilder ();
77+ keyBuilder .setObservationAbout (location );
78+ keyBuilder .setVariableMeasured (statVar );
79+
80+ List <McfStatVarObsSeries .StatVarObs > sortedSvObs =
81+ observations .stream ()
82+ .sorted (Comparator .comparing (McfStatVarObsSeries .StatVarObs ::getDate ))
83+ .collect (Collectors .toList ());
84+
85+ McfStatVarObsSeries .Builder seriesBuilder = McfStatVarObsSeries .newBuilder ();
86+ seriesBuilder .setKey (keyBuilder .build ());
87+ seriesBuilder .addAllSvObsList (sortedSvObs );
88+ return seriesBuilder .build ();
89+ }
90+
91+ @ Test
92+ public void testBuildOptimizedMcfGraph () {
93+ options .setStableUniqueNames (PipelineOptions .CheckEnabled .OFF );
94+ p .getCoderRegistry ()
95+ .registerCoderForClass (
96+ McfStatVarObsSeries .Key .class , ProtoCoder .of (McfStatVarObsSeries .Key .class ));
97+
98+ PCollection <McfGraph > input =
99+ p .apply (
100+ Create .of (
101+ createStatVarObservationGraph (
102+ "obs1" , "count_person" , "country/USA" , "2020" , "32.0" ),
103+ createStatVarObservationGraph (
104+ "obs2" , "count_person" , "country/USA" , "2021" , "33.0" ),
105+ createStatVarObservationGraph (
106+ "obs4" , "count_person" , "country/India" , "2022" , "36.0" )));
107+
108+ PCollection <McfOptimizedGraph > result = PipelineUtils .buildOptimizedMcfGraph (input );
109+
110+ McfOptimizedGraph expected1 =
111+ McfOptimizedGraph .newBuilder ()
112+ .setSvObsSeries (
113+ createMcfStatVarObsSeries (
114+ "count_person" ,
115+ "country/USA" ,
116+ Arrays .asList (
117+ createStatVarObs ("2020" , 32.0 , "obs1" ),
118+ createStatVarObs ("2021" , 33.0 , "obs2" ))))
119+ .build ();
120+ McfOptimizedGraph expected2 =
121+ McfOptimizedGraph .newBuilder ()
122+ .setSvObsSeries (
123+ createMcfStatVarObsSeries (
124+ "count_person" ,
125+ "country/India" ,
126+ List .of (createStatVarObs ("2022" , 36.0 , "obs4" ))))
127+ .build ();
128+
129+ PAssert .that (result ).containsInAnyOrder (expected1 , expected2 );
130+ PipelineResult .State state = p .run ().waitUntilFinish ();
131+ Assert .assertEquals (PipelineResult .State .DONE , state );
132+ }
133+
28134 @ Test
29135 public void testCombineGraphNodes () {
30136 // Input Graph 1
31137 McfGraph graph1 =
32- McfGraph . newBuilder ()
33- . putNodes (
138+ createGraph (
139+ Map . of (
34140 "node1" ,
35- PropertyValues .newBuilder ()
36- .putPvs (
37- "propA" ,
38- McfGraph .Values .newBuilder ()
39- .addTypedValues (
40- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("val1" ))
41- .build ())
42- .putPvs (
43- "propB" ,
44- McfGraph .Values .newBuilder ()
45- .addTypedValues (
46- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("valB1" ))
47- .build ())
48- .build ())
49- .putNodes (
141+ Map .of (
142+ "propA" , List .of ("val1" ),
143+ "propB" , List .of ("valB1" )),
50144 "node2" ,
51- PropertyValues .newBuilder ()
52- .putPvs (
53- "propC" ,
54- McfGraph .Values .newBuilder ()
55- .addTypedValues (
56- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("valC1" ))
57- .build ())
58- .build ())
59- .build ();
145+ Map .of ("propC" , List .of ("valC1" ))));
60146
61147 // Input Graph 2
62148 McfGraph graph2 =
63- McfGraph . newBuilder ()
64- . putNodes (
149+ createGraph (
150+ Map . of (
65151 "node1" ,
66- PropertyValues .newBuilder ()
67- .putPvs (
68- "propA" ,
69- McfGraph .Values .newBuilder ()
70- .addTypedValues (
71- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("val1" ))
72- .addTypedValues (
73- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("val2" ))
74- .build ())
75- .putPvs (
76- "propD" ,
77- McfGraph .Values .newBuilder ()
78- .addTypedValues (
79- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("valD1" ))
80- .build ())
81- .build ())
82- .putNodes (
152+ Map .of (
153+ "propA" , List .of ("val1" , "val2" ),
154+ "propD" , List .of ("valD1" )),
83155 "node3" ,
84- PropertyValues .newBuilder ()
85- .putPvs (
86- "propE" ,
87- McfGraph .Values .newBuilder ()
88- .addTypedValues (
89- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("valE1" ))
90- .build ())
91- .build ())
92- .build ();
156+ Map .of ("propE" , List .of ("valE1" ))));
93157
94158 // Expected Combined Graph
95159 McfGraph expectedCombinedGraph =
96- McfGraph . newBuilder ()
97- . putNodes (
160+ createGraph (
161+ Map . of (
98162 "node1" ,
99- PropertyValues .newBuilder ()
100- .putPvs (
101- "propA" ,
102- McfGraph .Values .newBuilder ()
103- .addTypedValues (
104- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("val1" ))
105- .addTypedValues (
106- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("val2" ))
107- .build ())
108- .putPvs (
109- "propB" ,
110- McfGraph .Values .newBuilder ()
111- .addTypedValues (
112- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("valB1" ))
113- .build ())
114- .putPvs (
115- "propD" ,
116- McfGraph .Values .newBuilder ()
117- .addTypedValues (
118- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("valD1" ))
119- .build ())
120- .build ())
121- .putNodes (
163+ Map .of (
164+ "propA" , List .of ("val1" , "val2" ),
165+ "propB" , List .of ("valB1" ),
166+ "propD" , List .of ("valD1" )),
122167 "node2" ,
123- PropertyValues .newBuilder ()
124- .putPvs (
125- "propC" ,
126- McfGraph .Values .newBuilder ()
127- .addTypedValues (
128- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("valC1" ))
129- .build ())
130- .build ())
131- .putNodes (
168+ Map .of ("propC" , List .of ("valC1" )),
132169 "node3" ,
133- PropertyValues .newBuilder ()
134- .putPvs (
135- "propE" ,
136- McfGraph .Values .newBuilder ()
137- .addTypedValues (
138- TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue ("valE1" ))
139- .build ())
140- .build ())
141- .build ();
170+ Map .of ("propE" , List .of ("valE1" ))));
142171
143172 PCollection <McfGraph > input = p .apply ("CreateInput" , Create .of (graph1 , graph2 ));
144173 PCollection <McfGraph > output = PipelineUtils .combineGraphNodes (input );
145174
146- // The combineGraphNodes method returns a PCollection where each element is an McfGraph
147- // containing a single combined node. To compare against a single expected graph,
148- // we need to merge these single-node graphs back into one.
149175 PCollection <McfGraph > mergedOutput =
150176 output .apply (
151177 "MergeOutputGraphs" , Combine .globally (new MergeMcfGraphsCombineFn ()).withoutDefaults ());
152-
153178 PAssert .thatSingleton (mergedOutput ).isEqualTo (expectedCombinedGraph );
179+ PipelineResult .State state = p .run ().waitUntilFinish ();
180+ Assert .assertEquals (PipelineResult .State .DONE , state );
181+ }
154182
155- p .run ().waitUntilFinish ();
183+ private McfGraph createGraph (Map <String , Map <String , List <String >>> nodeData ) {
184+ McfGraph .Builder graph = McfGraph .newBuilder ();
185+ for (Map .Entry <String , Map <String , List <String >>> nodeEntry : nodeData .entrySet ()) {
186+ String nodeName = nodeEntry .getKey ();
187+ Map <String , List <String >> props = nodeEntry .getValue ();
188+ PropertyValues .Builder pvs = PropertyValues .newBuilder ();
189+ for (Map .Entry <String , List <String >> propEntry : props .entrySet ()) {
190+ String propName = propEntry .getKey ();
191+ List <String > values = propEntry .getValue ();
192+ McfGraph .Values .Builder valuesBuilder = McfGraph .Values .newBuilder ();
193+ for (String value : values ) {
194+ valuesBuilder .addTypedValues (
195+ TypedValue .newBuilder ().setType (ValueType .TEXT ).setValue (value ));
196+ }
197+ pvs .putPvs (propName , valuesBuilder .build ());
198+ }
199+ graph .putNodes (nodeName , pvs .build ());
200+ }
201+ return graph .build ();
156202 }
157203
158- // A CombineFn to merge multiple McfGraph objects (each containing a single node) into a single
159- // McfGraph containing all nodes.
160204 static class MergeMcfGraphsCombineFn
161205 extends Combine .CombineFn <McfGraph , Map <String , PropertyValues >, McfGraph > {
162206 @ Override
@@ -167,7 +211,6 @@ public Map<String, PropertyValues> createAccumulator() {
167211 @ Override
168212 public Map <String , PropertyValues > addInput (
169213 Map <String , PropertyValues > accumulator , McfGraph input ) {
170- // Each input McfGraph is expected to contain exactly one node.
171214 accumulator .putAll (input .getNodesMap ());
172215 return accumulator ;
173216 }
0 commit comments