@@ -22,94 +22,39 @@ internal sealed class RowConstructor : IObjectConstructor
2222 /// sent per batch if there are nested rows contained in the row. Note that
2323 /// this is thread local variable because one RowConstructor object is
2424 /// registered to the Unpickler and there could be multiple threads unpickling
25- /// the data using the same object registered.
25+ /// the data using the same registered object.
2626 /// </summary>
2727 [ ThreadStatic ]
2828 private static IDictionary < string , StructType > s_schemaCache ;
2929
3030 /// <summary>
31- /// The RowConstructor that created this instance.
31+ /// Used by Unpickler to pass unpickled schema for handling. The Unpickler
32+ /// will reuse the <see cref="RowConstructor"/> object when
33+ /// it needs to start constructing a <see cref="Row"/>. The schema is passed
34+ /// to <see cref="construct(object[])"/> and the returned
35+ /// <see cref="IObjectConstructor"/> is used to build the rest of the <see cref="Row"/>.
3236 /// </summary>
33- private readonly RowConstructor _parent ;
34-
35- /// <summary>
36- /// Stores the args passed from construct().
37- /// </summary>
38- private readonly object [ ] _args ;
39-
40- public RowConstructor ( ) : this ( null , null )
41- {
42- }
43-
44- public RowConstructor ( RowConstructor parent , object [ ] args )
45- {
46- _parent = parent ;
47- _args = args ;
48- }
49-
50- /// <summary>
51- /// Used by Unpickler to pass unpickled data for handling.
52- /// </summary>
53- /// <param name="args">Unpickled data</param>
54- /// <returns>New RowConstructor object capturing args data</returns>
37+ /// <param name="args">Unpickled schema</param>
38+ /// <returns>
39+ /// New <see cref="RowWithSchemaConstructor"/> object capturing the schema.
40+ /// </returns>
public object construct(object[] args)
{
    // The cache field is marked [ThreadStatic], so it starts out null on
    // every thread and has to be created lazily on first use.
    IDictionary<string, StructType> cache = s_schemaCache;
    if (cache is null)
    {
        cache = new Dictionary<string, StructType>();
        s_schemaCache = cache;
    }

    // Exactly one argument, the schema string, is expected here. This is
    // only verified in debug builds.
    Debug.Assert((args != null) && (args.Length == 1) && (args[0] is string));

    // Resolve the schema through the cache and hand it to the constructor
    // object that will build the row from the row values.
    var schemaString = (string)args[0];
    StructType schema = GetSchema(cache, schemaString);
    return new RowWithSchemaConstructor(schema);
}
10651
10752 /// <summary>
10853 /// Clears the schema cache. Spark sends rows in batches and for each
10954 /// row there is an accompanying set of schemas and row entries. If the
11055 /// schema was not cached, then it would need to be parsed and converted
11156 /// to a StructType for every row in the batch. A new batch may contain
112- /// rows from a different table, so calling <c> Reset</c > after each
57+ /// rows from a different table, so calling <see cref="Reset"/> after each
11358 /// batch would aid in preventing the cache from growing too large.
11459 /// Caching the schemas for each batch, ensures that each schema is
11560 /// only parsed and converted to a StructType once per batch.
@@ -119,23 +64,36 @@ internal void Reset()
11964 s_schemaCache ? . Clear ( ) ;
12065 }
12166
/// <summary>
/// Looks up the <see cref="StructType"/> for a schema string in the given
/// cache. On a miss the string is parsed via
/// <see cref="DataType.ParseDataType"/> and the result is added to the cache.
/// </summary>
/// <param name="schemaCache">Cache keyed by schema string.</param>
/// <param name="schemaString">Schema string to resolve.</param>
/// <returns>The cached or newly parsed schema.</returns>
private static StructType GetSchema(
    IDictionary<string, StructType> schemaCache,
    string schemaString)
{
    if (schemaCache.TryGetValue(schemaString, out StructType cached))
    {
        return cached;
    }

    // Not seen yet: parse once and remember it for subsequent lookups.
    var parsed = (StructType)DataType.ParseDataType(schemaString);
    schemaCache.Add(schemaString, parsed);
    return parsed;
}
14077 }
78+
/// <summary>
/// An <see cref="IObjectConstructor"/> bound to a single schema. Instances
/// are created by <see cref="RowConstructor"/>; the Unpickler then calls
/// <see cref="construct(object[])"/> on them to obtain a <see cref="Row"/>.
/// </summary>
internal sealed class RowWithSchemaConstructor : IObjectConstructor
{
    /// <summary>
    /// Schema attached to every row built by this instance.
    /// </summary>
    private readonly StructType _rowSchema;

    /// <summary>
    /// Captures the schema to use for constructed rows.
    /// </summary>
    /// <param name="schema">Schema of the rows to construct.</param>
    internal RowWithSchemaConstructor(StructType schema) => _rowSchema = schema;

    /// <summary>
    /// Called by the Unpickler with the unpickled row values.
    /// </summary>
    /// <param name="args">Unpickled row values.</param>
    /// <returns>A <see cref="Row"/> built from the values and the captured schema.</returns>
    public object construct(object[] args)
    {
        return new Row(args, _rowSchema);
    }
}
14199}
0 commit comments