@@ -31,15 +31,29 @@ namespace CardinalityEstimation
3131 using System . Threading . Tasks ;
3232
3333 /// <summary>
34- /// Provides extension methods and utilities for cardinality estimators, particularly for concurrent operations.
34+ /// Provides extension methods and utilities for cardinality estimators, particularly for concurrent operations
35+ /// and advanced scenarios like parallel processing and distributed computing.
3536 /// </summary>
37+ /// <remarks>
38+ /// <para>This class contains methods that extend the functionality of both regular and concurrent
39+ /// cardinality estimators with additional capabilities for high-performance and distributed scenarios.</para>
40+ /// <para>The methods in this class are designed to be thread-safe when working with concurrent estimators
41+ /// and provide optimizations for bulk operations.</para>
42+ /// </remarks>
3643 public static class CardinalityEstimatorExtensions
3744 {
3845 /// <summary>
39- /// Converts a regular CardinalityEstimator to a thread-safe ConcurrentCardinalityEstimator
46+ /// Converts a regular CardinalityEstimator to a thread-safe ConcurrentCardinalityEstimator.
4047 /// </summary>
4148 /// <param name="estimator">The estimator to convert</param>
42- /// <returns>A new thread-safe ConcurrentCardinalityEstimator</returns>
49+ /// <returns>
50+ /// A new thread-safe ConcurrentCardinalityEstimator with the same state and configuration,
51+ /// or null if the input estimator is null
52+ /// </returns>
53+ /// <remarks>
54+ /// This method creates a snapshot of the current estimator state and initializes a new
55+ /// concurrent estimator with that state. The original estimator remains unchanged.
56+ /// </remarks>
4357 public static ConcurrentCardinalityEstimator ToConcurrent ( this CardinalityEstimator estimator )
4458 {
4559 if ( estimator == null )
@@ -49,11 +63,27 @@ public static ConcurrentCardinalityEstimator ToConcurrent(this CardinalityEstima
4963 }
5064
5165 /// <summary>
52- /// Merges a collection of CardinalityEstimators in parallel
66+ /// Merges a collection of CardinalityEstimators in parallel for improved performance
67+ /// with large numbers of estimators.
5368 /// </summary>
54- /// <param name="estimators">The estimators to merge</param>
55- /// <param name="parallelismDegree">Maximum degree of parallelism. If null, uses default Task scheduler behavior.</param>
56- /// <returns>A new ConcurrentCardinalityEstimator with merged results</returns>
69+ /// <param name="estimators">The collection of estimators to merge</param>
70+ /// <param name="parallelismDegree">
71+ /// Maximum degree of parallelism. If null, uses the default Task scheduler behavior
72+ /// based on available processor cores.
73+ /// </param>
74+ /// <returns>
75+ /// A new ConcurrentCardinalityEstimator containing the merged results of all input estimators,
76+ /// or null if no valid estimators are provided
77+ /// </returns>
78+ /// <remarks>
79+ /// <para>This method first converts all regular estimators to concurrent estimators, then
80+ /// uses parallel merge algorithms to efficiently combine large numbers of estimators.</para>
81+ /// <para>All input estimators must have the same accuracy parameters (bitsPerIndex).</para>
82+ /// <para>The parallel approach is most beneficial when merging many estimators (typically 10+).</para>
83+ /// </remarks>
84+ /// <exception cref="ArgumentException">
85+ /// Thrown when estimators have different accuracy parameters
86+ /// </exception>
5787 public static ConcurrentCardinalityEstimator ParallelMerge ( this IEnumerable < CardinalityEstimator > estimators , int ? parallelismDegree = null )
5888 {
5989 if ( estimators == null )
@@ -73,10 +103,25 @@ public static ConcurrentCardinalityEstimator ParallelMerge(this IEnumerable<Card
73103 }
74104
75105 /// <summary>
76- /// Safely merges estimators with automatic null checking and type conversion
106+ /// Safely merges estimators with automatic null checking and type conversion.
107+ /// This method accepts mixed collections of CardinalityEstimator and ConcurrentCardinalityEstimator instances.
77108 /// </summary>
78- /// <param name="estimators">Mixed collection of CardinalityEstimator and ConcurrentCardinalityEstimator instances</param>
79- /// <returns>A merged ConcurrentCardinalityEstimator or null if no valid estimators provided</returns>
109+ /// <param name="estimators">
110+ /// Mixed collection of CardinalityEstimator and ConcurrentCardinalityEstimator instances to merge
111+ /// </param>
112+ /// <returns>
113+ /// A merged ConcurrentCardinalityEstimator containing the union of all input estimators,
114+ /// or null if no valid estimators are provided
115+ /// </returns>
116+ /// <remarks>
117+ /// <para>This method provides a convenient way to merge estimators of different types without
118+ /// explicit conversion. Regular estimators are automatically converted to concurrent estimators.</para>
119+ /// <para>Null values in the input are automatically filtered out.</para>
120+ /// <para>All estimators must have compatible accuracy parameters.</para>
121+ /// </remarks>
122+ /// <exception cref="ArgumentException">
123+ /// Thrown when an estimator has an unsupported type or when estimators have incompatible parameters
124+ /// </exception>
80125 public static ConcurrentCardinalityEstimator SafeMerge ( params object [ ] estimators )
81126 {
82127 if ( estimators == null || ! estimators . Any ( ) )
@@ -105,13 +150,32 @@ public static ConcurrentCardinalityEstimator SafeMerge(params object[] estimator
105150 }
106151
107152 /// <summary>
108- /// Creates multiple concurrent estimators for distributed processing scenarios
153+ /// Creates multiple concurrent estimators for distributed processing scenarios where
154+ /// you need to process data across multiple threads or nodes.
109155 /// </summary>
110156 /// <param name="count">Number of estimators to create</param>
111- /// <param name="hashFunction">Hash function to use (optional)</param>
112- /// <param name="b">Accuracy parameter</param>
113- /// <param name="useDirectCounting">Whether to use direct counting for small cardinalities</param>
114- /// <returns>Array of concurrent cardinality estimators</returns>
157+ /// <param name="hashFunction">
158+ /// Hash function to use for all estimators. If null, uses the default XxHash128.
159+ /// All estimators will use the same hash function for compatibility.
160+ /// </param>
161+ /// <param name="b">
162+ /// Accuracy parameter for all estimators. Must be in the range [4, 16].
163+ /// All estimators will have the same accuracy to ensure they can be merged.
164+ /// </param>
165+ /// <param name="useDirectCounting">
166+ /// Whether to enable direct counting for small cardinalities on all estimators
167+ /// </param>
168+ /// <returns>An array of concurrent cardinality estimators with identical configurations</returns>
169+ /// <remarks>
170+ /// <para>This method is useful for distributed processing scenarios where you want to
171+ /// process data in parallel across multiple estimators and then merge the results.</para>
172+ /// <para>All created estimators have identical configurations to ensure compatibility
173+ /// when merging results.</para>
174+ /// </remarks>
175+ /// <exception cref="ArgumentOutOfRangeException">
176+ /// Thrown when <paramref name="count"/> is less than or equal to zero, or when
177+ /// <paramref name="b"/> is not in the valid range [4, 16]
178+ /// </exception>
115179 public static ConcurrentCardinalityEstimator [ ] CreateMultiple ( int count , GetHashCodeDelegate hashFunction = null , int b = 14 , bool useDirectCounting = true )
116180 {
117181 if ( count <= 0 )
@@ -127,12 +191,31 @@ public static ConcurrentCardinalityEstimator[] CreateMultiple(int count, GetHash
127191 }
128192
129193 /// <summary>
130- /// Executes an action in parallel across multiple estimators
194+ /// Executes element addition operations in parallel across multiple estimators using
195+ /// a specified partitioning strategy for optimal load distribution.
131196 /// </summary>
132- /// <typeparam name="T">Type of elements to add</typeparam>
133- /// <param name="estimators">The estimators to operate on</param>
134- /// <param name="elements">Elements to distribute across estimators</param>
135- /// <param name="partitionStrategy">Strategy for partitioning elements across estimators</param>
197+ /// <typeparam name="T">Type of elements to add to the estimators</typeparam>
198+ /// <param name="estimators">The array of estimators to distribute elements across</param>
199+ /// <param name="elements">Collection of elements to add to the estimators</param>
200+ /// <param name="partitionStrategy">
201+ /// Strategy for partitioning elements across estimators. Different strategies may be
202+ /// optimal for different data distributions and processing patterns.
203+ /// </param>
204+ /// <remarks>
205+ /// <para>This method is designed for high-throughput scenarios where you need to process
206+ /// large numbers of elements across multiple estimators in parallel.</para>
207+ /// <para>The choice of partition strategy can affect performance and load balancing:</para>
208+ /// <list type="bullet"><item><description>RoundRobin: Good for uniform element distribution</description></item>
209+ /// <item><description>Chunked: Good for maintaining locality of reference</description></item>
210+ /// <item><description>Hash: Good for ensuring consistent assignment of equal elements</description></item></list>
211+ /// <para>Supported element types: string, int, uint, long, ulong, float, double, byte[]</para>
212+ /// </remarks>
213+ /// <exception cref="ArgumentException">
214+ /// Thrown when the estimators array is null or empty
215+ /// </exception>
216+ /// <exception cref="ArgumentException">
217+ /// Thrown when elements contain unsupported types
218+ /// </exception>
136219 public static void ParallelAdd < T > ( this ConcurrentCardinalityEstimator [ ] estimators ,
137220 IEnumerable < T > elements ,
138221 PartitionStrategy partitionStrategy = PartitionStrategy . RoundRobin )
@@ -191,6 +274,19 @@ public static void ParallelAdd<T>(this ConcurrentCardinalityEstimator[] estimato
191274 } ) ;
192275 }
193276
277+ /// <summary>
278+ /// Creates a partitioner function that distributes elements across estimators according
279+ /// to the specified strategy.
280+ /// </summary>
281+ /// <typeparam name="T">Type of elements to partition</typeparam>
282+ /// <param name="estimatorCount">Number of estimators to distribute across</param>
283+ /// <param name="strategy">Partitioning strategy to use</param>
284+ /// <returns>
285+ /// A function that takes a list of elements and returns partitions with estimator indices
286+ /// </returns>
287+ /// <exception cref="ArgumentException">
288+ /// Thrown when an unknown partition strategy is specified
289+ /// </exception>
194290 private static Func < IList < T > , IEnumerable < ( int EstimatorIndex , IEnumerable < T > Elements ) > > CreatePartitioner < T > (
195291 int estimatorCount ,
196292 PartitionStrategy strategy )
@@ -222,23 +318,48 @@ public static void ParallelAdd<T>(this ConcurrentCardinalityEstimator[] estimato
222318 }
223319
224320 /// <summary>
225- /// Strategy for partitioning elements across multiple estimators
321+ /// Defines strategies for partitioning elements across multiple cardinality estimators
322+ /// in parallel processing scenarios.
226323 /// </summary>
324+ /// <remarks>
325+ /// The choice of partition strategy can significantly impact performance and load balancing
326+ /// depending on the characteristics of your data and processing environment.
327+ /// </remarks>
227328 public enum PartitionStrategy
228329 {
229330 /// <summary>
230- /// Distribute elements in round-robin fashion
331+ /// Distributes elements in round-robin fashion across estimators.
332+ /// This strategy provides good load balancing for uniformly distributed data
333+ /// and is the default strategy used by ParallelAdd.
231334 /// </summary>
335+ /// <remarks>
336+ /// Elements are assigned to estimators in cyclic order: first element to estimator 0,
337+ /// second to estimator 1, etc., wrapping around after reaching the last estimator.
338+ /// </remarks>
232339 RoundRobin ,
233340
234341 /// <summary>
235- /// Split elements into contiguous chunks
342+ /// Splits elements into contiguous chunks, with each chunk assigned to a different estimator.
343+ /// This strategy maintains data locality and can improve cache performance when processing
344+ /// related or ordered data.
236345 /// </summary>
346+ /// <remarks>
347+ /// Elements are divided into approximately equal-sized contiguous chunks, with each
348+ /// chunk processed by a different estimator. This can be beneficial when elements
349+ /// have spatial or temporal locality that should be preserved.
350+ /// </remarks>
237351 Chunked ,
238352
239353 /// <summary>
240- /// Distribute based on hash code of elements
354+ /// Distributes elements based on their hash code to ensure consistent assignment.
355+ /// This strategy is intended to route equal elements to the same estimator,
356+ /// which can be useful for certain distributed processing patterns.
241357 /// </summary>
358+ /// <remarks>
359+ /// The hash-based distribution uses the element's GetHashCode() method to determine
360+ /// which estimator it should be assigned to. NOTE(review): repeatability depends on GetHashCode();
361+ /// string hash codes can differ between processes and arrays hash by reference - confirm before relying on it.
362+ /// </remarks>
242363 Hash
243364 }
244365}
0 commit comments