Skip to content

Commit 4fa0f43

Browse files
committed
Updated XML docs
1 parent 7075351 commit 4fa0f43

File tree

7 files changed

+727
-454
lines changed

7 files changed

+727
-454
lines changed

CardinalityEstimation/BiasCorrection.cs

Lines changed: 139 additions & 347 deletions
Large diffs are not rendered by default.

CardinalityEstimation/CardinalityEstimator.cs

Lines changed: 175 additions & 52 deletions
Large diffs are not rendered by default.

CardinalityEstimation/CardinalityEstimatorExtensions.cs

Lines changed: 145 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -31,15 +31,29 @@ namespace CardinalityEstimation
3131
using System.Threading.Tasks;
3232

3333
/// <summary>
34-
/// Provides extension methods and utilities for cardinality estimators, particularly for concurrent operations.
34+
/// Provides extension methods and utilities for cardinality estimators, particularly for concurrent operations
35+
/// and advanced scenarios like parallel processing and distributed computing.
3536
/// </summary>
37+
/// <remarks>
38+
/// <para>This class contains methods that extend the functionality of both regular and concurrent
39+
/// cardinality estimators with additional capabilities for high-performance and distributed scenarios.</para>
40+
/// <para>The methods in this class are designed to be thread-safe when working with concurrent estimators
41+
/// and provide optimizations for bulk operations.</para>
42+
/// </remarks>
3643
public static class CardinalityEstimatorExtensions
3744
{
3845
/// <summary>
39-
/// Converts a regular CardinalityEstimator to a thread-safe ConcurrentCardinalityEstimator
46+
/// Converts a regular CardinalityEstimator to a thread-safe ConcurrentCardinalityEstimator.
4047
/// </summary>
4148
/// <param name="estimator">The estimator to convert</param>
42-
/// <returns>A new thread-safe ConcurrentCardinalityEstimator</returns>
49+
/// <returns>
50+
/// A new thread-safe ConcurrentCardinalityEstimator with the same state and configuration,
51+
/// or null if the input estimator is null
52+
/// </returns>
53+
/// <remarks>
54+
/// This method creates a snapshot of the current estimator state and initializes a new
55+
/// concurrent estimator with that state. The original estimator remains unchanged.
56+
/// </remarks>
4357
public static ConcurrentCardinalityEstimator ToConcurrent(this CardinalityEstimator estimator)
4458
{
4559
if (estimator == null)
@@ -49,11 +63,27 @@ public static ConcurrentCardinalityEstimator ToConcurrent(this CardinalityEstima
4963
}
5064

5165
/// <summary>
52-
/// Merges a collection of CardinalityEstimators in parallel
66+
/// Merges a collection of CardinalityEstimators in parallel for improved performance
67+
/// with large numbers of estimators.
5368
/// </summary>
54-
/// <param name="estimators">The estimators to merge</param>
55-
/// <param name="parallelismDegree">Maximum degree of parallelism. If null, uses default Task scheduler behavior.</param>
56-
/// <returns>A new ConcurrentCardinalityEstimator with merged results</returns>
69+
/// <param name="estimators">The collection of estimators to merge</param>
70+
/// <param name="parallelismDegree">
71+
/// Maximum degree of parallelism. If null, uses the default Task scheduler behavior
72+
/// based on available processor cores.
73+
/// </param>
74+
/// <returns>
75+
/// A new ConcurrentCardinalityEstimator containing the merged results of all input estimators,
76+
/// or null if no valid estimators are provided
77+
/// </returns>
78+
/// <remarks>
79+
/// <para>This method first converts all regular estimators to concurrent estimators, then
80+
/// uses parallel merge algorithms to efficiently combine large numbers of estimators.</para>
81+
/// <para>All input estimators must have the same accuracy parameters (bitsPerIndex).</para>
82+
/// <para>The parallel approach is most beneficial when merging many estimators (typically 10+).</para>
83+
/// </remarks>
84+
/// <exception cref="ArgumentException">
85+
/// Thrown when estimators have different accuracy parameters
86+
/// </exception>
5787
public static ConcurrentCardinalityEstimator ParallelMerge(this IEnumerable<CardinalityEstimator> estimators, int? parallelismDegree = null)
5888
{
5989
if (estimators == null)
@@ -73,10 +103,25 @@ public static ConcurrentCardinalityEstimator ParallelMerge(this IEnumerable<Card
73103
}
74104

75105
/// <summary>
76-
/// Safely merges estimators with automatic null checking and type conversion
106+
/// Safely merges estimators with automatic null checking and type conversion.
107+
/// This method accepts mixed collections of CardinalityEstimator and ConcurrentCardinalityEstimator instances.
77108
/// </summary>
78-
/// <param name="estimators">Mixed collection of CardinalityEstimator and ConcurrentCardinalityEstimator instances</param>
79-
/// <returns>A merged ConcurrentCardinalityEstimator or null if no valid estimators provided</returns>
109+
/// <param name="estimators">
110+
/// Mixed collection of CardinalityEstimator and ConcurrentCardinalityEstimator instances to merge
111+
/// </param>
112+
/// <returns>
113+
/// A merged ConcurrentCardinalityEstimator containing the union of all input estimators,
114+
/// or null if no valid estimators are provided
115+
/// </returns>
116+
/// <remarks>
117+
/// <para>This method provides a convenient way to merge estimators of different types without
118+
/// explicit conversion. Regular estimators are automatically converted to concurrent estimators.</para>
119+
/// <para>Null values in the input are automatically filtered out.</para>
120+
/// <para>All estimators must have compatible accuracy parameters.</para>
121+
/// </remarks>
122+
/// <exception cref="ArgumentException">
123+
/// Thrown when an estimator has an unsupported type or when estimators have incompatible parameters
124+
/// </exception>
80125
public static ConcurrentCardinalityEstimator SafeMerge(params object[] estimators)
81126
{
82127
if (estimators == null || !estimators.Any())
@@ -105,13 +150,32 @@ public static ConcurrentCardinalityEstimator SafeMerge(params object[] estimator
105150
}
106151

107152
/// <summary>
108-
/// Creates multiple concurrent estimators for distributed processing scenarios
153+
/// Creates multiple concurrent estimators for distributed processing scenarios where
154+
/// you need to process data across multiple threads or nodes.
109155
/// </summary>
110156
/// <param name="count">Number of estimators to create</param>
111-
/// <param name="hashFunction">Hash function to use (optional)</param>
112-
/// <param name="b">Accuracy parameter</param>
113-
/// <param name="useDirectCounting">Whether to use direct counting for small cardinalities</param>
114-
/// <returns>Array of concurrent cardinality estimators</returns>
157+
/// <param name="hashFunction">
158+
/// Hash function to use for all estimators. If null, uses the default XxHash128.
159+
/// All estimators will use the same hash function for compatibility.
160+
/// </param>
161+
/// <param name="b">
162+
/// Accuracy parameter for all estimators. Must be in the range [4, 16].
163+
/// All estimators will have the same accuracy to ensure they can be merged.
164+
/// </param>
165+
/// <param name="useDirectCounting">
166+
/// Whether to enable direct counting for small cardinalities on all estimators
167+
/// </param>
168+
/// <returns>An array of concurrent cardinality estimators with identical configurations</returns>
169+
/// <remarks>
170+
/// <para>This method is useful for distributed processing scenarios where you want to
171+
/// process data in parallel across multiple estimators and then merge the results.</para>
172+
/// <para>All created estimators have identical configurations to ensure compatibility
173+
/// when merging results.</para>
174+
/// </remarks>
175+
/// <exception cref="ArgumentOutOfRangeException">
176+
/// Thrown when <paramref name="count"/> is less than or equal to zero, or when
177+
/// <paramref name="b"/> is not in the valid range [4, 16]
178+
/// </exception>
115179
public static ConcurrentCardinalityEstimator[] CreateMultiple(int count, GetHashCodeDelegate hashFunction = null, int b = 14, bool useDirectCounting = true)
116180
{
117181
if (count <= 0)
@@ -127,12 +191,31 @@ public static ConcurrentCardinalityEstimator[] CreateMultiple(int count, GetHash
127191
}
128192

129193
/// <summary>
130-
/// Executes an action in parallel across multiple estimators
194+
/// Executes element addition operations in parallel across multiple estimators using
195+
/// a specified partitioning strategy for optimal load distribution.
131196
/// </summary>
132-
/// <typeparam name="T">Type of elements to add</typeparam>
133-
/// <param name="estimators">The estimators to operate on</param>
134-
/// <param name="elements">Elements to distribute across estimators</param>
135-
/// <param name="partitionStrategy">Strategy for partitioning elements across estimators</param>
197+
/// <typeparam name="T">Type of elements to add to the estimators</typeparam>
198+
/// <param name="estimators">The array of estimators to distribute elements across</param>
199+
/// <param name="elements">Collection of elements to add to the estimators</param>
200+
/// <param name="partitionStrategy">
201+
/// Strategy for partitioning elements across estimators. Different strategies may be
202+
/// optimal for different data distributions and processing patterns.
203+
/// </param>
204+
/// <remarks>
205+
/// <para>This method is designed for high-throughput scenarios where you need to process
206+
/// large numbers of elements across multiple estimators in parallel.</para>
207+
/// <para>The choice of partition strategy can affect performance and load balancing:
208+
/// - RoundRobin: Good for uniform element distribution
209+
/// - Chunked: Good for maintaining locality of reference
210+
/// - Hash: Good for ensuring consistent assignment of similar elements</para>
211+
/// <para>Supported element types: string, int, uint, long, ulong, float, double, byte[]</para>
212+
/// </remarks>
213+
/// <exception cref="ArgumentException">
214+
/// Thrown when the estimators array is null or empty
215+
/// </exception>
216+
/// <exception cref="ArgumentException">
217+
/// Thrown when elements contain unsupported types
218+
/// </exception>
136219
public static void ParallelAdd<T>(this ConcurrentCardinalityEstimator[] estimators,
137220
IEnumerable<T> elements,
138221
PartitionStrategy partitionStrategy = PartitionStrategy.RoundRobin)
@@ -191,6 +274,19 @@ public static void ParallelAdd<T>(this ConcurrentCardinalityEstimator[] estimato
191274
});
192275
}
193276

277+
/// <summary>
278+
/// Creates a partitioner function that distributes elements across estimators according
279+
/// to the specified strategy.
280+
/// </summary>
281+
/// <typeparam name="T">Type of elements to partition</typeparam>
282+
/// <param name="estimatorCount">Number of estimators to distribute across</param>
283+
/// <param name="strategy">Partitioning strategy to use</param>
284+
/// <returns>
285+
/// A function that takes a list of elements and returns partitions with estimator indices
286+
/// </returns>
287+
/// <exception cref="ArgumentException">
288+
/// Thrown when an unknown partition strategy is specified
289+
/// </exception>
194290
private static Func<IList<T>, IEnumerable<(int EstimatorIndex, IEnumerable<T> Elements)>> CreatePartitioner<T>(
195291
int estimatorCount,
196292
PartitionStrategy strategy)
@@ -222,23 +318,48 @@ public static void ParallelAdd<T>(this ConcurrentCardinalityEstimator[] estimato
222318
}
223319

224320
/// <summary>
225-
/// Strategy for partitioning elements across multiple estimators
321+
/// Defines strategies for partitioning elements across multiple cardinality estimators
322+
/// in parallel processing scenarios.
226323
/// </summary>
324+
/// <remarks>
325+
/// The choice of partition strategy can significantly impact performance and load balancing
326+
/// depending on the characteristics of your data and processing environment.
327+
/// </remarks>
227328
public enum PartitionStrategy
228329
{
229330
/// <summary>
230-
/// Distribute elements in round-robin fashion
331+
/// Distributes elements in round-robin fashion across estimators.
332+
/// This strategy provides good load balancing for uniformly distributed data
333+
/// and is the default choice for most scenarios.
231334
/// </summary>
335+
/// <remarks>
336+
/// Elements are assigned to estimators in cyclic order: first element to estimator 0,
337+
/// second to estimator 1, etc., wrapping around after reaching the last estimator.
338+
/// </remarks>
232339
RoundRobin,
233340

234341
/// <summary>
235-
/// Split elements into contiguous chunks
342+
/// Splits elements into contiguous chunks, with each chunk assigned to a different estimator.
343+
/// This strategy maintains data locality and can improve cache performance when processing
344+
/// related or ordered data.
236345
/// </summary>
346+
/// <remarks>
347+
/// Elements are divided into approximately equal-sized contiguous chunks, with each
348+
/// chunk processed by a different estimator. This can be beneficial when elements
349+
/// have spatial or temporal locality that should be preserved.
350+
/// </remarks>
237351
Chunked,
238352

239353
/// <summary>
240-
/// Distribute based on hash code of elements
354+
/// Distributes elements based on their hash code to ensure consistent assignment.
355+
/// This strategy guarantees that identical elements always go to the same estimator,
356+
/// which can be useful for certain distributed processing patterns.
241357
/// </summary>
358+
/// <remarks>
359+
/// The hash-based distribution uses the element's GetHashCode() method to determine
360+
/// which estimator it should be assigned to. This provides deterministic assignment
361+
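/// and can help with deduplication scenarios. Note that GetHashCode() is reference-based for arrays such as byte[] and may differ between processes for strings, so assignment is only guaranteed consistent within a single process and for types with value-based hash codes.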
362+
/// </remarks>
242363
Hash
243364
}
244365
}

0 commit comments

Comments
 (0)