@@ -95,6 +95,21 @@ pub struct Cli {
9595 #[ cfg( feature = "multi_thread" ) ]
9696 #[ arg( long, env = "FASTCRAWL_PARTITION" , default_value = "hash" ) ]
9797 pub partition : PartitionStrategyArg ,
98+
99+ /// Number of wiki prefix buckets (0 = auto = shard count)
100+ #[ cfg( feature = "multi_thread" ) ]
101+ #[ arg( long, env = "FASTCRAWL_PARTITION_BUCKETS" , default_value_t = 0 ) ]
102+ pub partition_buckets : usize ,
103+
104+ /// Treat namespaces (e.g., Talk:, Help:) as part of the partition key
105+ #[ cfg( feature = "multi_thread" ) ]
106+ #[ arg( long, env = "FASTCRAWL_PARTITION_NAMESPACE" , default_value_t = false ) ]
107+ pub partition_namespace : bool ,
108+
109+ /// Maximum remote links to buffer before flushing to another shard (0 = default)
110+ #[ cfg( feature = "multi_thread" ) ]
111+ #[ arg( long, env = "FASTCRAWL_REMOTE_BATCH_SIZE" , default_value_t = 0 ) ]
112+ pub remote_batch_size : usize ,
98113}
99114
100115impl Cli {
@@ -123,8 +138,13 @@ impl Cli {
123138
/// Builds the partition configuration from the parsed command-line options.
///
/// `partition_buckets` and `remote_batch_size` use `0` as their "not set"
/// value on the command line; both are translated to `None` here, while any
/// positive value is forwarded unchanged as `Some`.
#[cfg(feature = "multi_thread")]
pub fn partition_settings(&self) -> PartitionSettings {
    // Zero means "no override requested" for both numeric options.
    let non_zero = |value: usize| Some(value).filter(|&n| n != 0);

    let wiki_bucket_count = non_zero(self.partition_buckets);
    let remote_batch_size = non_zero(self.remote_batch_size);

    PartitionSettings {
        strategy: self.partition,
        wiki_bucket_count,
        wiki_include_namespace: self.partition_namespace,
        remote_batch_size,
    }
}
129149}
130150
@@ -137,3 +157,17 @@ pub enum PartitionStrategyArg {
137157 /// Use Wikipedia-style namespace/title prefixes to keep related pages together.
138158 WikiPrefix ,
139159}
160+
/// Partition configuration, as parsed from the CLI, that the runtime consumes.
#[cfg(feature = "multi_thread")]
#[derive(Clone, Copy, Debug)]
pub struct PartitionSettings {
    /// The partitioning strategy that was selected.
    pub strategy: PartitionStrategyArg,
    /// Bucket count override for the wiki prefix strategy; `None` means the shard count is used.
    pub wiki_bucket_count: Option<usize>,
    /// When `true`, namespace prefixes (e.g., `Talk:`) are made part of the partition key.
    pub wiki_include_namespace: bool,
    /// Remote batch size override, if one was supplied.
    pub remote_batch_size: Option<usize>,
}
0 commit comments