1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to reorder Pulsar release notes based on a reference file structure
4
+ or reorganize a single file using component-based section assignment.
5
+
6
+ Single file mode: Takes one markdown file and reorganizes it by moving items
7
+ to appropriate sections based on their component tags ([broker], [client], etc.)
8
+
9
+ Reference mode: Takes two markdown files and reorders the second file to match
10
+ the section organization of the first (reference) file.
11
+ """
12
+
13
+ import re
14
+ import sys
15
+ from typing import Dict , List , Tuple , Optional
16
+ from dataclasses import dataclass
17
+
18
@dataclass
class ReleaseItem:
    """One bullet line of a release-notes file plus the tags parsed from it.

    Only ``full_line`` is required; the remaining fields default to ``None``
    when the corresponding piece of information was not found in the line.
    """

    # The bullet line exactly as it was read from the markdown file.
    full_line: str
    # Digits of the pull-request reference, e.g. "24360" for "([#24360](...))".
    pr_number: Optional[str] = None
    # Second bracketed tag of the line, e.g. "broker" in "[fix][broker]".
    component: Optional[str] = None
    # First bracketed tag of the line, e.g. "fix" in "[fix][broker]".
    type_tag: Optional[str] = None
25
+
26
class PulsarReleaseReorderer:
    """Regroup and reorder the bullet items of a Pulsar release-notes file.

    Items are read from the known ``### <section>`` blocks of a markdown file,
    assigned to a section (by PR number when a reference file lists the same
    PR, otherwise by keyword/component heuristics), sorted inside each section
    and written back out in the fixed order given by ``self.sections``.
    """

    # Section names used by the heuristics below.
    _LIBRARY_SECTION = 'Library updates'
    _DEFAULT_SECTION = 'Others'

    # Component tag -> section used when the reference file does not decide.
    _COMPONENT_TO_SECTION = {
        # Library updates
        'sec': 'Library updates',  # Security updates often involve library upgrades

        # Broker
        'admin': 'Broker',
        'broker': 'Broker',
        'meta': 'Broker',
        'ml': 'Broker',  # Managed ledger
        'monitor': 'Broker',
        'offload': 'Broker',
        'schema': 'Broker',
        'storage': 'Broker',
        'txn': 'Broker',  # Transaction
        'zk': 'Broker',  # ZooKeeper
        'bk': 'Broker',  # BookKeeper

        # Client
        'client': 'Client',

        # Pulsar IO and Pulsar Functions
        'fn': 'Pulsar IO and Pulsar Functions',  # Functions
        'io': 'Pulsar IO and Pulsar Functions',

        # Others
        'proxy': 'Others',
        'misc': 'Others',
        'doc': 'Others',
        'site': 'Others',
        'pip': 'Others',
        'ws': 'Others',  # WebSocket
        'cli': 'Others',  # Command line interface

        # Tests & CI
        'build': 'Tests & CI',
        'ci': 'Tests & CI',
        'test': 'Tests & CI',
    }

    # Ordering ranks for items that are not present in the reference section.
    _TYPE_PRIORITY = {
        'fix': 0, 'improve': 1, 'feature': 2,
        'cleanup': 3, 'refactor': 4, 'revert': 5,
    }
    _COMPONENT_PRIORITY = {
        # Core components first
        'broker': 0, 'client': 1, 'admin': 2,
        # Storage and ledger
        'ml': 3, 'storage': 4, 'bk': 5, 'offload': 6,
        # Functions and IO
        'fn': 7, 'io': 8,
        # Infrastructure
        'proxy': 9, 'meta': 10, 'zk': 11, 'monitor': 12,
        # Protocol and communication
        'txn': 13, 'schema': 14,
        # Security and build
        'sec': 15, 'build': 16, 'ci': 17, 'test': 18,
        # Documentation and tooling
        'doc': 19, 'site': 20, 'pip': 21,
        # Others components
        'ws': 22, 'cli': 23,
        # Miscellaneous
        'misc': 24,
    }

    # Sort-key layout. Each rank gets its own "digit" of width _RANK_SLOTS so
    # that the type rank always dominates the component rank; the previous
    # ``900 + type * 10 + component`` formula let a component rank >= 10 spill
    # into the next type bucket.
    _UNKNOWN_RANK = 999      # rank of a type/component tag missing from the tables
    _RANK_SLOTS = 1000       # must be greater than every rank, including _UNKNOWN_RANK
    _NEW_ITEM_BASE = 1_000_000  # keeps new items after every reference position
    _NO_PR_POSITION = _NEW_ITEM_BASE + _RANK_SLOTS * _RANK_SLOTS  # after all ranked items

    def __init__(self):
        # Known sections, in the order they are written to the output file.
        self.sections = [
            "Library updates",
            "Broker",
            "Client",
            "Pulsar IO and Pulsar Functions",
            "Others",
            "Tests & CI"
        ]

    def extract_pr_number(self, line: str) -> Optional[str]:
        """Return the digits of the first ``([#NNN]`` reference in *line*, or None."""
        match = re.search(r'\(\[#(\d+)\]', line)
        return match.group(1) if match else None

    def extract_component_and_type(self, line: str) -> Tuple[Optional[str], Optional[str]]:
        """Parse the leading ``- [type][component]`` tags of a bullet line.

        Returns ``(component, type)`` in lower case, or ``(None, None)`` when the
        stripped line does not start with two bracketed tags. Whitespace between
        the two tags is tolerated.
        """
        match = re.match(r'- \[(\w+)\]\s*\[(\w+)\]', line.strip())
        if match:
            # Lower-cased so that "[Fix][Broker]" hits the same table entries
            # as "[fix][broker]".
            return match.group(2).lower(), match.group(1).lower()  # component, type
        return None, None

    def parse_file(self, filename: str) -> Dict[str, List[ReleaseItem]]:
        """Read *filename* and return its bullet items grouped by known section.

        The result has one key per entry of ``self.sections`` (possibly with an
        empty list). Only lines whose stripped text starts with ``- [`` and that
        appear after a known ``### <section>`` heading are collected.
        """
        sections: Dict[str, List[ReleaseItem]] = {section: [] for section in self.sections}
        current_section = None

        with open(filename, 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')

        for line in lines:
            if line.startswith('### '):
                # Strip so that a heading with trailing whitespace still matches.
                heading = line[4:].strip()
                if heading in sections:
                    current_section = heading
                # NOTE: an unknown heading does not change current_section, so
                # items below it stay attributed to the previous known section.
                continue

            # A release item is a bullet that starts with a bracketed tag.
            if current_section and line.strip().startswith('- ['):
                component, type_tag = self.extract_component_and_type(line)
                sections[current_section].append(
                    ReleaseItem(
                        full_line=line,
                        pr_number=self.extract_pr_number(line),
                        component=component,
                        type_tag=type_tag,
                    )
                )

        return sections

    def determine_section_for_item(self, item: ReleaseItem, reference_sections: Dict[str, List[ReleaseItem]]) -> str:
        """Return the name of the section *item* should be placed in.

        Rules, in order: the section holding the same PR number in
        *reference_sections*; "Library updates" if the line contains "upgrade"
        (case-insensitive); the section mapped to the item's component tag;
        otherwise "Others".
        """
        # First, try to find the item by PR number in the reference (if any).
        if item.pr_number and reference_sections:
            for section_name, ref_items in reference_sections.items():
                if any(ref.pr_number == item.pr_number for ref in ref_items):
                    return section_name

        # Any mention of "upgrade" is treated as a library update.
        if "upgrade" in item.full_line.lower():
            return self._LIBRARY_SECTION

        # Component-based heuristic, falling back to the default section.
        return self._COMPONENT_TO_SECTION.get(item.component, self._DEFAULT_SECTION)

    def get_section_order_position(self, item: ReleaseItem, reference_items: List[ReleaseItem]) -> int:
        """Return an integer sort key for *item* within its section.

        * Items found in *reference_items* (by PR number) get their index there.
        * Other items with a PR number sort after all reference items, ordered
          by type rank first and component rank second.
        * Items without a PR number sort last.
        """
        if not item.pr_number:
            return self._NO_PR_POSITION

        # Position in the reference section, when the PR is listed there.
        for index, ref_item in enumerate(reference_items or ()):
            if ref_item.pr_number == item.pr_number:
                return index

        type_pos = self._TYPE_PRIORITY.get(item.type_tag, self._UNKNOWN_RANK)
        component_pos = self._COMPONENT_PRIORITY.get(item.component, self._UNKNOWN_RANK)

        # type_pos is the high "digit", component_pos the low one.
        return self._NEW_ITEM_BASE + type_pos * self._RANK_SLOTS + component_pos

    def reorder_file(self, reference_file: Optional[str], target_file: str, output_file: str) -> None:
        """Reorganize *target_file* and write the result to *output_file*.

        Args:
            reference_file: Optional release-notes file whose section
                assignment and item order are reused for matching PR numbers.
                ``None`` selects component-based reorganization only.
            target_file: Release-notes file to reorganize.
            output_file: Destination path; may be the same as *target_file*.

        Everything before the first ``### `` heading is copied unchanged; the
        sections are then rewritten in ``self.sections`` order (empty sections
        are omitted), followed by the "For the complete list..." line if one
        exists after the header. Nothing is written when no items were found,
        so an unrecognised file is never overwritten with empty content.
        """
        # Handle reference file parsing
        if reference_file:
            print(f"Parsing reference file: {reference_file}")
            reference_sections = self.parse_file(reference_file)
        else:
            print("Single file mode: Using component-based reorganization")
            reference_sections = {section: [] for section in self.sections}

        print(f"Parsing target file: {target_file}")
        target_sections = self.parse_file(target_file)

        # Read the original target file to preserve header and changelog link.
        with open(target_file, 'r', encoding='utf-8') as f:
            original_content = f.read()

        # Header = everything before the first line that starts with "### ".
        first_heading = re.search(r'^### ', original_content, re.MULTILINE)
        header = original_content[:first_heading.start()] if first_heading else ""

        # Collect all items from the target file, whatever section they were in.
        all_target_items = [
            item
            for section_items in target_sections.values()
            for item in section_items
        ]

        print(f"Found {len(all_target_items)} items to reorder")

        if not all_target_items:
            # Writing now would replace the file body with nothing (and the
            # default output is the target file itself), so leave it alone.
            print(f"No release items found in known sections of {target_file}; nothing written")
            return

        # Reorganize items by determining their proper sections.
        new_sections = {section: [] for section in self.sections}
        for item in all_target_items:
            new_sections[self.determine_section_for_item(item, reference_sections)].append(item)

        # Sort items within each section based on reference order.
        for section_name in self.sections:
            reference_items = reference_sections.get(section_name, [])
            new_sections[section_name].sort(
                key=lambda item: self.get_section_order_position(item, reference_items)
            )

        # Rebuild the file content.
        parts = [header]
        for section_name in self.sections:
            items = new_sections[section_name]
            if not items:  # Only add a section if it has items
                continue
            parts.append(f"### {section_name}\n\n")
            parts.extend(f"{item.full_line}\n" for item in items)
            parts.append("\n")

        # Re-append the changelog link. Only the text after the header is
        # searched: the header is already copied verbatim, so a match inside it
        # would otherwise be written twice.
        changelog_match = re.search(r'For the complete list.*', original_content[len(header):])
        if changelog_match:
            parts.append(f"{changelog_match.group(0)}\n")

        # Write the reordered file.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("".join(parts))

        print(f"Reordered file written to: {output_file}")

        if reference_file:
            print(f"Used reference file: {reference_file}")
        else:
            print("Used component-based reorganization (no reference file)")

        # Print summary
        print("\nSection summary:")
        for section_name in self.sections:
            count = len(new_sections[section_name])
            if count > 0:
                print(f"  {section_name}: {count} items")
256
+
257
def main():
    """Command-line entry point.

    Accepts one to three positional arguments:

    * ``<target_file>``: single file mode, rewritten in place.
    * ``<reference_file> <target_file> [output_file]``: reference mode; the
      target file is rewritten in place unless an output file is given.

    Any other argument count prints the usage text and exits with status 1.
    """
    args = sys.argv[1:]

    if not 1 <= len(args) <= 3:
        usage = (
            "Usage: python reorder_pulsar_release.py <target_file>",
            " or: python reorder_pulsar_release.py <reference_file> <target_file> [output_file]",
            "",
            "Single file mode: Reorganizes the file using component-based section assignment",
            " Example: python reorder_pulsar_release.py pulsar-3.3.8.md",
            "",
            "Reference mode: Reorganizes target file based on reference file structure",
            " Example: python reorder_pulsar_release.py pulsar-3.0.13.md pulsar-3.3.8.md",
            " Example: python reorder_pulsar_release.py pulsar-3.0.13.md pulsar-3.3.8.md pulsar-3.3.8-reordered.md",
            "",
            "If output_file is not provided, the target_file will be updated in place.",
        )
        for text in usage:
            print(text)
        sys.exit(1)

    if len(args) == 1:
        # Single file mode - reorganize based on components only
        reference_file = None
        target_file = args[0]
        output_file = target_file
    else:
        # Reference mode - use reference file for organization
        reference_file, target_file = args[0], args[1]
        output_file = args[2] if len(args) == 3 else target_file

    PulsarReleaseReorderer().reorder_file(reference_file, target_file, output_file)


if __name__ == "__main__":
    main()
0 commit comments