Skip to content

Commit 76575ab

Browse files
committed
Add script for moving PR line items to categories in the release notes
1 parent 0f11e73 commit 76575ab

File tree

1 file changed

+287
-0
lines changed

1 file changed

+287
-0
lines changed
Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to reorder Pulsar release notes based on a reference file structure
4+
or reorganize a single file using component-based section assignment.
5+
6+
Single file mode: Takes one markdown file and reorganizes it by moving items
7+
to appropriate sections based on their component tags ([broker], [client], etc.)
8+
9+
Reference mode: Takes two markdown files and reorders the second file to match
10+
the section organization of the first (reference) file.
11+
"""
12+
13+
import re
14+
import sys
15+
from typing import Dict, List, Tuple, Optional
16+
from dataclasses import dataclass
17+
18+
@dataclass
class ReleaseItem:
    """One bullet line of a release-notes file plus the metadata parsed from it.

    Only ``full_line`` is required; the remaining fields stay ``None`` when
    the corresponding piece could not be extracted from the line.
    """

    # The bullet line exactly as read from the markdown file.
    full_line: str
    # Digits of the pull-request reference found in the line, kept as text.
    pr_number: Optional[str] = None
    # Second bracketed tag of the line, e.g. "broker" in "[fix][broker]".
    component: Optional[str] = None
    # First bracketed tag of the line, e.g. "fix" in "[fix][broker]".
    type_tag: Optional[str] = None
25+
26+
class PulsarReleaseReorderer:
    """Regroup and reorder the bullet items of a Pulsar release-notes file.

    An item is a line whose stripped text starts with ``- [``.  Every item is
    assigned to one of the sections in ``self.sections``: first by looking up
    its PR number in an optional reference file, then by the word "upgrade",
    then by its ``[component]`` tag, and finally by falling back to "Others".
    """

    # Markdown PR link of an item, e.g. "([#24360](https://...))".
    _PR_LINK_RE = re.compile(r'\(\[#(\d+)\]')

    # Leading "[type][component]" tags.  Whitespace between the two tags is
    # tolerated so that "- [fix] [broker] ..." is parsed like "- [fix][broker] ...".
    _TAGS_RE = re.compile(r'- \[(\w+)\]\s*\[(\w+)\]')

    # Component tag -> section used when the reference file does not know the PR.
    _COMPONENT_TO_SECTION: Dict[str, str] = {
        # Library updates
        'sec': 'Library updates',  # Security updates often involve library upgrades

        # Broker
        'admin': 'Broker',
        'broker': 'Broker',
        'meta': 'Broker',
        'ml': 'Broker',  # Managed ledger
        'monitor': 'Broker',
        'offload': 'Broker',
        'schema': 'Broker',
        'storage': 'Broker',
        'txn': 'Broker',  # Transaction
        'zk': 'Broker',  # ZooKeeper
        'bk': 'Broker',  # BookKeeper

        # Client
        'client': 'Client',

        # Pulsar IO and Pulsar Functions
        'fn': 'Pulsar IO and Pulsar Functions',  # Functions
        'io': 'Pulsar IO and Pulsar Functions',

        # Others
        'proxy': 'Others',
        'misc': 'Others',
        'doc': 'Others',
        'site': 'Others',
        'pip': 'Others',
        'ws': 'Others',  # WebSocket
        'cli': 'Others',  # Command line interface

        # Tests & CI
        'build': 'Tests & CI',
        'ci': 'Tests & CI',
        'test': 'Tests & CI',
    }

    # Ranks used to order items that are absent from the reference file.
    # Both tables are dense (0..n-1); get_section_order_position relies on that
    # to derive the rank of an unknown tag and the width of each "digit".
    _TYPE_PRIORITY: Dict[str, int] = {
        'fix': 0, 'improve': 1, 'feature': 2, 'cleanup': 3, 'refactor': 4, 'revert': 5,
    }
    _COMPONENT_PRIORITY: Dict[str, int] = {
        # Core components first
        'broker': 0, 'client': 1, 'admin': 2,
        # Storage and ledger
        'ml': 3, 'storage': 4, 'bk': 5, 'offload': 6,
        # Functions and IO
        'fn': 7, 'io': 8,
        # Infrastructure
        'proxy': 9, 'meta': 10, 'zk': 11, 'monitor': 12,
        # Protocol and communication
        'txn': 13, 'schema': 14,
        # Security and build
        'sec': 15, 'build': 16, 'ci': 17, 'test': 18,
        # Documentation and tooling
        'doc': 19, 'site': 20, 'pip': 21,
        # Others components
        'ws': 22, 'cli': 23,
        # Miscellaneous
        'misc': 24,
    }

    # Smallest sort key handed to items that are not listed in the reference.
    _NEW_ITEM_BASE = 900

    def __init__(self):
        # Recognized section titles, in the order they are written to the output.
        self.sections = [
            "Library updates",
            "Broker",
            "Client",
            "Pulsar IO and Pulsar Functions",
            "Others",
            "Tests & CI",
        ]

    def extract_pr_number(self, line: str) -> Optional[str]:
        """Return the digits of the first ``([#NNN]`` link in *line*, or ``None``."""
        match = self._PR_LINK_RE.search(line)
        return match.group(1) if match else None

    def extract_component_and_type(self, line: str) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(component, type)`` parsed from a ``- [type][component]`` prefix.

        Leading/trailing whitespace of *line* is ignored and the two tags may
        be separated by whitespace.  ``(None, None)`` is returned when the
        line does not start with two bracketed tags.
        """
        match = self._TAGS_RE.match(line.strip())
        if match:
            return match.group(2), match.group(1)  # component, type
        return None, None

    def parse_file(self, filename: str) -> Dict[str, List["ReleaseItem"]]:
        """Parse a release-notes file into items grouped by recognized section.

        A line ``### <title>`` whose title (surrounding whitespace ignored) is
        one of ``self.sections`` starts that section.  Item lines are added to
        the most recently started recognized section; an unrecognized ``###``
        header does not end it.  Item lines that appear before any recognized
        section are not returned.

        Returns a dict with one (possibly empty) list per entry of
        ``self.sections``.  Raises ``OSError`` if the file cannot be read.
        """
        sections = {section: [] for section in self.sections}
        current_section = None

        with open(filename, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.rstrip('\n')

                if line.startswith('### '):
                    # Strip so that trailing whitespace after the title does
                    # not make a known section look unknown.
                    title = line[4:].strip()
                    if title in sections:
                        current_section = title
                        continue

                if current_section and line.strip().startswith('- ['):
                    component, type_tag = self.extract_component_and_type(line)
                    sections[current_section].append(
                        ReleaseItem(
                            full_line=line,
                            pr_number=self.extract_pr_number(line),
                            component=component,
                            type_tag=type_tag,
                        )
                    )

        return sections

    def determine_section_for_item(self, item: "ReleaseItem",
                                   reference_sections: Dict[str, List["ReleaseItem"]]) -> str:
        """Return the section title *item* belongs to.

        Precedence: the section holding the same PR number in
        *reference_sections*; then "Library updates" if the line mentions
        "upgrade" (case-insensitive); then the section mapped to the item's
        component tag; otherwise "Others".
        """
        if item.pr_number and reference_sections:
            for section_name, ref_items in reference_sections.items():
                if any(ref.pr_number == item.pr_number for ref in ref_items):
                    return section_name

        if "upgrade" in item.full_line.lower():
            return 'Library updates'

        return self._COMPONENT_TO_SECTION.get(item.component, 'Others')

    def get_section_order_position(self, item: "ReleaseItem",
                                   reference_items: List["ReleaseItem"]) -> int:
        """Return the sort key of *item* within its section.

        * Items found in *reference_items* (by PR number) get their index there.
        * Other items sort after every reference position, ordered by type
          rank first and component rank second; unknown tags rank last.
        * Items without a PR number sort after everything else.
        """
        # One extra slot per table for tags missing from it.
        type_slots = len(self._TYPE_PRIORITY) + 1
        component_slots = len(self._COMPONENT_PRIORITY) + 1
        # Never let a computed key collide with a real reference index.
        base = max(self._NEW_ITEM_BASE, len(reference_items) if reference_items else 0)

        if not item.pr_number:
            return base + type_slots * component_slots

        if reference_items:
            for position, ref_item in enumerate(reference_items):
                if ref_item.pr_number == item.pr_number:
                    return position

        type_pos = self._TYPE_PRIORITY.get(item.type_tag, type_slots - 1)
        component_pos = self._COMPONENT_PRIORITY.get(item.component, component_slots - 1)
        # The multiplier equals the width of the component range, so the
        # component rank can never spill over into the type rank.
        return base + type_pos * component_slots + component_pos

    def reorder_file(self, reference_file: Optional[str], target_file: str, output_file: str):
        """Rewrite *target_file* into *output_file* with items regrouped and sorted.

        The output consists of everything in the target that precedes its
        first ``### `` line, the non-empty sections in ``self.sections``
        order, and the first "For the complete list..." line if the target
        has one.  When *reference_file* is falsy, sections are chosen from the
        component tags alone.

        Nothing is written when no item was parsed from the target, so an
        unrecognized file is never replaced by an empty one.
        """
        if reference_file:
            print(f"Parsing reference file: {reference_file}")
            reference_sections = self.parse_file(reference_file)
        else:
            print("Single file mode: Using component-based reorganization")
            reference_sections = {section: [] for section in self.sections}

        print(f"Parsing target file: {target_file}")
        target_sections = self.parse_file(target_file)

        # The raw text is needed for the header and the changelog link.
        with open(target_file, 'r', encoding='utf-8') as f:
            original_content = f.read()

        # Header = everything before the first "### " line.
        header_match = re.search(r'^(.*?)(^### )', original_content, re.MULTILINE | re.DOTALL)
        header = header_match.group(1) if header_match else ""

        all_target_items = [
            item for section_items in target_sections.values() for item in section_items
        ]
        print(f"Found {len(all_target_items)} items to reorder")

        if not all_target_items:
            print(
                f"No release items found under a recognized section in {target_file}; "
                f"nothing written",
                file=sys.stderr,
            )
            return

        # parse_file only skips item lines that precede the first recognized
        # section, so the difference is exactly the number of lines lost.
        item_like_lines = sum(
            1 for line in original_content.split('\n') if line.strip().startswith('- [')
        )
        dropped = item_like_lines - len(all_target_items)
        if dropped > 0:
            print(
                f"Warning: {dropped} item line(s) appear before any recognized "
                f"section and are not carried over to the output",
                file=sys.stderr,
            )

        new_sections = {section: [] for section in self.sections}
        for item in all_target_items:
            new_sections[self.determine_section_for_item(item, reference_sections)].append(item)

        # list.sort is stable, so items with equal keys keep their file order.
        for section_name in self.sections:
            reference_items = reference_sections.get(section_name, [])
            new_sections[section_name].sort(
                key=lambda item, ref=reference_items: self.get_section_order_position(item, ref)
            )

        parts = [header]
        for section_name in self.sections:
            items = new_sections[section_name]
            if not items:  # Only emit sections that have items
                continue
            parts.append(f"### {section_name}\n\n")
            parts.extend(f"{item.full_line}\n" for item in items)
            parts.append("\n")

        changelog_match = re.search(r'For the complete list.*', original_content)
        if changelog_match:
            parts.append(f"{changelog_match.group(0)}\n")

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("".join(parts))

        print(f"Reordered file written to: {output_file}")

        if reference_file:
            print(f"Used reference file: {reference_file}")
        else:
            print("Used component-based reorganization (no reference file)")

        print("\nSection summary:")
        for section_name in self.sections:
            count = len(new_sections[section_name])
            if count > 0:
                print(f"  {section_name}: {count} items")
256+
257+
def main():
    """Command-line entry point.

    With one argument the file is reorganized in place from its component
    tags.  With two or three arguments the first is the reference file, the
    second the target, and the optional third the output path (the target is
    overwritten when it is omitted).  Any other argument count prints the
    usage text and exits with status 1.
    """
    argc = len(sys.argv)

    if not 2 <= argc <= 4:
        usage = (
            "Usage: python reorder_pulsar_release.py <target_file>",
            "   or: python reorder_pulsar_release.py <reference_file> <target_file> [output_file]",
            "",
            "Single file mode: Reorganizes the file using component-based section assignment",
            "  Example: python reorder_pulsar_release.py pulsar-3.3.8.md",
            "",
            "Reference mode: Reorganizes target file based on reference file structure",
            "  Example: python reorder_pulsar_release.py pulsar-3.0.13.md pulsar-3.3.8.md",
            "  Example: python reorder_pulsar_release.py pulsar-3.0.13.md pulsar-3.3.8.md pulsar-3.3.8-reordered.md",
            "",
            "If output_file is not provided, the target_file will be updated in place.",
        )
        for text in usage:
            print(text)
        sys.exit(1)

    if argc == 2:
        # Single file mode: no reference, rewrite the target itself.
        reference_file, target_file = None, sys.argv[1]
        output_file = target_file
    else:
        # Reference mode: explicit output path only when a third argument is given.
        reference_file, target_file = sys.argv[1], sys.argv[2]
        output_file = target_file if argc != 4 else sys.argv[3]

    PulsarReleaseReorderer().reorder_file(reference_file, target_file, output_file)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)