Skip to content

Commit 643fede

Browse files
authored
DAGE-465: Enhance stitchdata changelog process (#1129)
* DAGE-465: Fixed regex
* DAGE-465: Changed target folder and dynamically fetch connection-id
* DAGE-465: Added sentence in MD file
* DAGE-465: Changed md text to f-text
* DAGE-465: Added connection version
* DAGE-465: Added connection name and process PR title
* DAGE-465: Replaced md_filename with f-string
* DAGE-465: Put back target folder creation
* DAGE-465: Improved print
* DAGE-465: Fixed pandas warning
* DAGE-465: Guess the entry type from the PR title
* DAGE-465: Removed test file
1 parent e710ff2 commit 643fede

File tree

1 file changed

+72
-12
lines changed

1 file changed

+72
-12
lines changed

scripts/changelog/changelog.py

Lines changed: 72 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import requests, re, base64, json, datetime, os, pandas, sys
1+
import requests, re, base64, json, datetime, os, pandas, sys, yaml
22
from datetime import datetime as dt
33

44
# GitHub info
@@ -11,7 +11,8 @@
1111

1212

1313
# Folder for new files
14-
path = '../../_changelog-files/drafts'
14+
year = dt.today().strftime('%Y')
15+
path = f'../../_changelog-files/{year}'
1516

1617
start_date = (dt.today() - datetime.timedelta(days=nb_days)).date()
1718

@@ -20,13 +21,27 @@
2021
documented = []
2122
to_document = []
2223
to_ignore = []
24+
integration_dict = {}
2325

24-
def createDir(): # Check if the drafts folder exists and create it if it doesn't
26+
# Lists of words to guess the entry type
27+
bug_fix = ['fix', 'fixed', 'fixing']
28+
deprecation = ['deprecate', 'deprecated','deprecating', 'deprecation']
29+
improvement = ['improve', 'improved', 'improving', 'improvement', 'enhance', 'enhanced', 'enhancing', 'enhancement', 'update', 'updated', 'updating', 'upgrade', 'upgraded', 'upgrading']
30+
issue_identified = ['identify', 'identified', 'identifying']
31+
new_feature = ['new version']
32+
removed = ['remove', 'removed', 'removing', 'removal']
33+
34+
def createDir():
    """Create the target changelog folder if it doesn't already exist.

    Reads the module-level ``path``. Intermediate folders are created as
    needed. ``exist_ok=True`` makes the call a no-op when the folder is
    already present, which avoids the check-then-create race of testing
    ``os.path.exists`` first.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
2939

40+
def createIntegrationDict():
    """Load all integrations from integrations.yml into ``integration_dict``.

    Parses ``../../_data/taps/integrations.yml`` with ``yaml.safe_load`` and
    merges the mapping stored under its ``integrations`` key into the
    module-level ``integration_dict``.

    Raises:
        FileNotFoundError: if the integrations file is missing.
        KeyError: if the parsed document has no ``integrations`` key.
    """
    # Explicit encoding, matching the other file reads in this script, so the
    # result doesn't depend on the platform's default locale encoding.
    with open('../../_data/taps/integrations.yml', 'r', encoding='utf8') as file:
        yaml_data = yaml.safe_load(file)
    integration_dict.update(yaml_data['integrations'])
44+
3045
def getPRsToIgnore(): # Check the ignore.txt file for PRs that shouldn't be documented
3146
with open('ignore.txt', 'r', encoding='utf8') as f:
3247
ignore = f.readlines()
@@ -47,7 +62,7 @@ def getDocumentedPRs(): # Get PRs that already have a changelog file
4762
# Find the line that contains the PR URL
4863
for line in changelog_lines:
4964
if line.startswith('pull-request:'):
50-
link = re.search('^pull-request\:\s\"(.*)\"$', line).group(1)
65+
link = re.search(r'^pull-request\:\s\"(.*)\"$', line).group(1)
5166

5267
# Add the URL to the list of PRs already added in the changelog
5368
documented.append(link)
@@ -116,8 +131,8 @@ def getPRsToDocument(): # Find PRs that need to be documented and create draft c
116131
prs = pandas.DataFrame(pr_list, columns=['repository', 'pr_number', 'pr_title', 'pr_url', 'pr_merge_date'])
117132

118133
for index, row in prs.iterrows():
119-
name = row[0]
120-
number = row[1]
134+
name = row.iloc[0]
135+
number = row.iloc[1]
121136

122137
# For each PR, check the files updated
123138
api = 'https://api.github.com/repos/singer-io/' + name + '/pulls/' + str(number) + '/files'
@@ -155,7 +170,8 @@ def getPRsToDocument(): # Find PRs that need to be documented and create draft c
155170
print(pr)
156171

157172
# If the PR is not already in the list of PRs to document, get the name of the tap, and the PR number, title, URL and merge date from the DataFrame
158-
tap = pr[0].replace('tap-', '')
173+
tap_raw = pr[0]
174+
tap = tap_raw.replace('tap-', '')
159175
pr_number = str(pr[1])
160176
pr_title = pr[2]
161177
pr_url = pr[3]
@@ -164,21 +180,65 @@ def getPRsToDocument(): # Find PRs that need to be documented and create draft c
164180
# Add the PR to the list of PRs to document
165181
to_document.append(pr)
166182

183+
# Get connection information from integrations.yaml
184+
connection_id_found = False
185+
connection_name_found = False
186+
for key, value in integration_dict.items():
187+
if value['tap'] == tap_raw:
188+
connection_id = value['id']
189+
connection_id_found = True
190+
connection_name = value['display_name']
191+
connection_name_found = True
192+
break
193+
if not connection_id_found:
194+
connection_id = 'NOT FOUND'
195+
if not connection_name_found:
196+
connection_name = 'NOT FOUND'
197+
198+
# Get latest connection version
199+
with open(f'../../_data/taps/versions/{connection_id}.yml', 'r') as file:
200+
yaml_data = yaml.safe_load(file)
201+
connection_version = yaml_data['latest-version']
202+
203+
# Process PR title
204+
pr_title = re.sub(r'\w*-\d*\s?:\s?', '', pr_title)
205+
pr_title_for_md_description = pr_title[0].lower() + pr_title[1:]
206+
pr_title_for_md_filename = pr_title.lower().replace(' ', '-').replace(':', '-').replace(',', '-').replace('.', '-').replace('--', '-')
207+
208+
# Guess the entry type from the PR title
209+
entry_type = 'NOT FOUND'
210+
pr_title_lower = pr_title.lower()
211+
entry_types = [
212+
('bug-fix', bug_fix),
213+
('deprecation', deprecation),
214+
('improvement', improvement),
215+
('issue-identified', issue_identified),
216+
('new-feature', new_feature),
217+
('removed', removed)
218+
]
219+
for type, keywords in entry_types:
220+
if any(word in pr_title_lower for word in keywords):
221+
entry_type = type
222+
break
223+
167224
# Create the filename and content of the changelog file and create it
168-
md_filename = path + '/' + pr_date + '-' + tap + '-' + pr_number + '.md'
169-
md_text = '---\ntitle: "' + pr_title + '"\ncontent-type: "changelog-entry"\ndate: ' + pr_date + '\nentry-type: \nentry-category: integration\nconnection-id: \nconnection-version: \npull-request: "' + pr_url + '"\n---\n{{ site.data.changelog.metadata.single-integration | flatify }}'
225+
md_filename = f'{path}/{pr_date}-{tap}-v{connection_version}-{pr_title_for_md_filename}.md'
226+
md_text = f'---\ntitle: "{connection_name} (v{connection_version}): {pr_title}"\ncontent-type: "changelog-entry"\ndate: {pr_date}\nentry-type: {entry_type}\nentry-category: integration\nconnection-id: {connection_id}\nconnection-version: {connection_version}\npull-request: "{pr_url}"\n---\n{{ site.data.changelog.metadata.single-integration | flatify }}\n\nWe\'ve improved our {{ this-connection.display_name }} (v{{ this-connection.this-version }}) integration to {pr_title_for_md_description}.'
170227
with open(md_filename, 'w') as out:
171228
out.write(md_text)
172229

173230
# Print results
174231
count = len(to_document)
175-
if count > 0:
176-
print(str(count) + ' pull requests to document')
232+
if count > 1:
233+
print(f'{str(count)} pull requests to document')
234+
elif count == 1:
235+
print('1 pull request to document')
177236
else:
178237
print('No pull requests to document')
179238

180239

181240
createDir()
241+
createIntegrationDict()
182242
getDocumentedPRs()
183243
getPRsToIgnore()
184244
getRepoList()

0 commit comments

Comments
 (0)