"""
This script generates the OpenAI prompt used to create release notes.
"""

#!/usr/bin/env python3

import json
import os
import re
import shlex
import subprocess
import uuid
from datetime import datetime

import tiktoken  # type: ignore
from pytz import timezone

# --- Configuration read from the process environment -----------------------

# Optional settings: fall back to a default when the variable is absent.
GITHUB_OUTPUT = os.environ.get("GITHUB_OUTPUT")  # None when not set
BASE_REF = os.environ.get("BASE_REF", "main")

# Required settings: a missing variable raises KeyError at start-up.
HEAD_SHA = os.environ["HEAD_SHA"]
PR_TITLE = os.environ["PR_TITLE"]
PR_BODY = os.environ["PR_BODY"]

# Optional settings with string defaults; the literal "null" marks
# "no existing notes".
EXISTING_NOTES = os.environ.get("EXISTING_NOTES", "null")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-3.5-turbo-16k")
CUSTOM_PROMPT = os.environ.get("CUSTOM_PROMPT", "")

def extract_description_section(pr_body):
    """
    Return the text found under the ``## Description`` heading of a PR body.

    The section starts on the line after ``## Description`` and ends at the
    next line beginning with ``##`` or at the end of the text. Markdown
    comment lines of the form ``[comment]: ...`` are removed from the result.

    Args:
        pr_body: The full pull-request body text

    Returns:
        str: The stripped section content, or "" when the heading is absent
    """
    # Find content between ## Description and the next ## or end of text
    description_match = re.search(r'## Description\s*\n(.*?)(?=\n##|$)', pr_body, re.DOTALL)
    if description_match is None:
        return ""

    content = description_match.group(1).strip()
    # Remove the comment line if it exists. The pattern accepts either a
    # newline or the end of the text as terminator: the content has just been
    # stripped, so a comment on the last (or only) line has no trailing
    # newline and would otherwise be left in place.
    comment_pattern = r'\[comment\]:.+?(?:\n|\Z)'
    content = re.sub(comment_pattern, '', content)
    return content.strip()

def extract_ellipsis_important(pr_body):
    """
    Return the ``[!IMPORTANT]`` text from the first ELLIPSIS_HIDDEN region.

    Only the first pair of ``<!-- ELLIPSIS_HIDDEN -->`` markers is inspected.
    Inside it, the text following ``[!IMPORTANT]`` is taken up to the next
    ``[!`` marker (or the end of the region), and a leading run of dashes is
    dropped.

    Args:
        pr_body: The full pull-request body text

    Returns:
        str: The stripped callout text, or "" when the markers or the
        ``[!IMPORTANT]`` callout are missing
    """
    hidden_region = re.search(
        r'<!--\s*ELLIPSIS_HIDDEN\s*-->(.*?)<!--\s*ELLIPSIS_HIDDEN\s*-->',
        pr_body,
        re.DOTALL,
    )
    if hidden_region is None:
        return ""

    callout = re.search(
        r'\[!IMPORTANT\](.*?)(?=\[!|$)',
        hidden_region.group(1).strip(),
        re.DOTALL,
    )
    if callout is None:
        return ""

    # Drop a leading dash run (and the whitespace after it) before the text.
    without_rule = re.sub(r'^-+\s*', '', callout.group(1).strip())
    return without_rule.strip()

def extract_coderabbit_summary(pr_body):
    """
    Return the text found under the ``## Summary by CodeRabbit`` heading.

    The section ends at the next line beginning with ``##`` or at the end of
    the text.

    Args:
        pr_body: The full pull-request body text

    Returns:
        str: The stripped section content, or "" when the heading is absent
    """
    section = re.search(r'## Summary by CodeRabbit\s*\n(.*?)(?=\n##|$)', pr_body, re.DOTALL)
    if not section:
        return ""
    return section.group(1).strip()

def num_tokens_from_string(string: str, model_name: str) -> int:
    """
    Calculate the number of tokens in a text string for a specific model.

    Args:
        string: The input text to count tokens for
        model_name: Name of the OpenAI model to use for token counting

    Returns:
        int: Number of tokens in the input string
    """
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        # tiktoken raises KeyError for model names it cannot map to a
        # tokenizer. The model name is configurable, so fall back to
        # cl100k_base (the gpt-3.5-turbo / gpt-4 encoding) instead of
        # aborting; the count is only used to size the prompt.
        encoding = tiktoken.get_encoding("cl100k_base")
    # disallowed_special=() makes special-token strings such as
    # "<|endoftext|>" count as ordinary text; by default encode() raises
    # ValueError when the input contains one.
    return len(encoding.encode(string, disallowed_special=()))

def truncate_to_token_limit(text: str, max_tokens: int, model_name: str) -> str:
    """
    Truncate text to fit within a maximum token limit for a specific model.

    Args:
        text: The input text to truncate
        max_tokens: Maximum number of tokens allowed; values below zero are
            treated as zero
        model_name: Name of the OpenAI model to use for tokenization

    Returns:
        str: Truncated text that fits within the token limit
    """
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        # tiktoken raises KeyError for model names it cannot map to a
        # tokenizer. Fall back to cl100k_base (the gpt-3.5-turbo / gpt-4
        # encoding) so an unrecognised model name does not abort the run.
        encoding = tiktoken.get_encoding("cl100k_base")
    # disallowed_special=() makes special-token strings such as
    # "<|endoftext|>" encode as ordinary text; by default encode() raises
    # ValueError when the input contains one.
    encoded = encoding.encode(text, disallowed_special=())
    # Clamp the budget: a negative slice bound would drop tokens from the END
    # of the text instead of returning an empty result.
    truncated = encoded[:max(max_tokens, 0)]
    return encoding.decode(truncated)

# Extract sections and combine into PR_OVERVIEW. Empty sections are dropped
# and the remaining ones are separated by a blank line.
description = extract_description_section(PR_BODY)
important = extract_ellipsis_important(PR_BODY)
summary = extract_coderabbit_summary(PR_BODY)

PR_OVERVIEW = "\n\n".join(filter(None, [description, important, summary]))

# Get git information.
# BASE_REF and HEAD_SHA come from the environment and are interpolated into
# shell command strings, so they are shell-quoted first. The revision range is
# built as a single "<base>..<head>" word so git receives it as one argument.
base_sha = (
    subprocess.getoutput(f"git rev-parse origin/{shlex.quote(BASE_REF)}")
    if BASE_REF == 'main'
    else BASE_REF
)
rev_range = shlex.quote(f"{base_sha}..{HEAD_SHA}")

# NOTE(review): awk's $2 is the path column of `--name-status`, so each output
# line reads "<path>: <count> files changed". If a per-status count was
# intended, the first awk should print $1 -- confirm before changing.
diff_overview = subprocess.getoutput(
    f"git diff {rev_range} --name-status"
    " | awk '{print $2}' | sort | uniq -c"
    " | awk '{print $2 \": \" $1 \" files changed\"}'"
)
# Oldest-first list of at most 50 commits: "<short hash> - <subject> (<author>)".
git_log = subprocess.getoutput(
    f"git log {rev_range} --pretty=format:'%h - %s (%an)' --reverse | head -n 50"
)
# Whitespace-insensitive diff with no context lines.
git_diff = subprocess.getoutput(
    f"git diff {rev_range} --minimal --abbrev --ignore-cr-at-eol"
    " --ignore-space-at-eol --ignore-space-change --ignore-all-space"
    " --ignore-blank-lines --unified=0 --diff-filter=ACDMRT"
)

max_tokens = 14000  # Reserve some tokens for the response
changes_summary = truncate_to_token_limit(diff_overview, 1000, MODEL_NAME)
git_logs = truncate_to_token_limit(git_log, 2000, MODEL_NAME)
# The diff gets whatever is left of the budget after the file overview and the
# commit log, minus a further 1000 tokens.
diff_budget = (
    max_tokens
    - num_tokens_from_string(changes_summary, MODEL_NAME)
    - num_tokens_from_string(git_logs, MODEL_NAME)
    - 1000
)
changes_diff = truncate_to_token_limit(git_diff, diff_budget, MODEL_NAME)

# Get today's existing changelog if any ("null" means there is none)
existing_changelog = EXISTING_NOTES if EXISTING_NOTES != "null" else None
existing_changelog_text = f"\nAdditional context:\n{existing_changelog}" if existing_changelog else ""
TODAY = datetime.now(timezone('US/Eastern')).isoformat(sep=' ', timespec='seconds')

# A non-empty CUSTOM_PROMPT replaces the built-in instructions entirely.
BASE_PROMPT = CUSTOM_PROMPT if CUSTOM_PROMPT else f"""Based on the following 'PR Information', please generate concise and informative release notes to be read by developers.
Format the release notes with markdown, and always use this structure: a descriptive and very short title (no more than 8 words) with heading level 2, a paragraph with a summary of changes (no header), and if applicable, sections for '🚀 New Features & Improvements', '🐛 Bugs Fixed' and '🔧 Other Updates', with heading level 3, skip respectively the sections if not applicable.
Finally include the following markdown comment with the PR merged date: <!-- PR_DATE: {TODAY} -->.
Avoid being repetitive and focus on the most important changes and their impact, discard any mention of version bumps/updates, changeset files, environment variables or syntax updates.
PR Information:"""

# The diff is JSON-encoded so it occupies a single line of the prompt.
OPENAI_PROMPT = f"""{BASE_PROMPT}
Git log summary:
{changes_summary}
Commit Messages:
{git_logs}
PR Title:
{PR_TITLE}
PR Overview:
{PR_OVERVIEW}{existing_changelog_text}
Code Diff:
{json.dumps(changes_diff)}"""

print("OpenAI Prompt")
print("----------------------------------------------------------------")
print(OPENAI_PROMPT)

# Write the prompt to GITHUB_OUTPUT as a multiline value:
#   NAME<<DELIMITER / value lines / DELIMITER
# A random delimiter is used so that a line of the prompt (for example a
# literal "EOF" from the PR text) cannot end the value early, and the closing
# delimiter is newline-terminated.
if GITHUB_OUTPUT:
    delimiter = f"EOF_{uuid.uuid4().hex}"
    with open(GITHUB_OUTPUT, "a", encoding="utf-8") as outputs_file:
        outputs_file.write(f"OPENAI_PROMPT<<{delimiter}\n{OPENAI_PROMPT}\n{delimiter}\n")
else:
    # open(None) would raise TypeError; outside GitHub Actions the prompt has
    # already been printed above, so only the output write is skipped.
    print("GITHUB_OUTPUT is not set; skipping step output.")