[KYUUBI #6074] Add a script to simplify the process of creating release notes
# 🔍 Description ## Issue References 🔗 Currently, we use a rather primitive way to manually write release notes from scratch, and some of the mechanical and repetitive work can be simplified by the scripts. ## Describe Your Solution 🔧 Adds a script to simplify the process of creating release notes. Note: it just simplifies some processes, the release manager still needs to tune the outputs by hand. ## Types of changes 🔖 - [ ] Bugfix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) ## Test Plan 🧪 ``` RELEASE_TAG=v1.8.1 PREVIOUS_RELEASE_TAG=v1.8.0 build/release/pre_gen_release_notes.py ``` ``` $ head build/release/commits-v1.8.1.txt [KYUUBI #5981] Deploy Spark Hive connector with Scala 2.13 to Maven Central [KYUUBI #6058] Make Jetty server stop timeout configurable [KYUUBI #5952][1.8] Disconnect connections without running operations after engine maxlife time graceful period [KYUUBI #6048] Assign serviceNode and add volatile for variables [KYUUBI #5991] Error on reading Atlas properties composed of multi values [KYUUBI #6045] [REST] Sync the AdminRestApi with the AdminResource Apis [KYUUBI #6047] [CI] Free up disk space [KYUUBI #6036] JDBC driver conditional sets fetchSize on opening session [KYUUBI #6028] Exited spark-submit process should not block batch submit queue [KYUUBI #6018] Speed up GetTables operation for Spark session catalog ``` ``` $ head build/release/contributors-v1.8.1.txt * Shaoyun Chen -- [KYUUBI #5857][KYUUBI #5720][KYUUBI #5785][KYUUBI #5617] * Chao Chen -- [KYUUBI #5750] * Flyangz -- [KYUUBI #5832] * Pengqi Li -- [KYUUBI #5713] * Bowen Liang -- [KYUUBI #5730][KYUUBI #5802][KYUUBI #5767][KYUUBI #5831][KYUUBI #5801][KYUUBI #5754][KYUUBI #5626][KYUUBI #5811][KYUUBI #5853][KYUUBI #5765] * Paul Lin -- [KYUUBI #5799][KYUUBI #5814] * Senmiao Liu -- [KYUUBI #5969][KYUUBI #5244] * Xiao Liu -- [KYUUBI #5962] * 
Peiyue Liu -- [KYUUBI #5331] * Junjie Ma -- [KYUUBI #5789] ``` --- # Checklist 📝 - [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html) **Be nice. Be informative.** Closes #6074 from pan3793/release-script. Closes #6074 3d5ec20a4 [Cheng Pan] credits 176527995 [Cheng Pan] Add a script to simplify the process of creating release notes Authored-by: Cheng Pan <chengpan@apache.org> Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
parent
a30a28c791
commit
2c70c67fd1
262
build/release/pre_gen_release_notes.py
Executable file
262
build/release/pre_gen_release_notes.py
Executable file
@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This script is inspired by Apache Spark
|
||||
|
||||
# This script simplifies the process of creating release notes. It:
|
||||
# - folds the original and the revert commits
|
||||
# - filters out unrelated commits
|
||||
# - generates the contributor list
|
||||
# - canonicalizes the contributors' names using known_translations
|
||||
|
||||
# TODO
|
||||
# - canonicalize the commit titles
|
||||
|
||||
# Usage:
|
||||
# set environment variables: RELEASE_TAG and PREVIOUS_RELEASE_TAG, then perform
|
||||
# ./pre_gen_release_notes.py
|
||||
# Example:
|
||||
# RELEASE_TAG=v1.8.1 PREVIOUS_RELEASE_TAG=v1.8.0 ./pre_gen_release_notes.py
|
||||
|
||||
# It outputs
|
||||
# - commits-${RELEASE_TAG}.txt: the canonical commit list
|
||||
# - contributors-${RELEASE_TAG}.txt: the canonical contributor list
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from release_utils import (
|
||||
tag_exists,
|
||||
get_commits,
|
||||
yes_or_no_prompt,
|
||||
get_date,
|
||||
is_valid_author,
|
||||
capitalize_author,
|
||||
print_indented
|
||||
)
|
||||
|
||||
# Both tags are read from the environment. The script exits right away when
# one of them is missing or is rejected by tag_exists.
if "RELEASE_TAG" not in os.environ:
    sys.exit("RELEASE_TAG is required")
RELEASE_TAG = os.environ["RELEASE_TAG"]
if not tag_exists(RELEASE_TAG):
    sys.exit("RELEASE_TAG: %s does not exist!" % RELEASE_TAG)

if "PREVIOUS_RELEASE_TAG" not in os.environ:
    sys.exit("PREVIOUS_RELEASE_TAG is required")
PREVIOUS_RELEASE_TAG = os.environ["PREVIOUS_RELEASE_TAG"]
if not tag_exists(PREVIOUS_RELEASE_TAG):
    sys.exit("PREVIOUS_RELEASE_TAG: %s does not exist!" % PREVIOUS_RELEASE_TAG)

# Output files are named after the release tag; release_dir is the directory
# that contains this script
script_path = os.path.abspath(__file__)
release_dir = os.path.dirname(script_path)
commits_file_name = "commits-%s.txt" % RELEASE_TAG
contributors_file_name = "contributors-%s.txt" % RELEASE_TAG
|
||||
|
||||
# Collect the commits found in the new tag but not in the old tag.
# A commit is treated as already released when either its git hash or its
# PR number also shows up in the old tag.
print("Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
release_commits = get_commits(RELEASE_TAG)
previous_release_commits = get_commits(PREVIOUS_RELEASE_TAG)
previous_release_hashes = {old.get_hash() for old in previous_release_commits}
# Commits without a PR number contribute nothing to the PR set
previous_release_prs = {
    old.get_pr_number() for old in previous_release_commits if old.get_pr_number()
}
new_commits = [
    candidate
    for candidate in release_commits
    if candidate.get_hash() not in previous_release_hashes
    and not (candidate.get_pr_number() and candidate.get_pr_number() in previous_release_prs)
]
if not new_commits:
    sys.exit("There are no new commits between %s and %s!" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
|
||||
|
||||
# Show a summary of the commit range and let the user confirm it is correct
range_summary = [
    "\n==================================================================================",
    "Release tag: %s" % RELEASE_TAG,
    "Previous release tag: %s" % PREVIOUS_RELEASE_TAG,
    "Number of commits in this range: %s" % len(new_commits),
    "",
]
for summary_line in range_summary:
    print(summary_line)

# Listing every commit is optional because the range can be long
show_all = yes_or_no_prompt("Show all commits?")
if show_all:
    print_indented(new_commits)
print("==================================================================================\n")
looks_correct = yes_or_no_prompt("Does this look correct?")
if not looks_correct:
    sys.exit("Ok, exiting")
|
||||
|
||||
# Buckets for the special commits that get filtered out (releases, reverts,
# commits without a ticket) and for the remaining effective commits
releases, reverts, no_tickets, effective_commits = [], [], [], []
|
||||
|
||||
# Tell whether the commit title contains the "[release]" tag, ignoring case
def is_release(commit_title):
    lowered_title = commit_title.lower()
    return "[release]" in lowered_title
|
||||
|
||||
|
||||
# Tell whether the commit title lacks a "[KYUUBI #<number>]" ticket tag.
# The title is upper-cased first, so the match is case-insensitive.
def has_no_ticket(commit_title):
    ticket_pattern = "\\[KYUUBI\\s\\#[0-9]+\\]"
    return re.search(ticket_pattern, commit_title.upper()) is None
|
||||
|
||||
|
||||
# Tell whether the commit title mentions "revert" anywhere, ignoring case
def is_revert(commit_title):
    lowered_title = commit_title.lower()
    return "revert" in lowered_title
|
||||
|
||||
|
||||
# Put every new commit into exactly one bucket. The checks run in priority
# order (release, revert, no ticket); commits with an empty title are dropped.
for new_commit in new_commits:
    commit_title = new_commit.get_title()
    if not commit_title:
        continue
    if is_release(commit_title):
        bucket = releases
    elif is_revert(commit_title):
        bucket = reverts
    elif has_no_ticket(commit_title):
        bucket = no_tickets
    else:
        bucket = effective_commits
    bucket.append(new_commit)
|
||||
|
||||
|
||||
# Warn about the commits that were filtered out. Counts are printed first,
# then each non-empty group is listed in full.
if releases or reverts or no_tickets:
    ignored_counts = [
        ("Found %d release commits", releases),
        ("Found %d revert commits", reverts),
        ("Found %d commits with no Ticket", no_tickets),
    ]
    ignored_listings = [
        ("Release (%d)", releases),
        ("Revert (%d)", reverts),
        ("No Ticket (%d)", no_tickets),
    ]
    print("\n==================================================================================")
    for count_format, group in ignored_counts:
        if group:
            print(count_format % len(group))
    print("==================== Warning: these commits will be ignored ======================\n")
    for heading_format, group in ignored_listings:
        if group:
            print(heading_format % len(group))
            print_indented(group)
    print("==================== Warning: the above commits will be ignored ==================\n")

# Always ask before continuing with the commits that are left
prompt_msg = "%d effective commits left to process after filtering. OK to proceed?" % len(effective_commits)
if not yes_or_no_prompt(prompt_msg):
    sys.exit("OK, exiting.")
|
||||
|
||||
|
||||
# Load known author translations that are cached locally.
# Every line that is neither blank nor a "#" comment has the form
# "<old name> - <new name>".
known_translations = {}
known_translations_file_name = "known_translations"
# The with-block closes the file even when a malformed line raises. UTF-8 is
# requested explicitly so author names do not depend on the platform locale.
with open(
    os.path.join(release_dir, known_translations_file_name), "r", encoding="utf-8"
) as known_translations_file:
    for line in known_translations_file:
        if line.startswith("#") or not line.strip():
            continue
        # Split on the first separator only, so a new name that itself
        # contains " - " is kept intact
        [old_name, new_name] = line.strip("\n").split(" - ", 1)
        known_translations[old_name] = new_name
|
||||
|
||||
# Keep track of warnings to tell the user at the end
warnings = []

# Mapping from the invalid author name to its associated tickets
# E.g. pan3793 -> set("[KYUUBI #1234]", "[KYUUBI #1235]")
invalid_authors = {}

# Populate a map that groups tickets by author
# It takes the form: Author Name -> set()
# For instance,
# {
#   'Cheng Pan' -> set('[KYUUBI #1234]', '[KYUUBI #1235]'),
#   'Fu Chen' -> set('[KYUUBI #2345]')
# }
author_info = {}
print("\n=========================== Compiling contributor list ===========================")
for commit in effective_commits:
    _hash = commit.get_hash()
    title = commit.get_title()
    # Every "[KYUUBI #<number>]" tag in the upper-cased title counts as a ticket
    issues = re.findall("\\[KYUUBI\\s\\#[0-9]+\\]", title.upper())
    author = commit.get_author()
    date = get_date(_hash)
    if author in known_translations:
        # Translate the known author name
        author = known_translations[author]
    elif is_valid_author(author):
        author = capitalize_author(author)
    else:
        # The author name is invalid: keep track of it along with all
        # associated issues so we can translate it later
        invalid_authors.setdefault(author, set()).update(issues)
    # Merge this commit's tickets into the author's entry. Invalid names are
    # still recorded here, under the untranslated name.
    author_info.setdefault(author, set()).update(issues)
    print(" Processed commit %s authored by %s on %s" % (_hash, author, date))
print("==================================================================================\n")
|
||||
|
||||
# Write the commit list: the effective commits that were not reverted,
# followed by the commits that carry no ticket.
# Revert commits whose body names no reverted hash report None and are skipped here.
reverted_hashes = [revert.get_revert_hash() for revert in reverts if revert.get_revert_hash()]
# The with-block closes the file even if a write fails. UTF-8 is requested
# explicitly so titles with non-ASCII characters do not depend on the locale.
with open(os.path.join(release_dir, commits_file_name), "w", encoding="utf-8") as commits_file:
    for commit in effective_commits:
        commit_hash = commit.get_hash()
        # Both sides are abbreviated hashes and may differ in length, so a
        # prefix match in either direction counts as the same commit
        is_reverted = any(
            commit_hash.startswith(reverted) or reverted.startswith(commit_hash)
            for reverted in reverted_hashes
        )
        if not is_reverted:
            commits_file.write(commit.get_title() + "\n")
    for commit in no_tickets:
        commits_file.write(commit.get_title() + "\n")
print("Commits list is successfully written to %s!" % commits_file_name)
|
||||
|
||||
# Write to contributors file ordered by author names (by the last word of the name)
# Each line takes the format "* Author Name -- tickets"
# e.g. * Cheng Pan -- [KYUUBI #1234][KYUUBI #1235]
# e.g. * Fu Chen -- [KYUUBI #2345]
authors = sorted(author_info, key=lambda author: author.split(" ")[-1])
# default=0 keeps an empty contributor list from raising ValueError
author_max_len = max((len(author) for author in authors), default=0)
# The with-block closes the file even if a write fails. UTF-8 is requested
# explicitly so non-ASCII author names do not depend on the platform locale.
with open(os.path.join(release_dir, contributors_file_name), "w", encoding="utf-8") as contributors_file:
    for author in authors:
        # The tickets are kept in a set, which has no stable order. Sorting
        # by ticket number makes the output reproducible between runs.
        tickets = sorted(
            author_info[author],
            key=lambda ticket: int(re.search("[0-9]+", ticket).group()),
        )
        contribution = "".join(tickets)
        # Left-align the names in a column as wide as the longest one
        line = ("* {:<%s}" % author_max_len).format(author) + " -- " + contribution
        contributors_file.write(line + "\n")
print("Contributors list is successfully written to %s!" % contributors_file_name)
|
||||
|
||||
# Remind the user to translate author names when invalid ones were found
if invalid_authors:
    warnings.append("Found the following invalid authors:")
    warnings.extend("\t%s" % invalid_author for invalid_author in invalid_authors)
    warnings.append("Please update 'known_translations'.")

# Print every warning collected while creating the contributor list
if warnings:
    print("\n============ Warnings encountered while creating the contributor list ============")
    # One warning per line; the list is non-empty inside this branch
    print("\n".join(warnings))
    print("Please correct these in the final contributors list at %s." % contributors_file_name)
    print("==================================================================================\n")
|
||||
159
build/release/release_utils.py
Executable file
159
build/release/release_utils.py
Executable file
@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This script is inspired by Apache Spark
|
||||
|
||||
# This file contains helper methods used in creating a release.
|
||||
|
||||
import re
|
||||
import sys
|
||||
from subprocess import Popen, PIPE
|
||||
|
||||
|
||||
# Prompt the user to answer yes or no until they do so.
# Returns True for "y" and False for "n"; any other answer repeats the prompt.
def yes_or_no_prompt(msg):
    # A plain loop replaces the earlier retry-by-recursion, so repeated invalid
    # answers cannot grow the call stack
    while True:
        response = input("%s [y/n]: " % msg)
        if response in ("y", "n"):
            return response == "y"
|
||||
|
||||
|
||||
# Run the given command and return what it wrote to standard output,
# decoded as UTF-8. Standard error is not captured.
def run_cmd(cmd):
    process = Popen(cmd, stdout=PIPE)
    captured = process.communicate()
    return captured[0].decode("utf8")
|
||||
|
||||
|
||||
# Run the given command and return what it wrote to standard error,
# decoded as UTF-8. Standard output is captured too but discarded.
def run_cmd_error(cmd):
    process = Popen(cmd, stdout=PIPE, stderr=PIPE)
    captured = process.communicate()
    return captured[1].decode("utf8")
|
||||
|
||||
|
||||
# Return the date that `git show` prints for the given commit when asked
# for the %cd (committer date) placeholder
def get_date(commit_hash):
    git_show = ["git", "show", "--quiet", "--pretty=format:%cd", commit_hash]
    return run_cmd(git_show)
|
||||
|
||||
|
||||
# Tell whether `git show <tag>` succeeds, judged by the absence of the words
# "error" and "fatal" in what it writes to standard error
def tag_exists(tag):
    stderr = run_cmd_error(["git", "show", tag])
    reported_failure = "error" in stderr or "fatal" in stderr
    return not reported_failure
|
||||
|
||||
|
||||
# A plain record describing one commit: abbreviated hash, author, title and,
# when present, the number of the PR it closes and the hash it reverts
class Commit:
    def __init__(self, _hash, author, title, pr_number=None, revert_hash=None):
        self._hash = _hash
        self.author = author
        self.title = title
        self.pr_number = pr_number
        self.revert_hash = revert_hash

    def get_hash(self):
        return self._hash

    def get_author(self):
        return self.author

    def get_title(self):
        return self.title

    def get_pr_number(self):
        return self.pr_number

    def get_revert_hash(self):
        return self.revert_hash

    # Render all five fields separated by single spaces. A missing PR number
    # or revert hash leaves its field empty but keeps the separator.
    def __str__(self):
        closes_pr = ""
        if self.pr_number:
            closes_pr = "(Closes #%s)" % self.pr_number
        revert_commit = ""
        if self.revert_hash:
            revert_commit = "(Reverts %s)" % self.revert_hash
        fields = (self._hash, self.author, self.title, closes_pr, revert_commit)
        return " ".join(str(field) for field in fields)
|
||||
|
||||
|
||||
# Return all commits that belong to the specified tag, as a list of Commit objects.
#
# A single `git log` is run on the tag with a custom pretty format that wraps
# each commit in marker strings. The digest (hash, author name, subject) sits
# between the start and end markers with its fields separated by the field
# marker; the commit body follows the end marker. The PR number, a fallback
# author name and the reverted hash are pulled out of the body with regexes.
# The process exits when a commit does not match this layout.
def get_commits(tag):
    commit_start_marker = "|=== COMMIT START MARKER ===|"
    commit_end_marker = "|=== COMMIT END MARKER ===|"
    field_end_marker = "|=== COMMIT FIELD END MARKER ===|"
    digest_format = field_end_marker.join(["%h", "%an", "%s"])
    log_format = commit_start_marker + digest_format + commit_end_marker + "%b"
    output = run_cmd(["git", "log", "--quiet", "--pretty=format:" + log_format, tag])
    commits = []
    for raw_commit in output.split(commit_start_marker):
        # Splitting leaves empty pieces (e.g. before the first marker)
        if not raw_commit:
            continue
        if raw_commit.count(commit_end_marker) != 1:
            print("Commit end marker not found in commit: ")
            print(raw_commit)
            sys.exit(1)
        # Separate the digest from the body; exactly one end marker is present here
        commit_digest, _, commit_body = raw_commit.partition(commit_end_marker)
        digest_fields = commit_digest.split(field_end_marker)
        if len(digest_fields) != 3:
            sys.exit("Unexpected format in commit: %s" % commit_digest)
        _hash, author, title = digest_fields
        # The PR number and github username are only available from the
        # "Closes #<number> from <user>/<branch>" line of the commit message
        pr_number = None
        closes_match = re.search("Closes #([0-9]+) from ([^/\\s]+)/", commit_body)
        if closes_match:
            pr_number, github_username = closes_match.groups()
            # If the author name is not valid, use the github
            # username so we can translate it properly later
            if not is_valid_author(author):
                author = github_username
        author = author.strip()
        # Only the first 9 characters of a reverted hash are kept
        revert_match = re.search("This reverts commit ([0-9a-f]+)", commit_body)
        revert_hash = revert_match.group(1)[:9] if revert_match else None
        commits.append(Commit(_hash, author, title, pr_number, revert_hash))
    return commits
|
||||
|
||||
|
||||
# Return whether the given name looks like a full name: it must be non-empty,
# contain at least one space and contain no digits
def is_valid_author(author):
    if author:
        has_space = " " in author
        return has_space and not re.findall("[0-9]", author)
    return False
|
||||
|
||||
|
||||
# Capitalize the first letter of each word in the given author name.
# Words are separated by single spaces, runs of spaces collapse to one, and
# the rest of every word is left untouched. Returns None for an empty name.
def capitalize_author(author):
    if not author:
        return None
    capitalized = []
    for word in author.split(" "):
        if word:
            capitalized.append(word[0].capitalize() + word[1:])
    return " ".join(capitalized)
|
||||
|
||||
|
||||
# Print every element of the given list on its own line, prefixed with a space
def print_indented(_list):
    for entry in _list:
        indented = " %s" % entry
        print(indented)
|
||||
Loading…
Reference in New Issue
Block a user