#!/usr/bin/env python3
# Copyright (C) 2020 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING. If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it. It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
#
# Author: Martin Liska <mliska@suse.cz>
import argparse
import datetime
import os
import re
import subprocess
import sys
from itertools import takewhile
import requests
from unidiff import PatchSet
# A "PR component/number" reference that follows a comment leader: '//',
# '/*', or a single 'C', 'c', '*' or '!' character (presumably the comment
# markers of other languages in the testsuite -- confirm), then whitespace.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR component/number' string into its 'comp' and 'num' parts.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Same comment leaders as pr_regex, but for a 'DR number' reference.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# A '{ dg-error' / '{ dg-warning' directive; stops the PR scan of a test.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# 'pr' or 'PR' followed by at least four digits, at the start of the name
# or after a non-word character or underscore (e.g. 'pr12345.c').
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line whose first character is alphanumeric, '_' or '#', i.e. text that
# starts in the first column; group 1 is the whole line.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line that starts with '/*'.
comment_regex = re.compile(r'^\/\*')
# 'class|struct|union|enum NAME', with an optional 'GTY(...)' marker between
# the keyword (group 1) and the name (group 3).
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' / '#undef NAME'; group 2 is the macro name.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# 'DEFxxx (NAME' at the start of a line; group 1 is the first argument.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# An identifier-like token followed by '(' and a character other than '*';
# group 1 is the token.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# A '<...>' span without nested angle brackets; removed from function names.
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define... "NAME"' as matched against changed lines of .md files;
# group 1 is the quoted name.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query returning the summary and component of one bug; the
# '%s' placeholder receives the bug id.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
'include_fields=summary,component'
# File extensions for which function names are extracted from hunks.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}
# NB: Makefile.in isn't listed as it's not always generated.
# Base names that get a 'Regenerate.' entry instead of a function list.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}
# Description text shown by argparse in the command-line help.
help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""
# Despite its name this is the resolved path of this script file itself, so
# two dirname() calls give the parent of the directory holding the script.
script_folder = os.path.realpath(__file__)
# Directory under which ChangeLog files are looked up; the command-line
# entry point replaces it when --directory is given.
root = os.path.dirname(os.path.dirname(script_folder))
# First PR reference seen by the last generate_changelog() call; the
# command-line entry point uses it to build a commit subject line.
firstpr = ''
def find_changelog(path):
    """Return the nearest ancestor directory of PATH that has a ChangeLog.

    PATH is a file path as it appears in the patch.  Starting from the
    directory that contains PATH, each ancestor is probed for a file named
    'ChangeLog' underneath the module-level ``root`` directory.  The first
    directory where one exists is returned, in the same (relative) form it
    has inside PATH.

    An empty string is returned when the search runs out of ancestors.
    That also covers absolute paths: os.path.dirname('/') is '/' again, so
    the search stops as soon as the parent equals the current directory
    instead of looping forever.
    """
    folder = os.path.dirname(path)
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # '' means we ran out of relative components; parent == folder
        # means we hit a filesystem root such as '/'.
        if parent == '' or parent == folder:
            return ''
        folder = parent
def extract_function_name(line):
    """Guess the name of the entity declared or defined on LINE.

    Returns 'KEYWORD NAME' for a class/struct/union/enum declaration, the
    macro name for a #define/#undef, the first argument of a DEFxxx (...)
    supermacro, or a function name with any '<...>' parts removed.  Returns
    None for lines starting with '/*' and for lines where nothing matches.
    """
    if comment_regex.match(line):
        return None

    struct_match = struct_regex.search(line)
    if struct_match:
        keyword, name = struct_match.group(1, 3)
        return keyword + ' ' + name

    # Macro definitions carry the name in group 2, supermacros in group 1.
    for pattern, group in ((macro_regex, 2), (super_macro_regex, 1)):
        found = pattern.search(line)
        if found:
            return found.group(group)

    fn_match = fn_regex.search(line)
    if not fn_match:
        return None
    # Discard template and function parameters.
    return template_and_param_regex.sub('', fn_match.group(1)).rstrip()
def try_add_function(functions, line):
    """Record the function name found on LINE in the FUNCTIONS list.

    The name is appended only if it is not already present.  Returns True
    when a name could be extracted from LINE (whether or not it was newly
    added) and False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
def sort_changelog_files(changed_file):
    """Sort key that orders modified files before added and removed ones.

    The key is the pair (is_added_file, is_removed_file) of CHANGED_FILE;
    since False sorts before True, files with neither flag set come first.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed
def get_pr_titles(prs):
    """Look up the Bugzilla summary of every PR in PRS.

    PRS is a list of references such as 'PR c++/12345', 'PR 12345' or a
    bare '12345'.  For each entry whose bug is found, the list element is
    rewritten IN PLACE to 'PR <component>/<number>' using the component
    reported by Bugzilla, and a '<reference> - <summary>' line is collected.

    Entries that do not end in a bug number (for example 'DR 123') are left
    untouched and no request is made for them.

    Returns the collected lines as one string (each followed by a blank
    line), or '' when nothing was found.  Network errors and timeouts from
    ``requests`` propagate to the caller.
    """
    request_timeout = 30  # seconds; avoid hanging forever on a dead server

    output = []
    for idx, pr in enumerate(prs):
        # Only the part after the last '/' can hold the number; tolerate a
        # leading 'PR ' there so that 'PR 12345' yields '12345'.
        m = re.fullmatch(r'(?:PR\s+)?([0-9]+)', pr.split('/')[-1].strip())
        if not m:
            continue
        pr_id = m.group(1)
        r = requests.get(bugzilla_url % pr_id, timeout=request_timeout)
        # An error reply from Bugzilla carries no 'bugs' member.
        bugs = r.json().get('bugs') or []
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)
def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a skeleton ChangeLog text for the patch in DATA.

    DATA is handed unchanged to unidiff's PatchSet.
    NO_FUNCTIONS suppresses the '(function):' annotations.
    FILL_PR_TITLES makes get_pr_titles() fetch a title line for every PR.
    ADDITIONAL_PRS is an optional list of PR references to list first.

    The result starts with the PR references (if any) and is followed by
    one section per affected ChangeLog, sections whose path contains
    'testsuite' last.  As a side effect the module-level ``firstpr`` is set
    to the first PR reference when at least one was found.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    prs = []
    out = ''
    diff = PatchSet(data)

    if additional_prs:
        # Keep the caller's order but drop repeated references.
        for pr in additional_prs:
            if pr not in prs:
                prs.append(pr)

    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            # Only search the first ten lines, as later lines may contain
            # commented-out code with a note that it has not been tested
            # due to a certain PR or DR.
            this_file_prs = []
            hunks = list(file)
            # A newly added empty file has no hunks at all.
            if hunks:
                for line in hunks[0][0:10]:
                    m = pr_regex.search(line.value)
                    if m:
                        pr = m.group('pr')
                        if pr not in prs:
                            prs.append(pr)
                        # Remember it even if it was already listed, so the
                        # file name check below does not add it again.
                        this_file_prs.append(pr.split('/')[-1])
                    else:
                        m = dr_regex.search(line.value)
                        if m:
                            dr = m.group('dr')
                            if dr not in prs:
                                prs.append(dr)
                            this_file_prs.append(dr.split('/')[-1])
                        elif dg_regex.search(line.value):
                            # Found dg-warning/dg-error line
                            break
            # PR number in the file name
            fname = os.path.basename(file.path)
            m = pr_filename_regex.search(fname)
            if m:
                pr = m.group('pr')
                pr2 = 'PR ' + pr
                if pr not in this_file_prs and pr2 not in prs:
                    prs.append(pr2)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        # May rewrite entries of prs with the component from Bugzilla.
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = file.path[len(changelog):].lstrip('/')
            functions = []
            if file.is_added_file:
                msg = 'New test' if in_tests else 'New file'
                out += '\t* %s: %s.\n' % (relative_path, msg)
            elif file.is_removed_file:
                out += '\t* %s: Removed.\n' % (relative_path)
            elif hasattr(file, 'is_rename') and file.is_rename:
                out += '\t* %s: Moved to...\n' % (relative_path)
                new_path = file.target_file[2:]
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog. Let user fix it.
                if new_path.startswith(changelog):
                    new_path = new_path[len(changelog):].lstrip('/')
                out += '\t* %s: ...here.\n' % (new_path)
            elif os.path.basename(file.path) in generated_files:
                out += '\t* %s: Regenerate.\n' % (relative_path)
            else:
                # Do not add function names for testsuite files
                extension = os.path.splitext(relative_path)[1]
                scan_functions = (not no_functions and not in_tests
                                  and extension in function_extensions)
                if scan_functions:
                    for hunk in file:
                        # last_fn: candidate line seen in context that may
                        # enclose the changes which follow it.
                        last_fn = None
                        modified_visited = False
                        success = False
                        for line in hunk:
                            m = identifier_regex.match(line.value)
                            if line.is_added or line.is_removed:
                                # special-case definition in .md files
                                m2 = md_def_regex.match(line.value)
                                if extension == '.md' and m2:
                                    fn = m2.group(1)
                                    if fn not in functions:
                                        functions.append(fn)
                                        last_fn = None
                                        success = True

                                if not line.value.strip():
                                    continue
                                modified_visited = True
                                if m and try_add_function(functions,
                                                          m.group(1)):
                                    last_fn = None
                                    success = True
                            elif line.is_context:
                                if last_fn and modified_visited:
                                    try_add_function(functions, last_fn)
                                    last_fn = None
                                    modified_visited = False
                                    success = True
                                elif m:
                                    last_fn = m.group(1)
                                    modified_visited = False
                        if not success:
                            # Fall back to the name in the hunk header.
                            try_add_function(functions,
                                             hunk.section_header)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to affected ChangeLogs.

    DATA is handed unchanged to unidiff's PatchSet.  For every file in the
    patch the owning ChangeLog is located with find_changelog() and, once
    per ChangeLog, a new entry is written in front of the existing content.
    The entry header uses today's date and the name and e-mail address
    reported by 'git config'.

    The ChangeLog is opened under the module-level ``root`` directory, the
    same place find_changelog() probed for it.  Raises
    subprocess.CalledProcessError when 'git config' fails and OSError when
    a ChangeLog cannot be read or written.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()
    # GNU ChangeLog header lines separate date, name and address by two
    # spaces each.
    header = f'{current_timestamp}  {username}  <{email}>\n\n'

    changelogs = set()
    for file in PatchSet(data):
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = os.path.join(root, find_changelog(file.path),
                                 'ChangeLog')
        if changelog in changelogs:
            continue
        changelogs.add(changelog)
        with open(changelog) as f:
            content = f.read()
        with open(changelog, 'w') as f:
            f.write(header)
            f.write('\tUpdate copyright years.\n\n')
            f.write(content)
# Command-line entry point: read a patch, then either update the copyright
# entries of the affected ChangeLogs or emit a ChangeLog skeleton (to
# standard output, or into a git commit message file with --changelog).
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        # Rebinds the module-level root used by find_changelog().
        root = args.directory

    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file we opened; leave standard input alone.
        if data is not sys.stdin:
            data.close()

    if not args.update_copyright:
        if args.changelog:
            with open(args.changelog) as f:
                lines = f.read().split('\n')
            # Everything before the first '#' line is the existing message;
            # the rest (git's commented template) is kept after our output.
            start = list(takewhile(lambda text: not text.startswith('#'),
                                   lines))
            end = lines[len(start):]
            with open(args.changelog, 'w') as f:
                if not start or not start[0]:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(firstpr)
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
                if start:
                    # append empty line
                    if start[-1] != '':
                        start.append('')
                else:
                    # append 2 empty lines
                    start = 2 * ['']
                f.write('\n'.join(start))
                f.write('\n')
                f.write(output)
                f.write('\n'.join(end))
        else:
            print(output, end='')