testsuite/driver/perf_notes.py
#!/usr/bin/env python3

#
# (c) Jared Weakly 2017
#
# This file is a utility to help facilitate the comparison of performance
# metrics across arbitrary commits. It produces a table comparing metrics
# between measurements taken for given commits in the environment (which
# defaults to 'local' if not given by --test-env).
#

import argparse
import re
import subprocess
import time

from collections import namedtuple
from math import ceil, trunc

from testutil import passed, failBecause

# Check if "git rev-parse" can be run successfully.
# True implies the current directory is a git repo.
def inside_git_repo():
    try:
        subprocess.check_call(['git', 'rev-parse', 'HEAD'],
                              stdout=subprocess.DEVNULL)
        return True
    except subprocess.CalledProcessError:
        return False

# Check if the worktree is dirty.
def is_worktree_dirty():
    return subprocess.check_output(['git', 'status', '--porcelain']) != b''

#
# Some data access functions. At the moment this uses git notes.
#

# The metrics (a.k.a. stats) are named tuples, PerfStat, in this form:
#
#   ( test_env : 'val',   # Test environment.
#     test     : 'val',   # Name of the test.
#     way      : 'val',
#     metric   : 'val',   # Metric being recorded.
#     value    : 'val',   # The statistic result, e.g. runtime.
#   )

# All the fields of a metric (excluding the commit field).
PerfStat = namedtuple('PerfStat', ['test_env','test','way','metric','value'])

class MetricChange:
    NewMetric = 'NewMetric'
    NoChange = 'NoChange'
    Increase = 'Increase'
    Decrease = 'Decrease'

def parse_perf_stat(stat_str):
    field_vals = stat_str.strip('\t').split('\t')
    return PerfStat(*field_vals)
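
# Illustrative example of the tab-separated note format consumed above (the
# test name and values are hypothetical):
#
#   parse_perf_stat('local\tT1234\tnormal\tbytes allocated\t9000')
#     ==> PerfStat(test_env='local', test='T1234', way='normal',
#                  metric='bytes allocated', value='9000')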

# Get all recorded (in a git note) metrics for a given commit.
# Returns an empty array if the note is not found.
def get_perf_stats(commit='HEAD', namespace='perf'):
    try:
        log = subprocess.check_output(['git', 'notes', '--ref=' + namespace, 'show', commit], stderr=subprocess.STDOUT).decode('utf-8')
    except subprocess.CalledProcessError:
        return []

    log = log.strip('\n').split('\n')
    log = list(filter(None, log))
    log = [parse_perf_stat(stat_str) for stat_str in log]
    return log
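
# For reference, with the default arguments this reads the same data shown by:
#
#   git notes --ref=perf show HEAD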


# Get allowed changes to performance. This is extracted from the commit message of
# the given commit in this form:
#   Metric (Increase | Decrease) ['metric' | \['metrics',..\]] [\((test_env|way)='abc',...\)]: TestName01, TestName02, ...
# Returns a *dictionary* from test name to a *list* of items of the form:
#   {
#       'direction': either 'Increase' or 'Decrease',
#       'metrics': ['metricA', 'metricB', ...],
#       'opts': {
#           'optionA': 'string value',
#           'optionB': 'string value',
#           ...
#       }
#   }
def get_allowed_perf_changes(commit='HEAD'):
    commitByteStr = subprocess.check_output(['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
    return parse_allowed_perf_changes(commitByteStr.decode())

def parse_allowed_perf_changes(commitMsg):
    # Helper regex. Non-capturing unless postfixed with Cap.
    s = r"(?:\s*\n?\s+)"                    # Space, possible new line with an indent.
    qstr = r"(?:'(?:[^'\\]|\\.)*')"         # Quoted string.
    qstrCap = r"(?:'((?:[^'\\]|\\.)*)')"    # Quoted string. Captures the string without the quotes.
    innerQstrList = r"(?:"+qstr+r"(?:"+s+r"?,"+s+r"?"+qstr+r")*)?"  # Inside of a list of strings.
    qstrList = r"(?:\["+s+r"?"+innerQstrList+s+r"?\])"              # A list of strings (in square brackets).

    exp = (r"^Metric"
        +s+r"(Increase|Decrease)"
        +s+r"?("+qstr+r"|"+qstrList+r")?"               # Metric or list of metrics.
        +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?"  # Options surrounded by parentheses (parentheses are allowed in quoted strings).
        +s+r"?:?"                                       # Optional ":".
        +s+r"?((?:(?!\n\n)(?!\n[^\s])(?:.|\n))*)"       # Test names. Stop parsing on an empty or non-indented new line.
        )

    matches = re.findall(exp, commitMsg, re.M)
    changes = {}
    for (direction, metrics_str, opts_str, tests_str) in matches:
        tests = re.findall(r"(\w+)", tests_str)
        for test in tests:
            changes.setdefault(test, []).append({
                'direction': direction,
                'metrics': re.findall(qstrCap, metrics_str),
                'opts': dict(re.findall(r"(\w+)"+s+r"?="+s+r"?"+qstrCap, opts_str))
            })

    return changes
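
# Illustrative example (hypothetical test and metric names): a commit message
# containing the line
#
#   Metric Increase 'bytes allocated' (test_env='linux_x86'): T100
#
# parses to
#
#   { 'T100': [ { 'direction': 'Increase',
#                 'metrics': ['bytes allocated'],
#                 'opts': {'test_env': 'linux_x86'} } ] }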

# Calculates a suggested string to append to the git commit message in order to
# accept the given changes.
# changes: [(MetricChange, PerfStat)]
def allow_changes_string(changes):
    Dec = MetricChange.Decrease
    Inc = MetricChange.Increase

    # We only care about increase / decrease metrics.
    changes = [change for change in changes if change[0] in [Inc, Dec]]

    # Map tests to a map from change direction to metrics.
    test_to_dir_to_metrics = {}
    for (change, perf_stat) in changes:
        change_dir_to_metrics = test_to_dir_to_metrics.setdefault(perf_stat.test, { Inc: [], Dec: [] })
        change_dir_to_metrics[change].append(perf_stat.metric)

    # Split into 3 groups:
    #   Tests where all changes are *increasing*.
    #   Tests where all changes are *decreasing*.
    #   Tests where changes are *mixed* increasing and decreasing.
    groupDec = []
    groupInc = []
    groupMix = []
    for (test, decsAndIncs) in test_to_dir_to_metrics.items():
        decs = decsAndIncs[Dec]
        incs = decsAndIncs[Inc]
        if decs and incs:
            groupMix.append(test)
        elif not decs:
            groupInc.append(test)
        else:
            groupDec.append(test)

    msgs = []
    nltab = '\n    '

    # Decreasing group.
    if groupDec:
        msgs.append('Metric Decrease:' + nltab + nltab.join(groupDec))

    # Increasing group.
    if groupInc:
        msgs.append('Metric Increase:' + nltab + nltab.join(groupInc))

    # Mixed group.
    if groupMix:
        # Split mixed group tests by decrease/increase, then by metric.
        dir_to_metric_to_tests = {
            Dec: {},
            Inc: {}
        }
        for test in groupMix:
            for change_dir, metrics in test_to_dir_to_metrics[test].items():
                for metric in metrics:
                    dir_to_metric_to_tests[change_dir].setdefault(metric, []).append(test)

        for change_dir in [Dec, Inc]:
            metric_to_tests = dir_to_metric_to_tests[change_dir]
            for metric in sorted(metric_to_tests.keys()):
                tests = metric_to_tests[metric]
                msgs.append('Metric ' + change_dir + ' \'' + metric + '\':' + nltab + nltab.join(tests))

    return '\n\n'.join(msgs)
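
# Illustrative output (hypothetical tests T100 and T200 whose metrics only
# increased): allow_changes_string would suggest a commit-message block like
#
#   Metric Increase:
#       T100
#       T200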

# Formats a list of metrics into a string. Used e.g. to save metrics to a file or git note.
def format_perf_stat(stats):
    # If a single stat, convert to a singleton list.
    if not isinstance(stats, list):
        stats = [stats]

    return "\n".join(["\t".join([str(stat_val) for stat_val in stat]) for stat in stats])
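
# Illustrative round-trip with parse_perf_stat (hypothetical values):
#
#   format_perf_stat(PerfStat('local', 'T1234', 'normal', 'bytes allocated', '9000'))
#     ==> 'local\tT1234\tnormal\tbytes allocated\t9000'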

# Appends a list of metrics to the git note of the given commit.
# Tries up to max_tries times to write to git notes should it fail for some reason.
# Each retry will wait 1 second.
# Returns True if the note was successfully appended.
def append_perf_stat(stats, commit='HEAD', namespace='perf', max_tries=5):
    # Append to git note.
    print('Appending ' + str(len(stats)) + ' stats to git notes.')
    stats_str = format_perf_stat(stats)
    def try_append():
        try:
            return subprocess.check_output(['git', 'notes', '--ref=' + namespace, 'append', commit, '-m', stats_str])
        except subprocess.CalledProcessError:
            return b'Git - fatal'

    tries = 0
    while tries < max_tries:
        if not b'Git - fatal' in try_append():
            return True
        tries += 1
        time.sleep(1)

    print("\nAn error occurred while writing the performance metrics to git notes.\n \
          This is usually due to a lock-file existing somewhere in the git repo.")

    return False
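
# Typical use (hypothetical stat values): record a single measurement against
# the perf note of HEAD.
#
#   append_perf_stat(PerfStat('local', 'T1234', 'normal', 'bytes allocated', '9000'))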

# Check test stats. This prints the results for the user.
# actual: the PerfStat with the actual value.
# expected_val: the expected value (this should generally be derived from get_perf_stats()).
# tolerance_dev: allowed deviation of the actual value from the expected value.
# allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes().
# force_print: print stats even if the test stat was in the tolerance range.
# Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
def check_stats_change(actual, expected_val, tolerance_dev, allowed_perf_changes = {}, force_print = False):
    full_name = actual.test + ' (' + actual.way + ')'

    lowerBound = trunc(           int(expected_val) * ((100 - float(tolerance_dev))/100))
    upperBound = trunc(0.5 + ceil(int(expected_val) * ((100 + float(tolerance_dev))/100)))

    actual_dev = round(((float(actual.value) * 100)/ int(expected_val)) - 100, 1)

    # Find the direction of change. Convert the actual value to a number, since
    # PerfStat values parsed from git notes are strings.
    change = MetricChange.NoChange
    if float(actual.value) < lowerBound:
        change = MetricChange.Decrease
    elif float(actual.value) > upperBound:
        change = MetricChange.Increase

    # Is the change allowed?
    allowed_change_directions = [MetricChange.NoChange] + [ allow_stmt['direction']
        for allow_stmt in allowed_perf_changes.get(actual.test, [])

        # The list of metrics is not specified, or the metric is in the list of metrics.
        if not allow_stmt['metrics'] or actual.metric in allow_stmt['metrics']

        # way/test_env are not specified, or match the actual way/test_env.
        if ((not 'way'      in allow_stmt['opts'].keys()) or actual.way      == allow_stmt['opts']['way'])
        if ((not 'test_env' in allow_stmt['opts'].keys()) or actual.test_env == allow_stmt['opts']['test_env'])
        ]
    change_allowed = change in allowed_change_directions

    # Print errors and create pass/fail object.
    result = passed()
    if not change_allowed:
        error = change + ' not allowed'
        print(actual.metric, error + ':')
        result = failBecause('stat ' + error, tag='stat')

    if not change_allowed or force_print:
        length = max(len(str(x)) for x in [expected_val, lowerBound, upperBound, actual.value])

        def display(descr, val, extra):
            print(descr, str(val).rjust(length), extra)

        display('    Expected    ' + full_name + ' ' + actual.metric + ':', expected_val, '+/-' + str(tolerance_dev) + '%')
        display('    Lower bound ' + full_name + ' ' + actual.metric + ':', lowerBound, '')
        display('    Upper bound ' + full_name + ' ' + actual.metric + ':', upperBound, '')
        display('    Actual      ' + full_name + ' ' + actual.metric + ':', actual.value, '')
        if actual.value != expected_val:
            display('    Deviation   ' + full_name + ' ' + actual.metric + ':', actual_dev, '%')

    return (change, result)
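
# Worked example of the bounds above (hypothetical numbers): with
# expected_val = 1000 and tolerance_dev = 20, the accepted window is
#   lowerBound = trunc(1000 * 0.80)             = 800
#   upperBound = trunc(0.5 + ceil(1000 * 1.20)) = 1200
# so an actual value of 1250 is reported as an Increase with a deviation of +25.0%.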

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--test-env",
                        help="The given test environment to be compared.")
    parser.add_argument("--test-name",
                        help="If given, filters table to include only \
                        tests matching the given regular expression.")
    parser.add_argument("--add-note", nargs=3,
                        help="Development only. --add-note N commit seed \
                        Adds N fake metrics to the given commit using the random seed.")
    parser.add_argument("commits", nargs=argparse.REMAINDER,
                        help="The rest of the arguments will be the commits that will be used.")
    args = parser.parse_args()
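
    # Example invocation (hypothetical commits and test name): compare metrics
    # recorded for the 'local' test environment on the last two commits,
    # restricted to tests matching 'T9872'.
    #
    #   python3 perf_notes.py --test-env local --test-name 'T9872' HEAD HEAD~1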

    env = 'local'
    name = re.compile('.*')
    # Each element of metrics is a CommitAndStat pair: (str commit, PerfStat stat).
    CommitAndStat = namedtuple('CommitAndStat', ['commit', 'stat'])
    metrics = []
    singleton_commit = len(args.commits) == 1

    #
    # Main logic of the program when called from the command-line.
    #

    if args.commits:
        for c in args.commits:
            metrics += [CommitAndStat(c, stat) for stat in get_perf_stats(c)]

    if args.test_env:
        metrics = [test for test in metrics if test.stat.test_env == args.test_env]

    if args.test_name:
        nameRe = re.compile(args.test_name)
        metrics = [test for test in metrics if nameRe.search(test.stat.test)]

    if args.add_note:
        def note_gen(n, commit, delta=''):
            note = []
            # Generates simple fake data. Likely not comprehensive enough to catch all edge cases.
            if not delta:
                note.extend([PerfStat('local', 'T'+ str(i*100), 'some_way', 'some_field', str(i*1000)) for i in range(1,int(int(n)/2)+1)])
                note.extend([PerfStat('non-local', 'W'+ str(i*100), 'other_way', 'other_field', str(i*100)) for i in range(int(int(n)/2)+1,int(n)+1)])
            if delta:
                hu = abs(hash(delta))
                hv = abs(hash(hu))
                u = int(hu % 100)
                v = int(hv % 10)
                note.extend([PerfStat('local', 'T'+ str(i*100), 'some_way', 'some_field', str(i*u)) for i in range(1,int(int(n)/2)+1)])
                note.extend([PerfStat('non-local', 'W'+ str(i*100), 'other_way', 'other_field', str(i*v)) for i in range(int(int(n)/2)+1,int(n)+1)])

            append_perf_stat(note, commit)

        note_gen(args.add_note[0],args.add_note[1],args.add_note[2])

    #
    # String utilities for pretty-printing
    #

    row_fmt = '{:18}' * len(args.commits)
    commits = row_fmt.format(*[c[:10] for c in args.commits])

    def cmtline(insert):
        return row_fmt.format(*[insert for c in args.commits]).strip()

    def header(unit):
        first_line = "{:27}{:30}".format(' ',' ') + cmtline(unit)
        second_line = ("{:27}{:30}".format('Test','Metric') + commits).strip()

        # Test   Metric   c1   c2   c3 ...
        print("-" * (len(second_line)+1))
        print(first_line)
        print(second_line)
        print("-" * (len(second_line)+1))

    def commit_string(test, flag):
        def delta(v1, v2):
            return round((100 * (v1 - v2)/v2),2)

        # Get the average value per commit (or None if that commit contains no metrics).
        # Note: if the test environment is not set, this will combine metrics from all test environments.
        averageValuesOrNones = []
        for commit in args.commits:
            values = [float(t.stat.value) for t in metrics if t.commit == commit and t.stat.test == test]
            if values == []:
                averageValuesOrNones.append(None)
            else:
                averageValuesOrNones.append(sum(values) / len(values))

        if flag == 'metrics':
            strings = [str(v) if v != None else '-' for v in averageValuesOrNones]
        if flag == 'percentages':
            # If the baseline commit has no stats, then we can not produce any percentages.
            baseline = averageValuesOrNones[0]
            if baseline == None:
                strings = ['-' for v in averageValuesOrNones]
            else:
                baseline = float(baseline)
                strings = ['-' if val == None else str(delta(baseline,float(val))) + '%' for val in averageValuesOrNones]

        return row_fmt.format(*strings).strip()
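
    # Worked example of the 'percentages' mode above (hypothetical averages):
    # with a baseline average of 100 and a later commit's average of 80,
    # delta(100, 80) = round(100 * (100 - 80) / 80, 2) = 25.0, shown as '25.0%'.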

    #
    # The pretty-printed output
    #

    header('commit')
    # Printing out metrics.
    all_tests = sorted(set([(test.stat.test, test.stat.metric) for test in metrics]))
    for test, metric in all_tests:
        print("{:27}{:30}".format(test, metric) + commit_string(test,'metrics'))

    # Has no meaningful output if there is no commit to compare to.
    if not singleton_commit:
        header('percent')

        # Printing out percentages.
        for test, metric in all_tests:
            print("{:27}{:30}".format(test, metric) + commit_string(test,'percentages'))