#!/usr/bin/env python3

#
# (c) Jared Weakly 2017
#
# This file is a utility to facilitate the comparison of performance
# metrics across arbitrary commits. It produces a table comparing metrics
# between measurements taken for given commits in the test environment
# (which defaults to 'local' if not given by --test-env).
#

import argparse
import re
import subprocess
import time
import sys

from collections import namedtuple
from math import ceil, trunc

from testutil import passed, failBecause

# Check if "git rev-parse" can be run successfully.
# True implies the current directory is a git repo.
def inside_git_repo():
    try:
        subprocess.check_call(['git', 'rev-parse', 'HEAD'],
                              stdout=subprocess.DEVNULL)
        return True
    except subprocess.CalledProcessError:
        return False

# Check if the worktree is dirty.
def is_worktree_dirty():
    return subprocess.check_output(['git', 'status', '--porcelain']) != b''

#
# Some data access functions. At the moment this uses git notes.
#

# The metrics (a.k.a. stats) are named tuples, PerfStat, in this form:
#
# ( test_env : 'val',   # Test environment.
#   test     : 'val',   # Name of the test.
#   way      : 'val',
#   metric   : 'val',   # Metric being recorded.
#   value    : 'val',   # The statistic result e.g. runtime.
# )

# All the fields of a metric (excluding commit field).
PerfStat = namedtuple('PerfStat', ['test_env','test','way','metric','value'])

class MetricChange:
    NewMetric = 'NewMetric'
    NoChange = 'NoChange'
    Increase = 'Increase'
    Decrease = 'Decrease'

def parse_perf_stat(stat_str):
    field_vals = stat_str.strip('\t').split('\t')
    return PerfStat(*field_vals)
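
# For example, a note line is tab-separated (values here are hypothetical):
#   parse_perf_stat('local\tT100\tnormal\tmax_bytes_used\t1000')
#     == PerfStat(test_env='local', test='T100', way='normal',
#                 metric='max_bytes_used', value='1000')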

# Get all recorded (in a git note) metrics for a given commit.
# Returns an empty array if the note is not found.
def get_perf_stats(commit='HEAD', namespace='perf'):
    try:
        log = subprocess.check_output(['git', 'notes', '--ref=' + namespace, 'show', commit], stderr=subprocess.STDOUT).decode('utf-8')
    except subprocess.CalledProcessError:
        return []

    log = log.strip('\n').split('\n')
    log = list(filter(None, log))
    log = [parse_perf_stat(stat_str) for stat_str in log]
    return log
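
# The note holds one stat per line; equivalently, from the shell:
#   git notes --ref=perf show HEAD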

# Check if a str is a 40 character git commit hash.
# str -> bool
_commit_hash_re = re.compile('[0-9a-f]' * 40)
def is_commit_hash(hash):
    return _commit_hash_re.fullmatch(hash) != None

# Convert a <ref> to a commit hash code.
# str -> str
def commit_hash(commit):
    if is_commit_hash(commit):
        return commit
    return subprocess.check_output(['git', 'rev-parse', commit], \
                                   stderr=subprocess.STDOUT) \
        .decode() \
        .strip()
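
# For example (illustrative only), commit_hash('HEAD~1') shells out to
# `git rev-parse HEAD~1` and returns the full 40-character hash, while a
# string that is already a 40-character hash is returned unchanged.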

# Get allowed changes to performance. This is extracted from the commit message of
# the given commit in this form:
#   Metric (Increase | Decrease) ['metric' | \['metrics',..\]] [\((test_env|way)='abc',...\)]: TestName01, TestName02, ...
# Returns a *dictionary* from test name to a *list* of items of the form:
#   {
#       'direction': either 'Increase' or 'Decrease',
#       'metrics': ['metricA', 'metricB', ...],
#       'opts': {
#           'optionA': 'string value',
#           'optionB': 'string value',  # e.g. test_env: "x86_64-linux"
#           ...
#       }
#   }
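#
# For example, a commit message footer like this (test names hypothetical):
#
#   Metric Increase 'bytes allocated' (test_env='x86_64-linux'):
#       T100, T200
#
# allows the 'bytes allocated' metric to increase for tests T100 and T200
# when run in the x86_64-linux test environment.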
_get_allowed_perf_changes_cache = {}
def get_allowed_perf_changes(commit='HEAD'):
    global _get_allowed_perf_changes_cache
    commit = commit_hash(commit)
    if not commit in _get_allowed_perf_changes_cache:
        commitByteStr = subprocess.check_output(\
            ['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
        _get_allowed_perf_changes_cache[commit] \
            = parse_allowed_perf_changes(commitByteStr.decode())
    return _get_allowed_perf_changes_cache[commit]

def parse_allowed_perf_changes(commitMsg):
    # Helper regexes. Non-capturing unless postfixed with Cap.
    s = r"(?:\s*\n?\s+)"                    # Space, possible new line with an indent.
    qstr = r"(?:'(?:[^'\\]|\\.)*')"         # Quoted string.
    qstrCap = r"(?:'((?:[^'\\]|\\.)*)')"    # Quoted string. Captures the string without the quotes.
    innerQstrList = r"(?:"+qstr+r"(?:"+s+r"?,"+s+r"?"+qstr+r")*)?"  # Inside of a list of strings.
    qstrList = r"(?:\["+s+r"?"+innerQstrList+s+r"?\])"              # A list of strings (using square brackets).

    exp = (r"^Metric"
        +s+r"(Increase|Decrease)"
        +s+r"?("+qstr+r"|"+qstrList+r")?"                # Metric or list of metrics.
        +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?"   # Options surrounded in parentheses. (Allow parentheses in quoted strings.)
        +s+r"?:?"                                        # Optional ":".
        +s+r"?((?:(?!\n\n)(?!\n[^\s])(?:.|\n))*)"        # Test names. Stop parsing on empty or non-indented new line.
        )

    matches = re.findall(exp, commitMsg, re.M)
    changes = {}
    for (direction, metrics_str, opts_str, tests_str) in matches:
        tests = re.findall(r"(\w+)", tests_str)
        for test in tests:
            changes.setdefault(test, []).append({
                'direction': direction,
                'metrics': re.findall(qstrCap, metrics_str),
                'opts': dict(re.findall(r"(\w+)"+s+r"?="+s+r"?"+qstrCap, opts_str))
            })

    return changes

# Calculates a suggested string to append to the git commit in order to accept the
# given changes.
# changes: [(MetricChange, PerfStat)]
def allow_changes_string(changes):
    Dec = MetricChange.Decrease
    Inc = MetricChange.Increase

    # We only care about increase / decrease metrics.
    changes = [change for change in changes if change[0] in [Inc, Dec]]

    # Map tests to a map from change direction to metrics.
    test_to_dir_to_metrics = {}
    for (change, perf_stat) in changes:
        change_dir_to_metrics = test_to_dir_to_metrics.setdefault(perf_stat.test, { Inc: [], Dec: [] })
        change_dir_to_metrics[change].append(perf_stat.metric)

    # Split into 3 groups.
    # Tests where all changes are *increasing*.
    # Tests where all changes are *decreasing*.
    # Tests where changes are *mixed* increasing and decreasing.
    groupDec = []
    groupInc = []
    groupMix = []
    for (test, decsAndIncs) in test_to_dir_to_metrics.items():
        decs = decsAndIncs[Dec]
        incs = decsAndIncs[Inc]
        if decs and incs:
            groupMix.append(test)
        elif not decs:
            groupInc.append(test)
        else:
            groupDec.append(test)

    msgs = []
    nltab = '\n    '

    # Decreasing group.
    if groupDec:
        msgs.append('Metric Decrease:' + nltab + nltab.join(groupDec))

    # Increasing group.
    if groupInc:
        msgs.append('Metric Increase:' + nltab + nltab.join(groupInc))

    # Mixed group.
    if groupMix:
        # Split mixed group tests by decrease/increase, then by metric.
        dir_to_metric_to_tests = {
            Dec: {},
            Inc: {}
        }
        for test in groupMix:
            for change_dir, metrics in test_to_dir_to_metrics[test].items():
                for metric in metrics:
                    dir_to_metric_to_tests[change_dir].setdefault(metric, []).append(test)

        for change_dir in [Dec, Inc]:
            metric_to_tests = dir_to_metric_to_tests[change_dir]
            for metric in sorted(metric_to_tests.keys()):
                tests = metric_to_tests[metric]
                msgs.append('Metric ' + change_dir + ' \'' + metric + '\':' + nltab + nltab.join(tests))

    return '\n\n'.join(msgs)
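
# For example, for changes where only the (hypothetical) test T100 increased,
# this produces a string like:
#
#   Metric Increase:
#       T100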

# Formats a list of metrics into a string. Used e.g. to save metrics to a file or git note.
def format_perf_stat(stats):
    # If a single stat, convert to a singleton list.
    if not isinstance(stats, list):
        stats = [stats]

    return "\n".join(["\t".join([str(stat_val) for stat_val in stat]) for stat in stats])
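
# For example (a hypothetical stat; this is the inverse of parse_perf_stat):
#   format_perf_stat(PerfStat('local', 'T100', 'normal', 'max_bytes_used', '1000'))
#     == 'local\tT100\tnormal\tmax_bytes_used\t1000'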

# Appends a list of metrics to the git note of the given commit.
# Tries up to max_tries times to write to git notes should it fail for some reason.
# Each retry will wait 1 second.
# Returns True if the note was successfully appended.
def append_perf_stat(stats, commit='HEAD', namespace='perf', max_tries=5):
    # Append to git note.
    print('Appending ' + str(len(stats)) + ' stats to git notes.')
    stats_str = format_perf_stat(stats)
    def try_append():
        try:
            return subprocess.check_output(['git', 'notes', '--ref=' + namespace, 'append', commit, '-m', stats_str])
        except subprocess.CalledProcessError:
            return b'Git - fatal'

    tries = 0
    while tries < max_tries:
        if not b'Git - fatal' in try_append():
            return True
        tries += 1
        time.sleep(1)

    print("\nAn error occurred while writing the performance metrics to git notes.\n \
          This is usually due to a lock-file existing somewhere in the git repo.")

    return False
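
# The append above is equivalent to running, for the default arguments:
#   git notes --ref=perf append HEAD -m "<tab-separated stats>"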

#
# Baseline calculation
#

# Max number of ancestor commits to search when compiling a baseline performance metric.
BaselineSearchDepth = 75

# The git notes name space for local results.
LocalNamespace = "perf"

# The git notes name space for ci results.
CiNamespace = "ci/" + LocalNamespace

# (isCalculated, best fit ci test_env or None)
BestFitCiTestEnv = (False, None)

# test_env string or None
def best_fit_ci_test_env():
    global BestFitCiTestEnv
    if not BestFitCiTestEnv[0]:
        platform = sys.platform
        isArch64 = sys.maxsize > 2**32
        arch = "x86_64" if isArch64 else "i386"

        if platform.startswith("linux"):
            test_env = arch + "-linux-deb9"
        elif platform.startswith("win32"):
            # There are no windows CI test results.
            test_env = None
        elif isArch64 and platform.startswith("darwin"):
            test_env = arch + "-darwin"
        elif isArch64 and platform.startswith("freebsd"):
            test_env = arch + "-freebsd"
        else:
            test_env = None

        BestFitCiTestEnv = (True, test_env)

    return BestFitCiTestEnv[1]
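
# For example, on a 64-bit Linux machine this returns "x86_64-linux-deb9";
# on Windows it returns None, since there are no Windows CI test results.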

_baseline_depth_commit_log = {}

# Get the commit hashes for the last BaselineSearchDepth commits from and
# including the input commit. The output commits are all commit hashes.
# str -> [str]
def baseline_commit_log(commit):
    global _baseline_depth_commit_log
    commit = commit_hash(commit)
    if not commit in _baseline_depth_commit_log:
        _baseline_depth_commit_log[commit] = \
            subprocess.check_output(['git', 'log', '--format=%H', \
                                     '-n' + str(BaselineSearchDepth), commit]) \
                .decode().split('\n')
    return _baseline_depth_commit_log[commit]

# Cache of baseline values. This is a dict of dicts indexed on:
# (useCiNamespace, commit) -> (test_env, test, metric, way) -> baseline
# (bool          , str   ) -> (str     , str , str   , str) -> float
_commit_metric_cache = {}

# Get the baseline (expected value) of a test at a given commit. This searches
# git notes from older commits for recorded metrics (locally and from ci). More
# recent commits are favoured, and local results are favoured over ci results.
#
# commit: str - must be a commit hash (see commit_hash())
# name: str - test name
# test_env: str - test environment (note a best fit test_env will be used
#                 instead when looking for ci results)
# metric: str - test metric
# way: str - test way
# returns: the baseline float or None if no metric was found within
#          BaselineSearchDepth commits and since the last expected change.
def baseline_metric(commit, name, test_env, metric, way):
    # For performance reasons (in order to avoid calling commit_hash), we assert
    # commit is already a commit hash.
    assert is_commit_hash(commit)

    # Get all recent commit hashes.
    commit_hashes = baseline_commit_log(commit)

    # TODO PERF use git log to get hashes of all BaselineSearchDepth commits
    def depth_to_commit(depth):
        return commit_hashes[depth]

    def has_expected_change(commit):
        return get_allowed_perf_changes(commit).get(name) \
            != None

    # Bool -> String
    def namespace(useCiNamespace):
        return CiNamespace if useCiNamespace else LocalNamespace

    ci_test_env = best_fit_ci_test_env()

    # Gets the metric of a given commit.
    # (Bool, Int) -> (float | None)
    def commit_metric(useCiNamespace, currentCommit):
        global _commit_metric_cache

        # Get test environment.
        effective_test_env = ci_test_env if useCiNamespace else test_env
        if effective_test_env == None:
            # This can happen when no best fit ci test is found.
            return None

        # Check for cached value.
        cacheKeyA = (useCiNamespace, currentCommit)
        cacheKeyB = (effective_test_env, name, metric, way)
        if cacheKeyA in _commit_metric_cache:
            return _commit_metric_cache[cacheKeyA].get(cacheKeyB)

        # Cache miss.
        # Calculate baselines from the current commit's git note.
        # Note that the git note may contain data for other tests. All tests'
        # baselines will be collected and cached for future use.
        allCommitMetrics = get_perf_stats(
            currentCommit,
            namespace(useCiNamespace))

        # Collect recorded values by cacheKeyB.
        values_by_cache_key_b = {}
        for perfStat in allCommitMetrics:
            currentCacheKey = (perfStat.test_env, perfStat.test, \
                               perfStat.metric, perfStat.way)
            currentValues = values_by_cache_key_b.setdefault(currentCacheKey, [])
            currentValues.append(float(perfStat.value))

        # Calculate the baseline (average of values) by cacheKeyB.
        baseline_by_cache_key_b = {}
        for currentCacheKey, currentValues in values_by_cache_key_b.items():
            baseline_by_cache_key_b[currentCacheKey] = \
                sum(currentValues) / len(currentValues)

        # Save baselines to the cache.
        _commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b
        return baseline_by_cache_key_b.get(cacheKeyB)

    # Searches through previous commits, trying local then ci results for each commit.
    def search(useCiNamespace, depth):
        # Stop if we have reached the max search depth, or if there is an
        # expected change at the child commit (depth-1). This is a subtlety:
        # Metrics recorded on commit x incorporate the expected changes for
        # commit x. Hence metrics from x are still a valid baseline, while
        # older commits are not. This is why we check for expected changes
        # on depth-1 rather than depth.
        if depth >= BaselineSearchDepth or has_expected_change( \
                depth_to_commit(depth - 1)):
            return None

        # Check for a metric on this commit.
        current_metric = commit_metric(useCiNamespace, depth_to_commit(depth))
        if current_metric != None:
            return current_metric

        # Metric is not available.
        # If we tried local, now try CI. Else move to the parent commit.
        if not useCiNamespace:
            return search(True, depth)
        else:
            return search(False, depth + 1)

    # Start search from parent commit using local name space.
    return search(False, 1)
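
# Note the resulting search order: (local, parent), (ci, parent),
# (local, grandparent), (ci, grandparent), ... stopping when a metric is
# found, the search depth is exhausted, or an expected change is reached.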


# Check test stats. This prints the results for the user.
# actual: the PerfStat with actual value.
# expected_val: the expected value (this should generally be derived from get_perf_stats())
# tolerance_dev: allowed deviation of the actual value from the expected value.
# allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes().
# force_print: Print stats even if the test stat was in the tolerance range.
# Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
def check_stats_change(actual, expected_val, tolerance_dev, allowed_perf_changes = {}, force_print = False):
    full_name = actual.test + ' (' + actual.way + ')'

    lowerBound = trunc(           int(expected_val) * ((100 - float(tolerance_dev))/100))
    upperBound = trunc(0.5 + ceil(int(expected_val) * ((100 + float(tolerance_dev))/100)))
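
    # For example, expected_val=1000 with tolerance_dev=5 (i.e. 5%) gives
    # lowerBound=950 and upperBound=1050.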

    actual_dev = round(((float(actual.value) * 100)/ int(expected_val)) - 100, 1)

    # Find the direction of change.
    change = MetricChange.NoChange
    if actual.value < lowerBound:
        change = MetricChange.Decrease
    elif actual.value > upperBound:
        change = MetricChange.Increase

    # Is the change allowed?
    allowed_change_directions = [MetricChange.NoChange] + [ allow_stmt['direction']
        for allow_stmt in allowed_perf_changes.get(actual.test, [])

        # The list of metrics is not specified, or the metric is in the list.
        if not allow_stmt['metrics'] or actual.metric in allow_stmt['metrics']

        # way/test_env are not specified, or match the actual way/test_env.
        if ((not 'way' in allow_stmt['opts'].keys()) or actual.way == allow_stmt['opts']['way'])
        if ((not 'test_env' in allow_stmt['opts'].keys()) or actual.test_env == allow_stmt['opts']['test_env'])
    ]
    change_allowed = change in allowed_change_directions

    # Print errors and create pass/fail object.
    result = passed()
    if not change_allowed:
        error = change + ' not allowed'
        print(actual.metric, error + ':')
        result = failBecause('stat ' + error, tag='stat')

    if not change_allowed or force_print:
        length = max(len(str(x)) for x in [expected_val, lowerBound, upperBound, actual.value])

        def display(descr, val, extra):
            print(descr, str(val).rjust(length), extra)

        display('    Expected    ' + full_name + ' ' + actual.metric + ':', expected_val, '+/-' + str(tolerance_dev) + '%')
        display('    Lower bound ' + full_name + ' ' + actual.metric + ':', lowerBound, '')
        display('    Upper bound ' + full_name + ' ' + actual.metric + ':', upperBound, '')
        display('    Actual      ' + full_name + ' ' + actual.metric + ':', actual.value, '')
        if actual.value != expected_val:
            display('    Deviation   ' + full_name + ' ' + actual.metric + ':', actual_dev, '%')

    return (change, result)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--test-env",
                        help="The given test environment to be compared.")
    parser.add_argument("--test-name",
                        help="If given, filters table to include only \
                        tests matching the given regular expression.")
    parser.add_argument("--add-note", nargs=3,
                        help="Development only. --add-note N commit seed \
                        Adds N fake metrics to the given commit using the random seed.")
    parser.add_argument("commits", nargs=argparse.REMAINDER,
                        help="The rest of the arguments will be the commits that will be used.")
    args = parser.parse_args()
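
    # Example invocation (commit arguments are hypothetical):
    #   python3 perf_notes.py --test-env=local HEAD HEAD~1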

    env = 'local'
    name = re.compile('.*')
    # metrics is a list of CommitAndStat tuples: (str commit, PerfStat stat).
    CommitAndStat = namedtuple('CommitAndStat', ['commit', 'stat'])
    metrics = []
    singleton_commit = len(args.commits) == 1

    #
    # Main logic of the program when called from the command-line.
    #

    if args.commits:
        for c in args.commits:
            metrics += [CommitAndStat(c, stat) for stat in get_perf_stats(c)]

    if args.test_env:
        metrics = [test for test in metrics if test.stat.test_env == args.test_env]

    if args.test_name:
        nameRe = re.compile(args.test_name)
        metrics = [test for test in metrics if nameRe.search(test.stat.test)]

    if args.add_note:
        def note_gen(n, commit, delta=''):
            note = []
            # Generates simple fake data. Likely not comprehensive enough to catch all edge cases.
            if not delta:
                note.extend([PerfStat('local', 'T'+ str(i*100), 'some_way', 'some_field', str(i*1000)) for i in range(1,int(int(n)/2)+1)])
                note.extend([PerfStat('non-local', 'W'+ str(i*100), 'other_way', 'other_field', str(i*100)) for i in range(int(int(n)/2)+1,int(n)+1)])
            if delta:
                hu = abs(hash(delta))
                hv = abs(hash(hu))
                u = int(hu % 100)
                v = int(hv % 10)
                note.extend([PerfStat('local', 'T'+ str(i*100), 'some_way', 'some_field', str(i*u)) for i in range(1,int(int(n)/2)+1)])
                note.extend([PerfStat('non-local', 'W'+ str(i*100), 'other_way', 'other_field', str(i*v)) for i in range(int(int(n)/2)+1,int(n)+1)])

            append_perf_stat(note, commit)

        note_gen(args.add_note[0],args.add_note[1],args.add_note[2])

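    # For example, to attach 10 fake metrics to HEAD (seed is hypothetical):
    #   python3 perf_notes.py --add-note 10 HEAD someseed
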
    #
    # String utilities for pretty-printing
    #

    row_fmt = '{:18}' * len(args.commits)
    commits = row_fmt.format(*[c[:10] for c in args.commits])

    def cmtline(insert):
        return row_fmt.format(*[insert for c in args.commits]).strip()

    def header(unit):
        first_line = "{:27}{:30}".format(' ',' ') + cmtline(unit)
        second_line = ("{:27}{:30}".format('Test','Metric') + commits).strip()

        # Test   Metric   c1   c2   c3 ...
        print("-" * (len(second_line)+1))
        print(first_line)
        print(second_line)
        print("-" * (len(second_line)+1))

    def commit_string(test, flag):
        def delta(v1, v2):
            return round((100 * (v1 - v2)/v2),2)

        # Get the average value per commit (or None if that commit contains no metrics).
        # Note: if the test environment is not set, this will combine metrics from all test environments.
        averageValuesOrNones = []
        for commit in args.commits:
            values = [float(t.stat.value) for t in metrics if t.commit == commit and t.stat.test == test]
            if values == []:
                averageValuesOrNones.append(None)
            else:
                averageValuesOrNones.append(sum(values) / len(values))

        if flag == 'metrics':
            strings = [str(v) if v != None else '-' for v in averageValuesOrNones]
        if flag == 'percentages':
            # If the baseline commit has no stats, then we cannot produce any percentages.
            baseline = averageValuesOrNones[0]
            if baseline == None:
                strings = ['-' for v in averageValuesOrNones]
            else:
                baseline = float(baseline)
                # Show each commit's percent change relative to the baseline commit.
                strings = ['-' if val == None else str(delta(float(val), baseline)) + '%' for val in averageValuesOrNones]

        return row_fmt.format(*strings).strip()

    #
    # The pretty-printed output
    #

    header('commit')
    # Printing out metrics.
    all_tests = sorted(set([(test.stat.test, test.stat.metric) for test in metrics]))
    for test, metric in all_tests:
        print("{:27}{:30}".format(test, metric) + commit_string(test,'metrics'))

    # Has no meaningful output if there is no commit to compare to.
    if not singleton_commit:
        header('percent')

        # Printing out percentages.
        for test, metric in all_tests:
            print("{:27}{:30}".format(test, metric) + commit_string(test,'percentages'))