path: root/testing/runtimes/writeruntimes.py
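"""Generate per-test runtime data for a test suite by querying ActiveData.

Average result.duration values from roughly the past week are converted to
milliseconds; tests at or above the PERCENTILE duration cutoff are written to
<suite>.runtimes.json under "runtimes", while the faster, omitted tests only
contribute to "excluded_test_average". Unless --disable-e10s is passed,
"-e10s" is appended to the suite name used for the output file.

Example invocation (suite name and output path are illustrative):

    python writeruntimes.py -s mochitest-browser-chrome -o /tmp/runtimes
"""
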
from argparse import ArgumentParser
from collections import defaultdict
import json
import os
import sys

import requests

here = os.path.abspath(os.path.dirname(__file__))

ACTIVE_DATA_URL = "http://activedata.allizom.org/query"
PERCENTILE = 0.5  # ignore the fastest PERCENTILE*100% of durations

def query_activedata(suite, e10s, platforms=None):
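    """Query ActiveData for the average duration of each test in `suite`.

    Results cover roughly the past week, are limited to the given platforms
    (if any), and are filtered to either e10s or non-e10s runs. The returned
    "data" value is consumed by write_runtimes as (test, duration) pairs.
    """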
    platforms = ', "build.platform":%s' % json.dumps(platforms) if platforms else ''

    e10s_clause = '"eq":{"run.type":"e10s"}'
    if not e10s:
        e10s_clause = '"not":{%s}' % e10s_clause

    query = """
{
    "from":"unittest",
    "limit":200000,
    "groupby":["result.test"],
    "select":{"value":"result.duration","aggregate":"average"},
    "where":{"and":[
        {"eq":{"run.suite":"%s"%s}},
        {%s},
        {"gt":{"run.timestamp":"{{today-week}}"}}
    ]}
}
""" % (suite, platforms, e10s_clause)

    response = requests.post(ACTIVE_DATA_URL,
                             data=query,
                             stream=True)
    response.raise_for_status()
    data = response.json()["data"]
    return data

def write_runtimes(data, suite, indir=here, outdir=here):
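    """Merge the queried durations into <suite>.runtimes.json in `outdir`.

    Durations are converted to milliseconds. Tests below the PERCENTILE
    duration threshold only contribute to the reported
    "excluded_test_average"; the remaining tests are written under
    "runtimes", merged with any existing data read from `indir`.
    """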
    data = dict(data)

    outfilename = os.path.join(outdir, "%s.runtimes.json" % suite)
    infilename = os.path.join(indir, "%s.runtimes.json" % suite)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # read in existing data, if any
    indata = None
    if os.path.exists(infilename):
        with open(infilename, 'r') as f:
            indata = json.load(f).get('runtimes')

    # identify a duration threshold below which tests are ignored
    runtimes = []
    for result in data.values():
        duration = int(result * 1000) if result else 0
        if duration:
            runtimes.append(duration)
    runtimes.sort()
    threshold = runtimes[int(len(runtimes) * PERCENTILE)]

    # split the durations into two groups: omitted and specified
    omitted = []
    specified = indata if indata else {}
    current_tests = []
    for test, duration in data.items():
        current_tests.append(test)
        duration = int(duration * 1000) if duration else 0
        if duration > 0 and duration < threshold:
            omitted.append(duration)
            if test in specified:
                del specified[test]
        elif duration >= threshold and test != "automation.py":
            original = specified.get(test, 0)
            if not original or abs(original - duration) > (original / 20):
                # only overwrite the stored value if the new duration differs
                # from it by more than 5% (original/20)
                specified[test] = duration

    # delete any test references no longer needed
    to_delete = []
    for test in specified:
        if test not in current_tests:
            to_delete.append(test)
    for test in to_delete:
        del specified[test]

    # average duration of the omitted (fast) tests; 0 if nothing was omitted
    avg = int(sum(omitted) / len(omitted)) if omitted else 0

    results = {'excluded_test_average': avg,
               'runtimes': specified}

    with open(outfilename, 'w') as f:
        f.write(json.dumps(results, indent=2, sort_keys=True))


def cli(args=sys.argv[1:]):
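    """Parse command line arguments, query ActiveData, and write the
    runtime file for the requested suite."""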
    parser = ArgumentParser()
    parser.add_argument('-o', '--output-directory', dest='outdir',
        default=here, help="Directory to save runtime data.")

    parser.add_argument('-i', '--input-directory', dest='indir',
        default=here, help="Directory from which to read current runtime data.")

    parser.add_argument('-p', '--platforms', default=None,
        help="Comma separated list of platforms from which to generate data.")

    parser.add_argument('-s', '--suite', dest='suite', default=None,
        help="Suite for which to generate data.")

    parser.add_argument('--disable-e10s', dest='e10s', default=True,
        action='store_false', help="Generate runtimes for non-e10s tests.")

    args = parser.parse_args(args)

    if not args.suite:
        raise ValueError("Must specify suite with the -s argument")
    if ',' in args.suite:
        raise ValueError("Passing multiple suites is not supported")

    if args.platforms:
        args.platforms = args.platforms.split(',')

    data = query_activedata(args.suite, args.e10s, args.platforms)

    suite = args.suite
    if args.e10s:
        suite = '%s-e10s' % suite
    write_runtimes(data, suite, indir=args.indir, outdir=args.outdir)

if __name__ == "__main__":
    sys.exit(cli())