#!/usr/bin/env python

# Copyright (C) 2015 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Generates storage benchmark from captured strace output.

Currently assumes that all mmap'ed regions are resource accesses, and emulates as pread().

Usage:
$ adb shell strace -p `pid zygote` -o /data/local/tmp/trace -f -ff -y -ttt -e trace=file,desc,munmap
$ adb pull /data/local/tmp/trace*
$ python benchgen.py trace.*
"""

import re
import sys
import collections
import traceback
import argparse
from operator import itemgetter
from collections import defaultdict


class Event:
    """One syscall line parsed from a trace file.

    Attributes:
        thread: integer taken from the numeric suffix of the trace file name.
        time:   timestamp of the call as a float.
        call:   syscall name, e.g. "openat".
        args:   list of argument strings as produced by parse_args().
        ret:    raw text that followed " = " on the trace line.
    """

    def __init__(self, thread, time, call, args, ret):
        self.thread = thread
        self.time = time
        self.call = call
        self.args = args
        self.ret = ret

    def __repr__(self):
        return "%s(%s)=%s" % (self.call, repr(self.args), self.ret)


class File:
    """A data file referenced by the trace.

    Attributes:
        name:  path as it appeared in the trace.
        ident: small integer used to name the benchmark file ("file<ident>").
        size:  size in bytes the benchmark file must have; grown while the
               events are replayed.
    """

    def __init__(self, name, ident):
        self.name = name
        self.ident = ident
        self.size = 0

    def __repr__(self):
        return self.name


# Every Event collected so far, and every File keyed by its path.
events = []
files = {}


def find_file(name):
    """Return the File registered for `name`, creating it on first use.

    Leading/trailing '<', '>' and '"' characters are stripped before lookup.
    New files get the next free ident (the number of files already known).
    """
    name = name.strip('<>"')
    if name not in files:
        files[name] = File(name, len(files))
    return files[name]


def extract_file(e, arg):
    """Split an "fd<path>" argument into (fd, File, handle).

    `handle` is the C variable name used for this descriptor in the generated
    code; it combines the event's thread and the fd so that equal fd numbers
    seen in different trace files do not collide.

    Returns (None, None, None) when `arg` carries no "<path>" annotation.
    """
    if "<" not in arg:
        return (None, None, None)
    # Split on the first '<' only: the path itself may contain '<', which made
    # a plain split("<") fail to unpack.
    fd, _, path = arg.partition("<")
    path = path.strip(">")
    handle = "t%sf%s" % (e.thread, fd)
    return (fd, find_file(path), handle)


def parse_args(s):
    """Split a syscall argument string on top-level commas.

    Commas inside double-quoted strings do not split. The quote characters
    themselves are dropped, and a backslash makes the following character
    literal (the backslash is dropped). Each argument is whitespace-stripped.
    An empty input yields [""].
    """
    args = []
    current = []
    esc = False
    quot = False
    for c in s:
        if esc:
            esc = False
            current.append(c)
        elif c == '"':
            quot = not quot
        elif c == '\\':
            esc = True
        elif c == ',' and not quot:
            args.append("".join(current).strip())
            current = []
        else:
            current.append(c)
    args.append("".join(current).strip())
    return args


# Size of the scratch buffer malloc'ed by the generated benchmark; every
# emitted read/write length is clamped to it.
bufsize = 1048576

# Syscalls that are turned into benchmark operations; all others are skipped.
interesting = ["mmap2","read","write","pread64","pwrite64","fsync","fdatasync","openat","close","lseek","_llseek"]

# "<time> <call>(<args>) = <ret>"
re_event = re.compile(r"^([\d\.]+) (.+?)\((.+?)\) = (.+?)$")
# Not referenced anywhere in this script; kept because it is a module-level name.
re_arg = re.compile(r'''((?:[^,"']|"[^"]*"|'[^']*')+)''')


# NOTE(review): the header names after each "#include" and the template
# argument of std::function were missing from the copy under review, which made
# the generated header uncompilable. They are restored here to match the APIs
# the emitted code uses (LOG/PLOG, malloc/free, open, std::min, std::function,
# std::string, ReadRandomBytes, and checkpoint(int) used as a boolean).
# Confirm the exact include list against vold's build.
_RUN_PROLOGUE = """/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


/******************************************************************
 * THIS CODE WAS GENERATED BY benchgen.py, DO NOT MODIFY DIRECTLY *
 ******************************************************************/


#include <android-base/logging.h>

#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sendfile.h>
#include <fcntl.h>

#include <algorithm>
#include <functional>
#include <string>

#include <Utils.h>

namespace android {
namespace vold {

static status_t BenchmarkRun(std::function<bool(int)> checkpoint) {
"""

_CREATE_PROLOGUE = """
free(buf);
return 0;
}

static status_t CreateFile(const char* name, int len) {
    int chunk = std::min(len, 65536);
    int out = -1;
    std::string buf;

    if (android::vold::ReadRandomBytes(chunk, buf) != OK) {
        LOG(ERROR) << "Failed to read random data";
        return -EIO;
    }
    if ((out = TEMP_FAILURE_RETRY(open(name, O_WRONLY|O_CREAT|O_TRUNC, 0644))) < 0) {
        PLOG(ERROR) << "Failed to open " << name;
        return -errno;
    }

    while (len > 0) {
        int n = write(out, buf.c_str(), std::min(len, chunk));
        if (n < 0) {
            PLOG(ERROR) << "Failed to write";
            close(out);
            return -errno;
        }
        len -= n;
    }

    close(out);
    return OK;
}

static status_t BenchmarkCreate(std::function<bool(int)> checkpoint) {
status_t res = 0;
res |= CreateFile("stub", 0);
"""

_DESTROY_PROLOGUE = """
return res;
}

static status_t BenchmarkDestroy() {
status_t res = 0;
res |= unlink("stub");
"""

_IDENT_PROLOGUE = """
return res;
}

static std::string BenchmarkIdent() {"""

_EPILOGUE = """}

}  // namespace vold
}  // namespace android
"""


def _emit(out, text):
    """Write `text` plus a newline to `out` (works on Python 2 and 3)."""
    out.write(text + "\n")


def _parse_trace(fn):
    """Return the interesting Events found in trace file `fn`.

    The thread id is the integer after the last '.' in the file name. Lines
    that do not match re_event, calls not listed in `interesting`, and calls
    whose argument text does not mention "/data/" are skipped.
    """
    thread = int(fn.split(".")[-1])
    parsed = []
    with open(fn) as f:
        for line in f:
            match = re_event.match(line)
            if not match:
                continue
            time, call, args, ret = match.groups()
            if call not in interesting:
                continue
            if "/data/" not in args:
                continue
            parsed.append(Event(thread, float(time), call, parse_args(args), ret))
    return parsed


def _replay_events(bench, ordered):
    """Emit one C statement per replayable event; return (nread, nwrite, nsync).

    Only descriptors whose openat was seen in the trace are replayed. File
    sizes are grown as a side effect so the files can be pre-created later.
    """
    nread = 0
    nwrite = 0
    nsync = 0

    active = set()   # handles currently open in the generated code
    defined = set()  # handles already declared as C variables

    total = len(ordered)
    for i, e in enumerate(ordered, 1):
        if i % 256 == 0:
            # Replay progress is reported over the 50..100 range.
            _emit(bench, "if (!checkpoint(%d)) return -1;" % (50 + ((i * 50) // total)))

        if e.call == "openat":
            # The opened descriptor is in the return value, not the arguments.
            fd, f, handle = extract_file(e, e.ret)
            if f is None:
                continue
            active.add(handle)
            decl = ""
            if handle not in defined:
                # Declare the variable only the first time the handle appears.
                decl = "int "
                defined.add(handle)
            create_mode = ''
            if 'O_CREAT' in e.args[2]:
                if len(e.args) <= 3:
                    raise ValueError('File creation lacks a mode?')
                create_mode = ', ' + e.args[3]
            _emit(bench, '%s%s = TEMP_FAILURE_RETRY(open("file%s", %s%s));'
                  % (decl, handle, f.ident, e.args[2], create_mode))
            continue

        # mmap2 carries its descriptor as the fifth argument; the rest as the first.
        fd_arg = e.args[4] if e.call == "mmap2" else e.args[0]
        fd, f, handle = extract_file(e, fd_arg)
        if handle not in active:
            continue

        if e.call == "close":
            active.remove(handle)
            _emit(bench, 'close(%s);' % (handle))

        elif e.call == "lseek":
            _emit(bench, 'TEMP_FAILURE_RETRY(lseek(%s, %s, %s));'
                  % (handle, e.args[1], e.args[2]))

        elif e.call == "_llseek":
            # The whence value is read from the fourth argument for _llseek.
            _emit(bench, 'TEMP_FAILURE_RETRY(lseek(%s, %s, %s));'
                  % (handle, e.args[1], e.args[3]))

        elif e.call in ("read", "write"):
            # TODO: track actual file size instead of guessing
            count = min(int(e.args[2]), bufsize)
            f.size += count
            _emit(bench, 'TEMP_FAILURE_RETRY(%s(%s, buf, %d));' % (e.call, handle, count))
            if e.call == "read":
                nread += 1
            else:
                nwrite += 1

        elif e.call in ("pread64", "pwrite64"):
            f.size = max(f.size, int(e.args[2]) + int(e.args[3]))
            count = min(int(e.args[2]), bufsize)
            # Emitted as pread/pwrite (the "64" suffix is dropped).
            _emit(bench, 'TEMP_FAILURE_RETRY(%s(%s, buf, %d, %s));'
                  % (e.call[:-2], handle, count, e.args[3]))
            if e.call == "pread64":
                nread += 1
            else:
                nwrite += 1

        elif e.call in ("fsync", "fdatasync"):
            _emit(bench, 'TEMP_FAILURE_RETRY(%s(%s));' % (e.call, handle))
            nsync += 1

        elif e.call == "mmap2":
            count = min(int(e.args[1]), bufsize)
            offset = int(e.args[5], 0)  # base 0 accepts a 0x-prefixed offset
            f.size = max(f.size, count + offset)
            _emit(bench, 'TEMP_FAILURE_RETRY(pread(%s, buf, %s, %s)); // mmap2'
                  % (handle, count, offset))
            nread += 1

    # Close whatever the trace left open; sorted so the output is reproducible.
    for handle in sorted(active):
        _emit(bench, 'close(%s);' % (handle))

    return nread, nwrite, nsync


def main(argv=None, out_path="BenchmarkGen.h"):
    """Parse the given trace files and write the generated benchmark header.

    Args:
        argv: list of trace file names; defaults to sys.argv[1:].
        out_path: file to write; defaults to "BenchmarkGen.h" in the current
            directory.

    Parsed events and discovered files are accumulated in the module-level
    `events` and `files`. A short summary is printed to stdout at the end.
    """
    if argv is None:
        argv = sys.argv[1:]

    for fn in argv:
        events.extend(_parse_trace(fn))
    events.sort(key=lambda e: e.time)

    with open(out_path, 'w') as bench:
        _emit(bench, _RUN_PROLOGUE)
        _emit(bench, "char* buf = (char*) malloc(%d);" % (bufsize))

        nread, nwrite, nsync = _replay_events(bench, events)

        _emit(bench, _CREATE_PROLOGUE)
        # Ordered by ident so the generated header is reproducible.
        data_files = sorted(files.values(), key=lambda f: f.ident)
        total = len(data_files)
        for i, f in enumerate(data_files, 1):
            if i % 12 == 0:
                # File creation progress is reported over the 0..50 range.
                _emit(bench, "if (!checkpoint(%d)) return -1;" % ((i * 50) // total))
            _emit(bench, 'res |= CreateFile("file%s", %d);' % (f.ident, f.size))

        _emit(bench, _DESTROY_PROLOGUE)
        for f in data_files:
            _emit(bench, 'res |= unlink("file%s");' % (f.ident))

        _emit(bench, _IDENT_PROLOGUE)
        _emit(bench, """return "r%d:w%d:s%d";""" % (nread, nwrite, nsync))
        _emit(bench, _EPILOGUE)

    size = sum(f.size for f in files.values())
    sys.stdout.write("Found %d data files accessed, total size %d kB\n"
                     % (len(files), size // 1024))

    types = defaultdict(int)
    for e in events:
        types[e.call] += 1

    sys.stdout.write("Found syscalls:\n")
    for t, n in types.items():
        sys.stdout.write("%s %s\n" % (str(n).rjust(8), t))
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()