/**************************************************************************
*
* Copyright 2011 Jose Fonseca
+ * Copyright (C) 2013 Intel Corporation. All rights reserved.
+ * Author: Shuang He <shuang.he@intel.com>
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
#include <string.h>
+#include <limits.h> // for CHAR_MAX
#include <iostream>
+#include <getopt.h>
#include "os_binary.hpp"
#include "os_time.hpp"
-#include "os_workqueue.hpp"
+#include "os_thread.hpp"
#include "image.hpp"
#include "trace_callset.hpp"
#include "trace_dump.hpp"
+#include "trace_option.hpp"
#include "retrace.hpp"
static bool waitOnFinish = false;
-static bool use_threads;
static const char *comparePrefix = NULL;
static const char *snapshotPrefix = NULL;
trace::Parser parser;
trace::Profiler profiler;
-static std::map<unsigned long, os::WorkQueue *> thread_wq_map;
int verbosity = 0;
bool debug = true;
bool dumpingState = false;
+Driver driver = DRIVER_DEFAULT;
+const char *driverModule = NULL;
+
bool doubleBuffer = true;
bool coreProfile = false;
bool profilingGpuTimes = false;
bool profilingCpuTimes = false;
bool profilingPixelsDrawn = false;
+bool profilingMemoryUsage = false;
+bool useCallNos = true;
+bool singleThread = false;
unsigned frameNo = 0;
unsigned callNo = 0;
-static bool state_dumped;
-class RenderWork : public os::WorkQueueWork
-{
- trace::Call *call;
-public:
- void run(void);
- RenderWork(trace::Call *_call) { call = _call; }
- ~RenderWork(void) { delete call; }
-};
-
-class FlushGLWork : public os::WorkQueueWork
-{
-public:
- void run(void) { flushRendering(); }
-};
void
frameComplete(trace::Call &call) {
}
+static Dumper defaultDumper;
+
+Dumper *dumper = &defaultDumper;
+
+
+/**
+ * Take/compare snapshots.
+ */
static void
takeSnapshot(unsigned call_no) {
+ static unsigned snapshot_no = 0;
+
assert(snapshotPrefix || comparePrefix);
image::Image *ref = NULL;
}
}
- image::Image *src = getSnapshot();
+ image::Image *src = dumper->getSnapshot();
if (!src) {
+ std::cout << "Failed to get snapshot\n";
return;
}
if (snapshotPrefix) {
if (snapshotPrefix[0] == '-' && snapshotPrefix[1] == 0) {
char comment[21];
- snprintf(comment, sizeof comment, "%u", call_no);
+ snprintf(comment, sizeof comment, "%u",
+ useCallNos ? call_no : snapshot_no);
src->writePNM(std::cout, comment);
} else {
- os::String filename = os::String::format("%s%010u.png", snapshotPrefix, call_no);
+ os::String filename = os::String::format("%s%010u.png",
+ snapshotPrefix,
+ useCallNos ? call_no : snapshot_no);
if (src->writePNG(filename) && retrace::verbosity >= 0) {
std::cout << "Wrote " << filename << "\n";
}
delete src;
+ snapshot_no++;
+
return;
}
-void RenderWork::run(void)
-{
+
+/**
+ * Retrace one call.
+ *
+ * Take snapshots before/after retracing (as appropriate) and dispatch it to
+ * the respective handler.
+ */
+static void
+retraceCall(trace::Call *call) {
bool swapRenderTarget = call->flags &
trace::CALL_FLAG_SWAP_RENDERTARGET;
bool doSnapshot = snapshotFrequency.contains(*call) ||
compareFrequency.contains(*call);
- if (state_dumped)
- return;
-
// For calls which cause rendertargets to be swaped, we take the
// snapshot _before_ swapping the rendertargets.
if (doSnapshot && swapRenderTarget) {
if (doSnapshot && !swapRenderTarget)
takeSnapshot(call->no);
- if (call->no >= dumpStateCallNo && dumpState(std::cout))
- state_dumped = true;
+ if (call->no >= dumpStateCallNo &&
+ dumper->dumpState(std::cout)) {
+ exit(0);
+ }
}
-static os::WorkQueue *get_work_queue(unsigned long thread_id)
+
+class RelayRunner;
+
+
+/**
+ * Implement multi-threading by mimicking a relay race.
+ *
+ * The "baton" is the next trace call to be replayed.  A runner replays
+ * calls for as long as they carry its own thread_id and hands the baton to
+ * another runner when the trace switches to a different thread_id, so the
+ * baton is only ever handed to one runner at a time.
+ *
+ * This class owns the runners and routes the baton between them.
+ */
+class RelayRace
{
- os::WorkQueue *thread;
- std::map<unsigned long, os::WorkQueue *>::iterator it;
+private:
+ /**
+ * Runners indexed by the leg they run (i.e., the thread_ids from the
+ * trace).
+ *
+ * Entries for legs that have not been requested yet are null.
+ */
+ std::vector<RelayRunner*> runners;
- it = thread_wq_map.find(thread_id);
- if (it == thread_wq_map.end()) {
- thread = new os::WorkQueue();
- thread_wq_map[thread_id] = thread;
- } else {
- thread = it->second;
+public:
+ RelayRace(); // creates the fore runner (leg 0)
+
+ ~RelayRace(); // deletes every runner that was instantiated
+
+ RelayRunner *
+ getRunner(unsigned leg); // returns the runner for leg, creating it on first use
+
+ inline RelayRunner *
+ getForeRunner() {
+ return getRunner(0); // the fore runner is always leg 0
}
- return thread;
-}
+ void
+ run(void); // parses the first call and runs the fore runner's loop on the calling thread
+
+ void
+ passBaton(trace::Call *call); // hands call to the runner for call->thread_id
+
+ void
+ finishLine(); // tells the fore runner that the race is finished
-static void exit_work_queues(void)
+ void
+ stopRunners(); // tells every runner except the fore runner to finish
+};
+
+
+/**
+ * Each runner is a thread.
+ *
+ * The fore runner doesn't have its own thread, but instead uses the thread
+ * where the race started.
+ *
+ * A runner sleeps on wake_cond until it is either handed a baton (the next
+ * call to replay on its leg) or told that the race has finished.
+ */
+class RelayRunner
{
- std::map<unsigned long, os::WorkQueue *>::iterator it;
+private:
+ friend class RelayRace;
+
+ RelayRace *race; // the race this runner takes part in; used to pass the baton on
- it = thread_wq_map.begin();
- while (it != thread_wq_map.end()) {
- os::WorkQueue *thread_wq = it->second;
+ unsigned leg; // thread_id (from the trace) replayed by this runner; 0 is the fore runner
+
+ os::mutex mutex;
+ os::condition_variable wake_cond; // signalled whenever baton or finished is set by another runner
- thread_wq->queue_work(new FlushGLWork);
- thread_wq->flush();
- thread_wq->destroy();
- thread_wq_map.erase(it++);
+ /**
+ * These are protected by the mutex.
+ *
+ * finished: set once this runner must leave runRace().
+ * baton: the call this runner should replay next, or 0 when it has
+ * nothing to run.
+ */
+ bool finished;
+ trace::Call *baton;
+
+ os::thread thread; // only assigned for legs other than 0 (see the constructor)
+
+ static void *
+ runnerThread(RelayRunner *_this); // thread entry point; runs _this->runRace()
+
+public:
+ RelayRunner(RelayRace *race, unsigned _leg) :
+ race(race),
+ leg(_leg),
+ finished(false),
+ baton(0)
+ {
+ /* The fore runner does not need a new thread */
+ if (leg) {
+ thread = os::thread(runnerThread, this);
+ }
}
-}
-static void do_all_calls(void)
-{
- trace::Call *call;
- int prev_thread_id = -1;
- os::WorkQueue *thread_wq = NULL;
-
- while ((call = parser.parse_call())) {
- RenderWork *render_work = new RenderWork(call);
-
- if (use_threads) {
- if (prev_thread_id != call->thread_id) {
- if (thread_wq)
- thread_wq->flush();
- thread_wq = get_work_queue(call->thread_id);
- prev_thread_id = call->thread_id;
+ /**
+ * Thread main loop.
+ *
+ * Sleeps until a baton arrives or the race is finished, and replays
+ * each baton through runLeg().  When the fore runner (leg 0) leaves
+ * the loop it asks the race to stop all the other runners.
+ *
+ * NOTE(review): the lock is held across runLeg(); presumably it is only
+ * released inside wake_cond.wait(), as with a standard condition
+ * variable -- confirm against os_thread.hpp.
+ */
+ void
+ runRace(void) {
+ os::unique_lock<os::mutex> lock(mutex);
+
+ while (1) {
+ while (!finished && !baton) { // re-check the predicate after every wake-up
+ wake_cond.wait(lock);
+ }
+
+ if (finished) {
+ break;
}
- thread_wq->queue_work(render_work);
+ assert(baton);
+ trace::Call *call = baton;
+ baton = 0; // take the baton so the next iteration waits for a new one
+
+ runLeg(call);
+ }
+
+ if (0) std::cerr << "leg " << leg << " actually finishing\n";
+
+ if (leg == 0) {
+ race->stopRunners();
+ }
+ }
- // XXX: Flush immediately to avoid race conditions on unprotected
- // static/global variables.
- thread_wq->flush();
+ /**
+ * Interpret successive calls.
+ *
+ * Replays call and every following call that has the same thread_id,
+ * deleting each call after it has been replayed.  The first call that
+ * belongs to another thread_id is passed on as the baton, after
+ * flushing rendering.  When the parser returns no more calls, the
+ * fore runner marks itself finished; any other runner notifies the
+ * fore runner through the race.
+ */
+ void
+ runLeg(trace::Call *call) {
+ /* Consume successive calls for this thread. */
+ do {
+ assert(call);
+ assert(call->thread_id == leg);
+ retraceCall(call);
+ delete call;
+ call = parser.parse_call();
+ } while (call && call->thread_id == leg);
+
+ if (call) {
+ /* Pass the baton */
+ assert(call->thread_id != leg);
+ flushRendering();
+ race->passBaton(call);
} else {
- render_work->run();
- delete render_work;
+ /* Reached the finish line */
+ if (0) std::cerr << "finished on leg " << leg << "\n";
+ if (leg) {
+ /* Notify the fore runner */
+ race->finishLine();
+ } else {
+ /* We are the fore runner */
+ finished = true; // runLeg() is called from runRace(), which holds the mutex
+ }
}
+ }
- if (state_dumped)
- break;
+ /**
+ * Called by other threads when relinquishing the baton.
+ *
+ * Stores call as this runner's baton under the mutex and then wakes
+ * the runner up.
+ */
+ void
+ receiveBaton(trace::Call *call) {
+ assert (call->thread_id == leg);
+
+ mutex.lock();
+ baton = call;
+ mutex.unlock();
+
+ wake_cond.signal();
+ }
+
+ /**
+ * Tell this runner that the race is over.
+ *
+ * Called on the fore runner by RelayRace::finishLine() when another
+ * runner reaches the end of the trace, and on every other runner by
+ * RelayRace::stopRunners().
+ */
+ void
+ finishRace() {
+ if (0) std::cerr << "notify finish to leg " << leg << "\n";
+
+ mutex.lock();
+ finished = true;
+ mutex.unlock();
+
+ wake_cond.signal();
+ }
+};
+
+
+void *
+RelayRunner::runnerThread(RelayRunner *_this) { // entry point handed to os::thread by the RelayRunner constructor
+ _this->runRace(); // returns once this runner has been told the race is finished
+ return 0;
+}
+
+
+RelayRace::RelayRace() { // a race always starts with its fore runner in slot 0
+ runners.push_back(new RelayRunner(this, 0)); // leg 0: the RelayRunner constructor starts no thread for it
+}
+
+
+RelayRace::~RelayRace() { // frees every runner created by the constructor and by getRunner()
+ assert(runners.size() >= 1); // the constructor always adds the fore runner
+ std::vector<RelayRunner*>::const_iterator it;
+ for (it = runners.begin(); it != runners.end(); ++it) {
+ RelayRunner* runner = *it;
+ if (runner) { // slots for legs that were never requested are null
+ delete runner;
+ }
+ }
+}
+
+
+/**
+ * Get (or instantiate) a runner for the specified leg.
+ *
+ * The runners vector is grown on demand so that leg is a valid index; the
+ * slots added by the resize start out null.  A runner is only constructed
+ * the first time its leg is requested, and constructing a runner for a
+ * non-zero leg also starts its thread.
+ */
+RelayRunner *
+RelayRace::getRunner(unsigned leg) {
+ RelayRunner *runner;
+
+ if (leg >= runners.size()) {
+ runners.resize(leg + 1);
+ runner = 0; // the slot was just created, so no runner exists yet
+ } else {
+ runner = runners[leg];
+ }
+ if (!runner) {
+ runner = new RelayRunner(this, leg);
+ runners[leg] = runner;
}
+ return runner;
+}
+
- exit_work_queues();
+/**
+ * Start the race.
+ *
+ * Parses the first call of the trace and gives it to the runner for its
+ * thread_id, then runs the fore runner's loop on the calling thread.  This
+ * does not return until the fore runner has been marked finished.
+ */
+void
+RelayRace::run(void) {
+ trace::Call *call;
+ call = parser.parse_call();
+ if (!call) {
+ /* Nothing to do */
+ return;
+ }
+
+ RelayRunner *foreRunner = getForeRunner();
+ if (call->thread_id == 0) {
+ /* We are the forerunner thread, so no need to pass baton */
+ foreRunner->baton = call; // set directly; runRace() below picks it up without waiting
+ } else {
+ passBaton(call);
+ }
+
+ /* Start the forerunner thread */
+ foreRunner->runRace();
+}
+
+
+/**
+ * Pass the baton (i.e., the call) to the appropriate thread.
+ *
+ * The receiving runner is looked up by call->thread_id and is created
+ * first if this is the first call seen for that thread_id.
+ */
+void
+RelayRace::passBaton(trace::Call *call) {
+ if (0) std::cerr << "switching to thread " << call->thread_id << "\n";
+ RelayRunner *runner = getRunner(call->thread_id);
+ runner->receiveBaton(call);
+}
+
+
+/**
+ * Called when a runner other than the forerunner reaches the finish line.
+ *
+ * Only the fore runner can finish the race, so inform him that the race is
+ * finished.
+ *
+ * The fore runner then leaves its main loop and stops the remaining
+ * runners itself.
+ */
+void
+RelayRace::finishLine(void) {
+ RelayRunner *foreRunner = getForeRunner();
+ foreRunner->finishRace();
+}
+
+
+/**
+ * Called by the fore runner after finish line to stop all other runners.
+ *
+ * Each instantiated runner is marked finished and woken up; null slots
+ * (legs that were never requested) are skipped.
+ */
+void
+RelayRace::stopRunners(void) {
+ std::vector<RelayRunner*>::const_iterator it;
+ for (it = runners.begin() + 1; it != runners.end(); ++it) { // begin() + 1 skips the fore runner in slot 0
+ RelayRunner* runner = *it;
+ if (runner) {
+ runner->finishRace();
+ }
+ }
}
startTime = os::getTime();
- do_all_calls();
-
- if (!use_threads)
- /*
- * Reached the end of trace; if using threads we do the flush
- * when exiting the threads.
- */
+ if (singleThread) {
+ trace::Call *call;
+ while ((call = parser.parse_call())) {
+ retraceCall(call);
+ delete call;
+ };
flushRendering();
+ } else {
+ RelayRace race;
+ race.run();
+ }
long long endTime = os::getTime();
float timeInterval = (endTime - startTime) * (1.0 / os::timeFrequency);
"Usage: " << argv0 << " [OPTION] TRACE [...]\n"
"Replay TRACE.\n"
"\n"
- " -b benchmark mode (no error checking or warning messages)\n"
- " -pcpu cpu profiling (cpu times per call)\n"
- " -pgpu gpu profiling (gpu times per draw call)\n"
- " -ppd pixels drawn profiling (pixels drawn per draw call)\n"
- " -c PREFIX compare against snapshots\n"
- " -C CALLSET calls to compare (default is every frame)\n"
- " -core use core profile\n"
- " -db use a double buffer visual (default)\n"
- " -sb use a single buffer visual\n"
- " -s PREFIX take snapshots; `-` for PNM stdout output\n"
- " -S CALLSET calls to snapshot (default is every frame)\n"
- " -v increase output verbosity\n"
- " -D CALLNO dump state at specific call no\n"
- " -w waitOnFinish on final frame\n"
- " -t enable threading\n";
+ " -b, --benchmark benchmark mode (no error checking or warning messages)\n"
+ " --pcpu cpu profiling (cpu times per call)\n"
+ " --pgpu gpu profiling (gpu times per draw call)\n"
+ " --ppd pixels drawn profiling (pixels drawn per draw call)\n"
+ " --pmem memory usage profiling (vsize rss per call)\n"
+ " -c, --compare=PREFIX compare against snapshots with given PREFIX\n"
+ " -C, --calls=CALLSET calls to compare (default is every frame)\n"
+ " --call-nos[=BOOL] use call numbers in snapshot filenames\n"
+ " --core use core profile\n"
+ " --db use a double buffer visual (default)\n"
+ " --driver=DRIVER force driver type (`hw`, `sw`, `ref`, `null`, or driver module name)\n"
+ " --sb use a single buffer visual\n"
+ " -s, --snapshot-prefix=PREFIX take snapshots; `-` for PNM stdout output\n"
+ " -S, --snapshot=CALLSET calls to snapshot (default is every frame)\n"
+ " -v, --verbose increase output verbosity\n"
+ " -D, --dump-state=CALL dump state at specific call no\n"
+ " -w, --wait waitOnFinish on final frame\n"
+ " --singlethread use a single thread to replay command stream\n";
+}
+
+enum { // values used in longOptions for the options that have no single-character form
+ CALL_NOS_OPT = CHAR_MAX + 1, // start above CHAR_MAX so these can never equal a short option character
+ CORE_OPT,
+ DB_OPT,
+ DRIVER_OPT,
+ PCPU_OPT,
+ PGPU_OPT,
+ PPD_OPT,
+ PMEM_OPT,
+ SB_OPT,
+ SINGLETHREAD_OPT,
+};
+
+const static char *
+shortOptions = "bc:C:D:hs:S:vw"; // single-character options for getopt; a ':' after a letter means that option takes an argument
+
+const static struct option
+longOptions[] = { // table passed to getopt_long_only() in main(): {name, argument kind, flag, value returned}
+ {"benchmark", no_argument, 0, 'b'},
+ {"call-nos", optional_argument, 0, CALL_NOS_OPT },
+ {"calls", required_argument, 0, 'C'},
+ {"compare", required_argument, 0, 'c'},
+ {"core", no_argument, 0, CORE_OPT},
+ {"db", no_argument, 0, DB_OPT},
+ {"driver", required_argument, 0, DRIVER_OPT},
+ {"dump-state", required_argument, 0, 'D'},
+ {"help", no_argument, 0, 'h'},
+ {"pcpu", no_argument, 0, PCPU_OPT},
+ {"pgpu", no_argument, 0, PGPU_OPT},
+ {"ppd", no_argument, 0, PPD_OPT},
+ {"pmem", no_argument, 0, PMEM_OPT},
+ {"sb", no_argument, 0, SB_OPT},
+ {"snapshot-prefix", required_argument, 0, 's'},
+ {"snapshot", required_argument, 0, 'S'},
+ {"verbose", no_argument, 0, 'v'},
+ {"wait", no_argument, 0, 'w'},
+ {"singlethread", no_argument, 0, SINGLETHREAD_OPT},
+ {0, 0, 0, 0} // all-zero entry terminates the table
+};
+
+
+static void exceptionCallback(void) // installed with os::setExceptionCallback() in main() while traces are replayed
+{
+ std::cerr << retrace::callNo << ": error: caught an unhandled exception\n"; // prefix the message with the current retrace::callNo
}
int main(int argc, char **argv)
{
using namespace retrace;
+ int i;
assert(compareFrequency.empty());
assert(snapshotFrequency.empty());
- int i;
- for (i = 1; i < argc; ++i) {
- const char *arg = argv[i];
-
- if (arg[0] != '-') {
- break;
- }
-
- if (!strcmp(arg, "--")) {
- break;
- } else if (!strcmp(arg, "-b")) {
+ int opt;
+ while ((opt = getopt_long_only(argc, argv, shortOptions, longOptions, NULL)) != -1) {
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ return 0;
+ case 'b':
retrace::debug = false;
retrace::verbosity = -1;
- } else if (!strcmp(arg, "-c")) {
- comparePrefix = argv[++i];
+ break;
+ case CALL_NOS_OPT:
+ useCallNos = trace::boolOption(optarg);
+ break;
+ case 'c':
+ comparePrefix = optarg;
if (compareFrequency.empty()) {
compareFrequency = trace::CallSet(trace::FREQUENCY_FRAME);
}
- } else if (!strcmp(arg, "-C")) {
- compareFrequency = trace::CallSet(argv[++i]);
+ break;
+ case 'C':
+ compareFrequency = trace::CallSet(optarg);
if (comparePrefix == NULL) {
comparePrefix = "";
}
- } else if (!strcmp(arg, "-D")) {
- dumpStateCallNo = atoi(argv[++i]);
+ break;
+ case 'D':
+ dumpStateCallNo = atoi(optarg);
dumpingState = true;
retrace::verbosity = -2;
- } else if (!strcmp(arg, "-core")) {
+ break;
+ case CORE_OPT:
retrace::coreProfile = true;
- } else if (!strcmp(arg, "-db")) {
+ break;
+ case DB_OPT:
retrace::doubleBuffer = true;
- } else if (!strcmp(arg, "-sb")) {
+ break;
+ case DRIVER_OPT:
+ if (strcasecmp(optarg, "hw") == 0) {
+ driver = DRIVER_HARDWARE;
+ } else if (strcasecmp(optarg, "sw") == 0) {
+ driver = DRIVER_SOFTWARE;
+ } else if (strcasecmp(optarg, "ref") == 0) {
+ driver = DRIVER_REFERENCE;
+ } else if (strcasecmp(optarg, "null") == 0) {
+ driver = DRIVER_NULL;
+ } else {
+ driver = DRIVER_MODULE;
+ driverModule = optarg;
+ }
+ break;
+ case SB_OPT:
retrace::doubleBuffer = false;
- } else if (!strcmp(arg, "--help")) {
- usage(argv[0]);
- return 0;
- } else if (!strcmp(arg, "-s")) {
- snapshotPrefix = argv[++i];
+ break;
+ case SINGLETHREAD_OPT:
+ retrace::singleThread = true;
+ break;
+ case 's':
+ snapshotPrefix = optarg;
if (snapshotFrequency.empty()) {
snapshotFrequency = trace::CallSet(trace::FREQUENCY_FRAME);
}
os::setBinaryMode(stdout);
retrace::verbosity = -2;
}
- } else if (!strcmp(arg, "-S")) {
- snapshotFrequency = trace::CallSet(argv[++i]);
+ break;
+ case 'S':
+ snapshotFrequency = trace::CallSet(optarg);
if (snapshotPrefix == NULL) {
snapshotPrefix = "";
}
- } else if (!strcmp(arg, "-v")) {
+ break;
+ case 'v':
++retrace::verbosity;
- } else if (!strcmp(arg, "-w")) {
+ break;
+ case 'w':
waitOnFinish = true;
- } else if (arg[1] == 'p') {
+ break;
+ case PGPU_OPT:
retrace::debug = false;
retrace::profiling = true;
retrace::verbosity = -1;
- if (!strcmp(arg, "-pcpu")) {
- retrace::profilingCpuTimes = true;
- } else if (!strcmp(arg, "-pgpu")) {
- retrace::profilingGpuTimes = true;
- } else if (!strcmp(arg, "-ppd")) {
- retrace::profilingPixelsDrawn = true;
- }
- } else if (!strcmp(arg, "-t")) {
- use_threads = true;
- } else {
- std::cerr << "error: unknown option " << arg << "\n";
+ retrace::profilingGpuTimes = true;
+ break;
+ case PCPU_OPT:
+ retrace::debug = false;
+ retrace::profiling = true;
+ retrace::verbosity = -1;
+
+ retrace::profilingCpuTimes = true;
+ break;
+ case PPD_OPT:
+ retrace::debug = false;
+ retrace::profiling = true;
+ retrace::verbosity = -1;
+
+ retrace::profilingPixelsDrawn = true;
+ break;
+ case PMEM_OPT:
+ retrace::debug = false;
+ retrace::profiling = true;
+ retrace::verbosity = -1;
+
+ retrace::profilingMemoryUsage = true;
+ break;
+ default:
+ std::cerr << "error: unknown option " << opt << "\n";
usage(argv[0]);
return 1;
}
retrace::setUp();
if (retrace::profiling) {
- retrace::profiler.setup(retrace::profilingCpuTimes, retrace::profilingGpuTimes, retrace::profilingPixelsDrawn);
+ retrace::profiler.setup(retrace::profilingCpuTimes, retrace::profilingGpuTimes, retrace::profilingPixelsDrawn, retrace::profilingMemoryUsage);
}
- for ( ; i < argc; ++i) {
+ os::setExceptionCallback(exceptionCallback);
+
+ for (i = optind; i < argc; ++i) {
if (!retrace::parser.open(argv[i])) {
- std::cerr << "error: failed to open " << argv[i] << "\n";
return 1;
}
retrace::parser.close();
}
+
+ os::resetExceptionCallback();
// XXX: X often hangs on XCloseDisplay
//retrace::cleanUp();