diff options
Diffstat (limited to 'tools/perf/bench')
-rw-r--r-- | tools/perf/bench/bench.h | 18 | ||||
-rw-r--r-- | tools/perf/bench/mem-memcpy-arch.h | 12 | ||||
-rw-r--r-- | tools/perf/bench/mem-memcpy-x86-64-asm-def.h | 12 | ||||
-rw-r--r-- | tools/perf/bench/mem-memcpy-x86-64-asm.S | 12 | ||||
-rw-r--r-- | tools/perf/bench/mem-memcpy.c | 303 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset-arch.h | 12 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset-x86-64-asm-def.h | 12 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset-x86-64-asm.S | 13 | ||||
-rw-r--r-- | tools/perf/bench/mem-memset.c | 297 | ||||
-rw-r--r-- | tools/perf/bench/sched-messaging.c | 336 | ||||
-rw-r--r-- | tools/perf/bench/sched-pipe.c | 127 |
11 files changed, 1154 insertions, 0 deletions
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h new file mode 100644 index 00000000..a09bece6 --- /dev/null +++ b/tools/perf/bench/bench.h @@ -0,0 +1,18 @@ +#ifndef BENCH_H +#define BENCH_H + +extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); +extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); +extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); +extern int bench_mem_memset(int argc, const char **argv, const char *prefix); + +#define BENCH_FORMAT_DEFAULT_STR "default" +#define BENCH_FORMAT_DEFAULT 0 +#define BENCH_FORMAT_SIMPLE_STR "simple" +#define BENCH_FORMAT_SIMPLE 1 + +#define BENCH_FORMAT_UNKNOWN -1 + +extern int bench_format; + +#endif diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h new file mode 100644 index 00000000..a72e36cb --- /dev/null +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -0,0 +1,12 @@ + +#ifdef ARCH_X86_64 + +#define MEMCPY_FN(fn, name, desc) \ + extern void *fn(void *, const void *, size_t); + +#include "mem-memcpy-x86-64-asm-def.h" + +#undef MEMCPY_FN + +#endif + diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h new file mode 100644 index 00000000..d66ab799 --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -0,0 +1,12 @@ + +MEMCPY_FN(__memcpy, + "x86-64-unrolled", + "unrolled memcpy() in arch/x86/lib/memcpy_64.S") + +MEMCPY_FN(memcpy_c, + "x86-64-movsq", + "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") + +MEMCPY_FN(memcpy_c_e, + "x86-64-movsb", + "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S new file mode 100644 index 00000000..fcd9cf00 --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -0,0 +1,12 @@ +#define memcpy MEMCPY /* don't hide glibc's memcpy() */ +#define altinstr_replacement text +#define globl p2align 4; .globl +#define Lmemcpy_c globl memcpy_c; memcpy_c +#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e +#include "../../../arch/x86/lib/memcpy_64.S" +/* + * We need to provide note.GNU-stack section, saying that we want + * NOT executable stack. Otherwise the final linking will assume that + * the ELF stack should not be restricted at all and set it RWX. + */ +.section .note.GNU-stack,"",@progbits diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c new file mode 100644 index 00000000..71557225 --- /dev/null +++ b/tools/perf/bench/mem-memcpy.c @@ -0,0 +1,303 @@ +/* + * mem-memcpy.c + * + * memcpy: Simple memory copy in various ways + * + * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "mem-memcpy-arch.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <errno.h> + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int iterations = 1; +static bool use_clock; +static int clock_fd; +static bool only_prefault; +static bool no_prefault; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memcpy() invocation this number of times"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memcpy()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memcpy()"), + OPT_END() +}; + +typedef void *(*memcpy_t)(void *, const void *, size_t); + +struct routine { + const char *name; + const char *desc; + memcpy_t fn; +}; + +struct routine routines[] = { + { "default", + "Default memcpy() provided by glibc", + memcpy }, +#ifdef ARCH_X86_64 + +#define MEMCPY_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memcpy-x86-64-asm-def.h" +#undef MEMCPY_FN + +#endif + + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy <options>", + NULL +}; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + + if (clock_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +static void alloc_mem(void **dst, void **src, size_t length) +{ + *dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); + + *src = zalloc(length); + if (!src) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) +{ + u64 clock_start = 0ULL, clock_end = 0ULL; + void *src = NULL, *dst = NULL; + int i; + + alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + clock_start = get_clock(); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + clock_end = get_clock(); + + free(src); + free(dst); + return clock_end - clock_start; +} + +static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + void *src = NULL, *dst = NULL; + int i; + + alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(src); + free(dst); + return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + +int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __used) +{ + int i; + size_t len; + double result_bps[2]; + u64 result_clock[2]; + + argc = parse_options(argc, argv, options, + bench_mem_memcpy_usage, 0); + + if (use_clock) + init_clock(); + + len = (size_t)perf_atoll((char *)length_str); + + result_clock[0] = result_clock[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); + + if (!only_prefault && !no_prefault) { + /* show both of results */ + if (use_clock) { + result_clock[0] = + do_memcpy_clock(routines[i].fn, len, false); + result_clock[1] = + do_memcpy_clock(routines[i].fn, len, true); + } else { + result_bps[0] = + do_memcpy_gettimeofday(routines[i].fn, + len, false); + result_bps[1] = + do_memcpy_gettimeofday(routines[i].fn, + len, true); + } + } else { + if (use_clock) { + result_clock[pf] = + do_memcpy_clock(routines[i].fn, + len, only_prefault); + } else { + result_bps[pf] = + do_memcpy_gettimeofday(routines[i].fn, + len, only_prefault); + } + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)result_clock[0] + / (double)len); + printf(" %14lf Clock/Byte (with prefault)\n", + (double)result_clock[1] + / (double)len); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); + } + } else { + if (use_clock) { + printf(" %14lf Clock/Byte", + (double)result_clock[pf] + / (double)len); + } else + print_bps(result_bps[pf]); + + printf("%s\n", only_prefault ? " (with prefault)" : ""); + } + break; + case BENCH_FORMAT_SIMPLE: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf("%lf %lf\n", + (double)result_clock[0] / (double)len, + (double)result_clock[1] / (double)len); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_clock) { + printf("%lf\n", (double)result_clock[pf] + / (double)len); + } else + printf("%lf\n", result_bps[pf]); + } + break; + default: + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } + + return 0; +} diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 00000000..a040fa77 --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h @@ -0,0 +1,12 @@ + +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) \ + extern void *fn(void *, int, size_t); + +#include "mem-memset-x86-64-asm-def.h" + +#undef MEMSET_FN + +#endif + diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 00000000..a71dff97 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -0,0 +1,12 @@ + +MEMSET_FN(__memset, + "x86-64-unrolled", + "unrolled memset() in arch/x86/lib/memset_64.S") + +MEMSET_FN(memset_c, + "x86-64-stosq", + "movsq-based memset() in arch/x86/lib/memset_64.S") + +MEMSET_FN(memset_c_e, + "x86-64-stosb", + "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 00000000..9e5af89e --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -0,0 +1,13 @@ +#define memset MEMSET /* don't hide glibc's memset() */ +#define altinstr_replacement text +#define globl p2align 4; .globl +#define Lmemset_c globl memset_c; memset_c +#define Lmemset_c_e globl memset_c_e; memset_c_e +#include "../../../arch/x86/lib/memset_64.S" + +/* + * We need to provide note.GNU-stack section, saying that we want + * NOT executable stack. Otherwise the final linking will assume that + * the ELF stack should not be restricted at all and set it RWX. + */ +.section .note.GNU-stack,"",@progbits diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 00000000..e9079185 --- /dev/null +++ b/tools/perf/bench/mem-memset.c @@ -0,0 +1,297 @@ +/* + * mem-memset.c + * + * memset: Simple memory set in various ways + * + * Trivial clone of mem-memcpy.c. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "mem-memset-arch.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <errno.h> + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int iterations = 1; +static bool use_clock; +static int clock_fd; +static bool only_prefault; +static bool no_prefault; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memset() invocation this number of times"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memset()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memset()"), + OPT_END() +}; + +typedef void *(*memset_t)(void *, int, size_t); + +struct routine { + const char *name; + const char *desc; + memset_t fn; +}; + +static const struct routine routines[] = { + { "default", + "Default memset() provided by glibc", + memset }, +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memset-x86-64-asm-def.h" +#undef MEMSET_FN + +#endif + + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset <options>", + NULL +}; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + + if (clock_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +static void alloc_mem(void **dst, size_t length) +{ + *dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) +{ + u64 clock_start = 0ULL, clock_end = 0ULL; + void *dst = NULL; + int i; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + clock_start = get_clock(); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + clock_end = get_clock(); + + free(dst); + return clock_end - clock_start; +} + +static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + void *dst = NULL; + int i; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + +int bench_mem_memset(int argc, const char **argv, + const char *prefix __used) +{ + int i; + size_t len; + double result_bps[2]; + u64 result_clock[2]; + + argc = parse_options(argc, argv, options, + bench_mem_memset_usage, 0); + + if (use_clock) + init_clock(); + + len = (size_t)perf_atoll((char *)length_str); + + result_clock[0] = result_clock[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); + + if (!only_prefault && !no_prefault) { + /* show both of results */ + if (use_clock) { + result_clock[0] = + do_memset_clock(routines[i].fn, len, false); + result_clock[1] = + do_memset_clock(routines[i].fn, len, true); + } else { + result_bps[0] = + do_memset_gettimeofday(routines[i].fn, + len, false); + result_bps[1] = + do_memset_gettimeofday(routines[i].fn, + len, true); + } + } else { + if (use_clock) { + result_clock[pf] = + do_memset_clock(routines[i].fn, + len, only_prefault); + } else { + result_bps[pf] = + do_memset_gettimeofday(routines[i].fn, + len, only_prefault); + } + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)result_clock[0] + / (double)len); + printf(" %14lf Clock/Byte (with prefault)\n ", + (double)result_clock[1] + / (double)len); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); + } + } else { + if (use_clock) { + printf(" %14lf Clock/Byte", + (double)result_clock[pf] + / (double)len); + } else + print_bps(result_bps[pf]); + + printf("%s\n", only_prefault ? " (with prefault)" : ""); + } + break; + case BENCH_FORMAT_SIMPLE: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf("%lf %lf\n", + (double)result_clock[0] / (double)len, + (double)result_clock[1] / (double)len); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_clock) { + printf("%lf\n", (double)result_clock[pf] + / (double)len); + } else + printf("%lf\n", result_bps[pf]); + } + break; + default: + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } + + return 0; +} diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c new file mode 100644 index 00000000..d1d1b30f --- /dev/null +++ b/tools/perf/bench/sched-messaging.c @@ -0,0 +1,336 @@ +/* + * + * sched-messaging.c + * + * messaging: Benchmark for scheduler and IPC mechanisms + * + * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au> + * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + * + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../builtin.h" +#include "bench.h" + +/* Test groups of 20 processes spraying to 20 receivers */ +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/time.h> +#include <sys/poll.h> +#include <limits.h> + +#define DATASIZE 100 + +static bool use_pipes = false; +static unsigned int loops = 100; +static bool thread_mode = false; +static unsigned int num_groups = 10; + +struct sender_context { + unsigned int num_fds; + int ready_out; + int wakefd; + int out_fds[0]; +}; + +struct receiver_context { + unsigned int num_packets; + int in_fds[2]; + int ready_out; + int wakefd; +}; + +static void barf(const char *msg) +{ + fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno)); + exit(1); +} + +static void fdpair(int fds[2]) +{ + if (use_pipes) { + if (pipe(fds) == 0) + return; + } else { + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) + return; + } + + barf(use_pipes ? "pipe()" : "socketpair()"); +} + +/* Block until we're ready to go */ +static void ready(int ready_out, int wakefd) +{ + char dummy; + struct pollfd pollfd = { .fd = wakefd, .events = POLLIN }; + + /* Tell them we're ready. */ + if (write(ready_out, &dummy, 1) != 1) + barf("CLIENT: ready write"); + + /* Wait for "GO" signal */ + if (poll(&pollfd, 1, -1) != 1) + barf("poll"); +} + +/* Sender sprays loops messages down each file descriptor */ +static void *sender(struct sender_context *ctx) +{ + char data[DATASIZE]; + unsigned int i, j; + + ready(ctx->ready_out, ctx->wakefd); + + /* Now pump to every receiver. */ + for (i = 0; i < loops; i++) { + for (j = 0; j < ctx->num_fds; j++) { + int ret, done = 0; + +again: + ret = write(ctx->out_fds[j], data + done, + sizeof(data)-done); + if (ret < 0) + barf("SENDER: write"); + done += ret; + if (done < DATASIZE) + goto again; + } + } + + return NULL; +} + + +/* One receiver per fd */ +static void *receiver(struct receiver_context* ctx) +{ + unsigned int i; + + if (!thread_mode) + close(ctx->in_fds[1]); + + /* Wait for start... */ + ready(ctx->ready_out, ctx->wakefd); + + /* Receive them all */ + for (i = 0; i < ctx->num_packets; i++) { + char data[DATASIZE]; + int ret, done = 0; + +again: + ret = read(ctx->in_fds[0], data + done, DATASIZE - done); + if (ret < 0) + barf("SERVER: read"); + done += ret; + if (done < DATASIZE) + goto again; + } + + return NULL; +} + +static pthread_t create_worker(void *ctx, void *(*func)(void *)) +{ + pthread_attr_t attr; + pthread_t childid; + int err; + + if (!thread_mode) { + /* process mode */ + /* Fork the receiver. */ + switch (fork()) { + case -1: + barf("fork()"); + break; + case 0: + (*func) (ctx); + exit(0); + break; + default: + break; + } + + return (pthread_t)0; + } + + if (pthread_attr_init(&attr) != 0) + barf("pthread_attr_init:"); + +#ifndef __ia64__ + if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0) + barf("pthread_attr_setstacksize"); +#endif + + err = pthread_create(&childid, &attr, func, ctx); + if (err != 0) { + fprintf(stderr, "pthread_create failed: %s (%d)\n", + strerror(err), err); + exit(-1); + } + return childid; +} + +static void reap_worker(pthread_t id) +{ + int proc_status; + void *thread_status; + + if (!thread_mode) { + /* process mode */ + wait(&proc_status); + if (!WIFEXITED(proc_status)) + exit(1); + } else { + pthread_join(id, &thread_status); + } +} + +/* One group of senders and receivers */ +static unsigned int group(pthread_t *pth, + unsigned int num_fds, + int ready_out, + int wakefd) +{ + unsigned int i; + struct sender_context *snd_ctx = malloc(sizeof(struct sender_context) + + num_fds * sizeof(int)); + + if (!snd_ctx) + barf("malloc()"); + + for (i = 0; i < num_fds; i++) { + int fds[2]; + struct receiver_context *ctx = malloc(sizeof(*ctx)); + + if (!ctx) + barf("malloc()"); + + + /* Create the pipe between client and server */ + fdpair(fds); + + ctx->num_packets = num_fds * loops; + ctx->in_fds[0] = fds[0]; + ctx->in_fds[1] = fds[1]; + ctx->ready_out = ready_out; + ctx->wakefd = wakefd; + + pth[i] = create_worker(ctx, (void *)receiver); + + snd_ctx->out_fds[i] = fds[1]; + if (!thread_mode) + close(fds[0]); + } + + /* Now we have all the fds, fork the senders */ + for (i = 0; i < num_fds; i++) { + snd_ctx->ready_out = ready_out; + snd_ctx->wakefd = wakefd; + snd_ctx->num_fds = num_fds; + + pth[num_fds+i] = create_worker(snd_ctx, (void *)sender); + } + + /* Close the fds we have left */ + if (!thread_mode) + for (i = 0; i < num_fds; i++) + close(snd_ctx->out_fds[i]); + + /* Return number of children to reap */ + return num_fds * 2; +} + +static const struct option options[] = { + OPT_BOOLEAN('p', "pipe", &use_pipes, + "Use pipe() instead of socketpair()"), + OPT_BOOLEAN('t', "thread", &thread_mode, + "Be multi thread instead of multi process"), + OPT_UINTEGER('g', "group", &num_groups, "Specify number of groups"), + OPT_UINTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_END() +}; + +static const char * const bench_sched_message_usage[] = { + "perf bench sched messaging <options>", + NULL +}; + +int bench_sched_messaging(int argc, const char **argv, + const char *prefix __used) +{ + unsigned int i, total_children; + struct timeval start, stop, diff; + unsigned int num_fds = 20; + int readyfds[2], wakefds[2]; + char dummy; + pthread_t *pth_tab; + + argc = parse_options(argc, argv, options, + bench_sched_message_usage, 0); + + pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t)); + if (!pth_tab) + barf("main:malloc()"); + + fdpair(readyfds); + fdpair(wakefds); + + total_children = 0; + for (i = 0; i < num_groups; i++) + total_children += group(pth_tab+total_children, num_fds, + readyfds[1], wakefds[0]); + + /* Wait for everyone to be ready */ + for (i = 0; i < total_children; i++) + if (read(readyfds[0], &dummy, 1) != 1) + barf("Reading for readyfds"); + + gettimeofday(&start, NULL); + + /* Kick them off */ + if (write(wakefds[1], &dummy, 1) != 1) + barf("Writing to start them"); + + /* Reap them all */ + for (i = 0; i < total_children; i++) + reap_worker(pth_tab[i]); + + gettimeofday(&stop, NULL); + + timersub(&stop, &start, &diff); + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + printf("# %d sender and receiver %s per group\n", + num_fds, thread_mode ? "threads" : "processes"); + printf("# %d groups == %d %s run\n\n", + num_groups, num_groups * 2 * num_fds, + thread_mode ? "threads" : "processes"); + printf(" %14s: %lu.%03lu [sec]\n", "Total time", + diff.tv_sec, + (unsigned long) (diff.tv_usec/1000)); + break; + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", diff.tv_sec, + (unsigned long) (diff.tv_usec/1000)); + break; + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c new file mode 100644 index 00000000..0c7454f8 --- /dev/null +++ b/tools/perf/bench/sched-pipe.c @@ -0,0 +1,127 @@ +/* + * + * sched-pipe.c + * + * pipe: Benchmark for pipe() + * + * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com> + * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c + * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + * + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../builtin.h" +#include "bench.h" + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/wait.h> +#include <linux/unistd.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <sys/time.h> +#include <sys/types.h> + +#define LOOPS_DEFAULT 1000000 +static int loops = LOOPS_DEFAULT; + +static const struct option options[] = { + OPT_INTEGER('l', "loop", &loops, + "Specify number of loops"), + OPT_END() +}; + +static const char * const bench_sched_pipe_usage[] = { + "perf bench sched pipe <options>", + NULL +}; + +int bench_sched_pipe(int argc, const char **argv, + const char *prefix __used) +{ + int pipe_1[2], pipe_2[2]; + int m = 0, i; + struct timeval start, stop, diff; + unsigned long long result_usec = 0; + + /* + * why does "ret" exist? + * discarding returned value of read(), write() + * causes error in building environment for perf + */ + int __used ret, wait_stat; + pid_t pid, retpid; + + argc = parse_options(argc, argv, options, + bench_sched_pipe_usage, 0); + + assert(!pipe(pipe_1)); + assert(!pipe(pipe_2)); + + pid = fork(); + assert(pid >= 0); + + gettimeofday(&start, NULL); + + if (!pid) { + for (i = 0; i < loops; i++) { + ret = read(pipe_1[0], &m, sizeof(int)); + ret = write(pipe_2[1], &m, sizeof(int)); + } + } else { + for (i = 0; i < loops; i++) { + ret = write(pipe_1[1], &m, sizeof(int)); + ret = read(pipe_2[0], &m, sizeof(int)); + } + } + + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + + if (pid) { + retpid = waitpid(pid, &wait_stat, 0); + assert((retpid == pid) && WIFEXITED(wait_stat)); + } else { + exit(0); + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + printf("# Executed %d pipe operations between two tasks\n\n", + loops); + + result_usec = diff.tv_sec * 1000000; + result_usec += diff.tv_usec; + + printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", + diff.tv_sec, + (unsigned long) (diff.tv_usec/1000)); + + printf(" %14lf usecs/op\n", + (double)result_usec / (double)loops); + printf(" %14d ops/sec\n", + (int)((double)loops / + ((double)result_usec / (double)1000000))); + break; + + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", + diff.tv_sec, + (unsigned long) (diff.tv_usec / 1000)); + break; + + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} |