This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "".
The branch, master has been updated via 9ccda90b1788836848ecc5977d9cd8a807ee20c9 (commit) via 97b316f2342a317843b476094ed7323ca899504c (commit) via bd2775a49b137034294085b0fc5bf129accc1ea8 (commit) from 6e741deee25c81429112b82ba54f09205bb7c0ab (commit)
Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below.
- Log ----------------------------------------------------------------- commit 9ccda90b1788836848ecc5977d9cd8a807ee20c9 Author: Petri Savolainen petri.savolainen@linaro.org Date: Fri Jun 29 16:00:09 2018 +0300
linux-gen: ring: remove unnecessary r_tail synchronization
Reader data cannot be overwritten until the reader itself enqueues data back to the ring. The ring size is equal to or larger than the maximum number of items that will be stored in the ring.
Signed-off-by: Petri Savolainen petri.savolainen@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/platform/linux-generic/include/odp_ring_internal.h b/platform/linux-generic/include/odp_ring_internal.h index 130d74cc..97673bef 100644 --- a/platform/linux-generic/include/odp_ring_internal.h +++ b/platform/linux-generic/include/odp_ring_internal.h @@ -36,7 +36,6 @@ typedef struct ODP_ALIGNED_CACHE {
/* Reader head and tail */ odp_atomic_u32_t r_head; - odp_atomic_u32_t r_tail;
uint32_t data[0]; } ring_t; @@ -57,14 +56,12 @@ static inline void ring_init(ring_t *ring) odp_atomic_init_u32(&ring->w_head, 0); odp_atomic_init_u32(&ring->w_tail, 0); odp_atomic_init_u32(&ring->r_head, 0); - odp_atomic_init_u32(&ring->r_tail, 0); }
/* Dequeue data from the ring head */ static inline uint32_t ring_deq(ring_t *ring, uint32_t mask) { uint32_t head, tail, new_head; - uint32_t data;
/* Load/CAS acquire of r_head ensures that w_tail load happens after * r_head load, and thus head value is always behind or equal to tail @@ -81,20 +78,12 @@ static inline uint32_t ring_deq(ring_t *ring, uint32_t mask) new_head = head + 1;
} while (odp_unlikely(cas_mo_u32(&ring->r_head, &head, new_head, - __ATOMIC_ACQUIRE, + __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) == 0));
- /* Read queue index */ - data = ring->data[new_head & mask]; - - /* Wait until other readers have updated the tail */ - while (odp_unlikely(odp_atomic_load_acq_u32(&ring->r_tail) != head)) - odp_cpu_pause(); - - /* Now update the reader tail */ - odp_atomic_store_rel_u32(&ring->r_tail, new_head); - - return data; + /* Read data. CAS acquire-release ensures that data read + * does not move above from here. */ + return ring->data[new_head & mask]; }
/* Dequeue multiple data from the ring head. Num is smaller than ring size. */ @@ -123,20 +112,14 @@ static inline uint32_t ring_deq_multi(ring_t *ring, uint32_t mask, new_head = head + num;
} while (odp_unlikely(cas_mo_u32(&ring->r_head, &head, new_head, - __ATOMIC_ACQUIRE, + __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) == 0));
- /* Read queue index */ + /* Read data. CAS acquire-release ensures that data read + * does not move above from here. */ for (i = 0; i < num; i++) data[i] = ring->data[(head + 1 + i) & mask];
- /* Wait until other readers have updated the tail */ - while (odp_unlikely(odp_atomic_load_acq_u32(&ring->r_tail) != head)) - odp_cpu_pause(); - - /* Now update the reader tail */ - odp_atomic_store_rel_u32(&ring->r_tail, new_head); - return num; }
@@ -149,10 +132,6 @@ static inline void ring_enq(ring_t *ring, uint32_t mask, uint32_t data) old_head = odp_atomic_fetch_inc_u32(&ring->w_head); new_head = old_head + 1;
- /* Ring is full. Wait for the last reader to finish. */ - while (odp_unlikely(odp_atomic_load_acq_u32(&ring->r_tail) == new_head)) - odp_cpu_pause(); - /* Write data */ ring->data[new_head & mask] = data;
@@ -160,7 +139,7 @@ static inline void ring_enq(ring_t *ring, uint32_t mask, uint32_t data) while (odp_unlikely(odp_atomic_load_acq_u32(&ring->w_tail) != old_head)) odp_cpu_pause();
- /* Now update the writer tail */ + /* Release the new writer tail, readers acquire it. */ odp_atomic_store_rel_u32(&ring->w_tail, new_head); }
@@ -174,10 +153,6 @@ static inline void ring_enq_multi(ring_t *ring, uint32_t mask, uint32_t data[], old_head = odp_atomic_fetch_add_u32(&ring->w_head, num); new_head = old_head + 1;
- /* Ring is full. Wait for the last reader to finish. */ - while (odp_unlikely(odp_atomic_load_acq_u32(&ring->r_tail) == new_head)) - odp_cpu_pause(); - /* Write data */ for (i = 0; i < num; i++) ring->data[(new_head + i) & mask] = data[i]; @@ -186,7 +161,7 @@ static inline void ring_enq_multi(ring_t *ring, uint32_t mask, uint32_t data[], while (odp_unlikely(odp_atomic_load_acq_u32(&ring->w_tail) != old_head)) odp_cpu_pause();
- /* Now update the writer tail */ + /* Release the new writer tail, readers acquire it. */ odp_atomic_store_rel_u32(&ring->w_tail, old_head + num); }
commit 97b316f2342a317843b476094ed7323ca899504c Author: Petri Savolainen petri.savolainen@linaro.org Date: Tue Jul 3 13:53:01 2018 +0300
test: sched_perf: add new scheduler performance test
Standalone test for scheduler performance testing with multiple threads.
Signed-off-by: Petri Savolainen petri.savolainen@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/test/performance/.gitignore b/test/performance/.gitignore index b99ba2ab..499c4d33 100644 --- a/test/performance/.gitignore +++ b/test/performance/.gitignore @@ -10,5 +10,6 @@ odp_pktio_perf odp_pool_perf odp_queue_perf odp_sched_latency +odp_sched_perf odp_sched_pktio odp_scheduling diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index a8e0f439..a110c23a 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -7,7 +7,8 @@ EXECUTABLES = odp_bench_packet \ odp_ipsec \ odp_pktio_perf \ odp_pool_perf \ - odp_queue_perf + odp_queue_perf \ + odp_sched_perf
COMPILE_ONLY = odp_l2fwd \ odp_pktio_ordered \ @@ -42,6 +43,7 @@ odp_scheduling_SOURCES = odp_scheduling.c odp_pktio_perf_SOURCES = odp_pktio_perf.c odp_pool_perf_SOURCES = odp_pool_perf.c odp_queue_perf_SOURCES = odp_queue_perf.c +odp_sched_perf_SOURCES = odp_sched_perf.c
# l2fwd test depends on generator example EXTRA_odp_l2fwd_DEPENDENCIES = example-generator diff --git a/test/performance/odp_sched_perf.c b/test/performance/odp_sched_perf.c new file mode 100644 index 00000000..e76725cc --- /dev/null +++ b/test/performance/odp_sched_perf.c @@ -0,0 +1,559 @@ +/* Copyright (c) 2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdlib.h> +#include <getopt.h> + +#include <odp_api.h> +#include <odp/helper/odph_api.h> + +typedef struct test_options_t { + uint32_t num_cpu; + uint32_t num_event; + uint32_t num_round; + uint32_t max_burst; + int queue_type; + +} test_options_t; + +typedef struct test_stat_t { + uint64_t rounds; + uint64_t enqueues; + uint64_t events; + uint64_t nsec; + uint64_t cycles; + +} test_stat_t; + +typedef struct test_global_t { + test_options_t test_options; + + odp_barrier_t barrier; + odp_pool_t pool; + odp_cpumask_t cpumask; + odp_queue_t queue[ODP_THREAD_COUNT_MAX]; + odph_odpthread_t thread_tbl[ODP_THREAD_COUNT_MAX]; + test_stat_t stat[ODP_THREAD_COUNT_MAX]; + +} test_global_t; + +test_global_t test_global; + +static void print_usage(void) +{ + printf("\n" + "Scheduler performance test\n" + "\n" + "Usage: odp_sched_perf [options]\n" + "\n" + " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. Default 1.\n" + " -e, --num_event Number of events per queue\n" + " -r, --num_round Number of rounds\n" + " -b, --burst Maximum number of events per operation\n" + " -t, --type Queue type. 0: parallel, 1: atomic, 2: ordered. 
Default 0.\n" + " -h, --help This help\n" + "\n"); +} + +static int parse_options(int argc, char *argv[], test_options_t *test_options) +{ + int opt; + int long_index; + int ret = 0; + + static const struct option longopts[] = { + {"num_cpu", required_argument, NULL, 'c'}, + {"num_event", required_argument, NULL, 'e'}, + {"num_round", required_argument, NULL, 'r'}, + {"burst", required_argument, NULL, 'b'}, + {"type", required_argument, NULL, 't'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + static const char *shortopts = "+c:e:r:b:t:h"; + + test_options->num_cpu = 1; + test_options->num_event = 100; + test_options->num_round = 100000; + test_options->max_burst = 100; + test_options->queue_type = 0; + + while (1) { + opt = getopt_long(argc, argv, shortopts, longopts, &long_index); + + if (opt == -1) + break; + + switch (opt) { + case 'c': + test_options->num_cpu = atoi(optarg); + break; + case 'e': + test_options->num_event = atoi(optarg); + break; + case 'r': + test_options->num_round = atoi(optarg); + break; + case 'b': + test_options->max_burst = atoi(optarg); + break; + case 't': + test_options->queue_type = atoi(optarg); + break; + case 'h': + /* fall through */ + default: + print_usage(); + ret = -1; + break; + } + } + + return ret; +} + +static int set_num_cpu(test_global_t *global) +{ + int ret; + test_options_t *test_options = &global->test_options; + int num_cpu = test_options->num_cpu; + + /* One thread used for the main thread */ + if (num_cpu > ODP_THREAD_COUNT_MAX - 1) { + printf("Error: Too many workers. Maximum is %i.\n", + ODP_THREAD_COUNT_MAX - 1); + return -1; + } + + ret = odp_cpumask_default_worker(&global->cpumask, num_cpu); + + if (num_cpu && ret != num_cpu) { + printf("Error: Too many workers. 
Max supported %i\n.", ret); + return -1; + } + + /* Zero: all available workers */ + if (num_cpu == 0) { + num_cpu = ret; + test_options->num_cpu = num_cpu; + } + + odp_barrier_init(&global->barrier, num_cpu); + + return 0; +} + +static int create_pool(test_global_t *global) +{ + odp_pool_capability_t pool_capa; + odp_pool_param_t pool_param; + odp_pool_t pool; + test_options_t *test_options = &global->test_options; + uint32_t num_event = test_options->num_event; + uint32_t num_round = test_options->num_round; + uint32_t max_burst = test_options->max_burst; + int num_cpu = test_options->num_cpu; + uint32_t tot_event = num_event * num_cpu; + + printf("\nScheduler performance test\n"); + printf(" num cpu %i\n", num_cpu); + printf(" num rounds %u\n", num_round); + printf(" num events %u\n", tot_event); + printf(" events per queue %u\n", num_event); + printf(" max burst %u\n", max_burst); + + if (odp_pool_capability(&pool_capa)) { + printf("Error: Pool capa failed.\n"); + return -1; + } + + if (tot_event > pool_capa.buf.max_num) { + printf("Max events supported %u\n", pool_capa.buf.max_num); + return -1; + } + + odp_pool_param_init(&pool_param); + pool_param.type = ODP_POOL_BUFFER; + pool_param.buf.num = tot_event; + + pool = odp_pool_create("sched perf", &pool_param); + + if (pool == ODP_POOL_INVALID) { + printf("Error: Pool create failed.\n"); + return -1; + } + + global->pool = pool; + + return 0; +} + +static int create_queues(test_global_t *global) +{ + odp_queue_capability_t queue_capa; + odp_queue_param_t queue_param; + odp_queue_t queue; + odp_buffer_t buf; + odp_schedule_sync_t sync; + const char *type_str; + uint32_t i, j; + test_options_t *test_options = &global->test_options; + uint32_t num_event = test_options->num_event; + uint32_t num_queue = test_options->num_cpu; + int type = test_options->queue_type; + odp_pool_t pool = global->pool; + + if (type == 0) { + type_str = "parallel"; + sync = ODP_SCHED_SYNC_PARALLEL; + } else if (type == 1) { + type_str = 
"atomic"; + sync = ODP_SCHED_SYNC_ATOMIC; + } else { + type_str = "ordered"; + sync = ODP_SCHED_SYNC_ORDERED; + } + + printf(" num queues %u\n", num_queue); + printf(" queue type %s\n\n", type_str); + + if (odp_queue_capability(&queue_capa)) { + printf("Error: Queue capa failed.\n"); + return -1; + } + + if (num_queue > queue_capa.sched.max_num) { + printf("Max queues supported %u\n", queue_capa.sched.max_num); + return -1; + } + + if (queue_capa.sched.max_size && + num_event > queue_capa.sched.max_size) { + printf("Max events per queue %u\n", queue_capa.sched.max_size); + return -1; + } + + for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) + global->queue[i] = ODP_QUEUE_INVALID; + + odp_queue_param_init(&queue_param); + queue_param.type = ODP_QUEUE_TYPE_SCHED; + queue_param.sched.prio = ODP_SCHED_PRIO_DEFAULT; + queue_param.sched.sync = sync; + queue_param.sched.group = ODP_SCHED_GROUP_ALL; + queue_param.size = num_event; + + for (i = 0; i < num_queue; i++) { + queue = odp_queue_create(NULL, &queue_param); + + if (queue == ODP_QUEUE_INVALID) { + printf("Error: Queue create failed %u\n", i); + return -1; + } + + global->queue[i] = queue; + } + + for (i = 0; i < num_queue; i++) { + queue = global->queue[i]; + + for (j = 0; j < num_event; j++) { + buf = odp_buffer_alloc(pool); + + if (buf == ODP_BUFFER_INVALID) { + printf("Error: Alloc failed %u/%u\n", i, j); + return -1; + } + + if (odp_queue_enq(queue, odp_buffer_to_event(buf))) { + printf("Error: Enqueue failed %u/%u\n", i, j); + return -1; + } + } + } + + return 0; +} + +static int destroy_queues(test_global_t *global) +{ + uint32_t i; + odp_event_t ev; + uint64_t wait; + + wait = odp_schedule_wait_time(200 * ODP_TIME_MSEC_IN_NS); + + while ((ev = odp_schedule(NULL, wait)) != ODP_EVENT_INVALID) + odp_event_free(ev); + + for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { + if (global->queue[i] != ODP_QUEUE_INVALID) { + if (odp_queue_destroy(global->queue[i])) { + printf("Error: Queue destroy failed %u\n", i); + return -1; + } 
+ } + } + + return 0; +} + +static int test_sched(void *arg) +{ + int num, num_enq, ret, thr; + uint32_t i, rounds; + uint64_t c1, c2, cycles, nsec; + uint64_t events, enqueues; + odp_time_t t1, t2; + odp_queue_t queue; + test_global_t *global = arg; + test_options_t *test_options = &global->test_options; + uint32_t num_round = test_options->num_round; + uint32_t max_burst = test_options->max_burst; + odp_event_t ev[max_burst]; + + thr = odp_thread_id(); + + for (i = 0; i < max_burst; i++) + ev[i] = ODP_EVENT_INVALID; + + enqueues = 0; + events = 0; + ret = 0; + + /* Start all workers at the same time */ + odp_barrier_wait(&global->barrier); + + t1 = odp_time_local(); + c1 = odp_cpu_cycles(); + + for (rounds = 0; rounds < num_round; rounds++) { + num = odp_schedule_multi(&queue, ODP_SCHED_NO_WAIT, + ev, max_burst); + + if (odp_likely(num > 0)) { + events += num; + i = 0; + + while (num) { + num_enq = odp_queue_enq_multi(queue, &ev[i], + num); + + if (num_enq < 0) { + printf("Error: Enqueue failed. Round %u\n", + rounds); + ret = -1; + break; + } + + num -= num_enq; + i += num_enq; + enqueues++; + } + + if (odp_unlikely(ret)) + break; + + continue; + } + + /* <0 not specified as an error but checking anyway */ + if (num < 0) { + printf("Error: Sched failed. 
Round %u\n", rounds); + ret = -1; + break; + } + } + + c2 = odp_cpu_cycles(); + t2 = odp_time_local(); + + nsec = odp_time_diff_ns(t2, t1); + cycles = odp_cpu_cycles_diff(c2, c1); + + /* Update stats*/ + global->stat[thr].rounds = rounds; + global->stat[thr].enqueues = enqueues; + global->stat[thr].events = events; + global->stat[thr].nsec = nsec; + global->stat[thr].cycles = cycles; + + /* Pause scheduling before thread exit */ + odp_schedule_pause(); + + while (1) { + ev[0] = odp_schedule(&queue, ODP_SCHED_NO_WAIT); + + if (ev[0] == ODP_EVENT_INVALID) + break; + + odp_queue_enq(queue, ev[0]); + } + + return ret; +} + +static int start_workers(test_global_t *global, odp_instance_t instance) +{ + odph_odpthread_params_t thr_params; + test_options_t *test_options = &global->test_options; + int num_cpu = test_options->num_cpu; + + memset(&thr_params, 0, sizeof(thr_params)); + thr_params.thr_type = ODP_THREAD_WORKER; + thr_params.instance = instance; + thr_params.start = test_sched; + thr_params.arg = global; + + if (odph_odpthreads_create(global->thread_tbl, &global->cpumask, + &thr_params) != num_cpu) + return -1; + + return 0; +} + +static void print_stat(test_global_t *global) +{ + int i, num; + double rounds_ave, enqueues_ave, events_ave, nsec_ave, cycles_ave; + test_options_t *test_options = &global->test_options; + int num_cpu = test_options->num_cpu; + uint64_t rounds_sum = 0; + uint64_t enqueues_sum = 0; + uint64_t events_sum = 0; + uint64_t nsec_sum = 0; + uint64_t cycles_sum = 0; + + /* Averages */ + for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { + rounds_sum += global->stat[i].rounds; + enqueues_sum += global->stat[i].enqueues; + events_sum += global->stat[i].events; + nsec_sum += global->stat[i].nsec; + cycles_sum += global->stat[i].cycles; + } + + if (rounds_sum == 0) { + printf("No results.\n"); + return; + } + + rounds_ave = rounds_sum / num_cpu; + enqueues_ave = enqueues_sum / num_cpu; + events_ave = events_sum / num_cpu; + nsec_ave = nsec_sum / 
num_cpu; + cycles_ave = cycles_sum / num_cpu; + num = 0; + + printf("RESULTS - per thread (Million events per sec):\n"); + printf("----------------------------------------------\n"); + printf(" 1 2 3 4 5 6 7 8 9 10"); + + for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { + if (global->stat[i].rounds) { + if ((num % 10) == 0) + printf("\n "); + + printf("%6.1f ", (1000.0 * global->stat[i].events) / + global->stat[i].nsec); + num++; + } + } + printf("\n\n"); + + printf("RESULTS - average over %i threads:\n", num_cpu); + printf("----------------------------------\n"); + printf(" schedule calls: %.3f\n", rounds_ave); + printf(" enqueue calls: %.3f\n", enqueues_ave); + printf(" duration: %.3f msec\n", nsec_ave / 1000000); + printf(" num cycles: %.3f M\n", cycles_ave / 1000000); + printf(" cycles per round: %.3f\n", + cycles_ave / rounds_ave); + printf(" cycles per event: %.3f\n", + cycles_ave / events_ave); + printf(" ave events received: %.3f\n", + events_ave / rounds_ave); + printf(" rounds per sec: %.3f M\n", + (1000.0 * rounds_ave) / nsec_ave); + printf(" events per sec: %.3f M\n\n", + (1000.0 * events_ave) / nsec_ave); +} + +int main(int argc, char **argv) +{ + odp_instance_t instance; + odp_init_t init; + test_global_t *global; + + global = &test_global; + memset(global, 0, sizeof(test_global_t)); + global->pool = ODP_POOL_INVALID; + + if (parse_options(argc, argv, &global->test_options)) + return -1; + + /* List features not to be used */ + odp_init_param_init(&init); + init.not_used.feat.cls = 1; + init.not_used.feat.crypto = 1; + init.not_used.feat.ipsec = 1; + init.not_used.feat.timer = 1; + init.not_used.feat.tm = 1; + + /* Init ODP before calling anything else */ + if (odp_init_global(&instance, &init, NULL)) { + printf("Error: Global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local(instance, ODP_THREAD_CONTROL)) { + printf("Error: Local init failed.\n"); + return -1; + } + + if (set_num_cpu(global)) + return -1; + + if 
(create_pool(global)) + return -1; + + if (create_queues(global)) + return -1; + + /* Start workers */ + start_workers(global, instance); + + /* Wait workers to exit */ + odph_odpthreads_join(global->thread_tbl); + + if (destroy_queues(global)) + return -1; + + print_stat(global); + + if (odp_pool_destroy(global->pool)) { + printf("Error: Pool destroy failed.\n"); + return -1; + } + + if (odp_term_local()) { + printf("Error: term local failed.\n"); + return -1; + } + + if (odp_term_global(instance)) { + printf("Error: term global failed.\n"); + return -1; + } + + return 0; +}
commit bd2775a49b137034294085b0fc5bf129accc1ea8 Author: Petri Savolainen petri.savolainen@linaro.org Date: Mon Jul 2 10:59:21 2018 +0300
test: pool_perf: add new pool performance test
Standalone test for pool performance testing with multiple threads.
Signed-off-by: Petri Savolainen petri.savolainen@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/test/performance/.gitignore b/test/performance/.gitignore index febe86ed..b99ba2ab 100644 --- a/test/performance/.gitignore +++ b/test/performance/.gitignore @@ -7,6 +7,7 @@ odp_ipsec odp_l2fwd odp_pktio_ordered odp_pktio_perf +odp_pool_perf odp_queue_perf odp_sched_latency odp_sched_pktio diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index 5b45f40e..a8e0f439 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -6,6 +6,7 @@ EXECUTABLES = odp_bench_packet \ odp_crypto \ odp_ipsec \ odp_pktio_perf \ + odp_pool_perf \ odp_queue_perf
COMPILE_ONLY = odp_l2fwd \ @@ -39,6 +40,7 @@ odp_sched_latency_SOURCES = odp_sched_latency.c odp_sched_pktio_SOURCES = odp_sched_pktio.c odp_scheduling_SOURCES = odp_scheduling.c odp_pktio_perf_SOURCES = odp_pktio_perf.c +odp_pool_perf_SOURCES = odp_pool_perf.c odp_queue_perf_SOURCES = odp_queue_perf.c
# l2fwd test depends on generator example diff --git a/test/performance/odp_pool_perf.c b/test/performance/odp_pool_perf.c new file mode 100644 index 00000000..4a77f327 --- /dev/null +++ b/test/performance/odp_pool_perf.c @@ -0,0 +1,401 @@ +/* Copyright (c) 2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdlib.h> +#include <getopt.h> + +#include <odp_api.h> +#include <odp/helper/odph_api.h> + +typedef struct test_options_t { + uint32_t num_cpu; + uint32_t num_event; + uint32_t num_round; + uint32_t max_burst; + +} test_options_t; + +typedef struct test_stat_t { + uint64_t rounds; + uint64_t frees; + uint64_t events; + uint64_t nsec; + uint64_t cycles; + +} test_stat_t; + +typedef struct test_global_t { + test_options_t test_options; + + odp_barrier_t barrier; + odp_pool_t pool; + odp_cpumask_t cpumask; + odph_odpthread_t thread_tbl[ODP_THREAD_COUNT_MAX]; + test_stat_t stat[ODP_THREAD_COUNT_MAX]; + +} test_global_t; + +test_global_t test_global; + +static void print_usage(void) +{ + printf("\n" + "Pool performance test\n" + "\n" + "Usage: odp_pool_perf [options]\n" + "\n" + " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. 
Default 1.\n" + " -e, --num_event Number of events\n" + " -r, --num_round Number of rounds\n" + " -b, --burst Maximum number of events per operation\n" + " -h, --help This help\n" + "\n"); +} + +static int parse_options(int argc, char *argv[], test_options_t *test_options) +{ + int opt; + int long_index; + int ret = 0; + + static const struct option longopts[] = { + {"num_cpu", required_argument, NULL, 'c'}, + {"num_event", required_argument, NULL, 'e'}, + {"num_round", required_argument, NULL, 'r'}, + {"burst", required_argument, NULL, 'b'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + static const char *shortopts = "+c:e:r:b:h"; + + test_options->num_cpu = 1; + test_options->num_event = 1000; + test_options->num_round = 100000; + test_options->max_burst = 100; + + while (1) { + opt = getopt_long(argc, argv, shortopts, longopts, &long_index); + + if (opt == -1) + break; + + switch (opt) { + case 'c': + test_options->num_cpu = atoi(optarg); + break; + case 'e': + test_options->num_event = atoi(optarg); + break; + case 'r': + test_options->num_round = atoi(optarg); + break; + case 'b': + test_options->max_burst = atoi(optarg); + break; + case 'h': + /* fall through */ + default: + print_usage(); + ret = -1; + break; + } + } + + return ret; +} + +static int set_num_cpu(test_global_t *global) +{ + int ret; + test_options_t *test_options = &global->test_options; + int num_cpu = test_options->num_cpu; + + /* One thread used for the main thread */ + if (num_cpu > ODP_THREAD_COUNT_MAX - 1) { + printf("Error: Too many workers. Maximum is %i.\n", + ODP_THREAD_COUNT_MAX - 1); + return -1; + } + + ret = odp_cpumask_default_worker(&global->cpumask, num_cpu); + + if (num_cpu && ret != num_cpu) { + printf("Error: Too many workers. 
Max supported %i\n.", ret); + return -1; + } + + /* Zero: all available workers */ + if (num_cpu == 0) { + num_cpu = ret; + test_options->num_cpu = num_cpu; + } + + odp_barrier_init(&global->barrier, num_cpu); + + return 0; +} + +static int create_pool(test_global_t *global) +{ + odp_pool_capability_t pool_capa; + odp_pool_param_t pool_param; + odp_pool_t pool; + test_options_t *test_options = &global->test_options; + uint32_t num_event = test_options->num_event; + uint32_t num_round = test_options->num_round; + uint32_t max_burst = test_options->max_burst; + uint32_t num_cpu = test_options->num_cpu; + + printf("\nPool performance test\n"); + printf(" num cpu %u\n", num_cpu); + printf(" num rounds %u\n", num_round); + printf(" num events %u\n", num_event); + printf(" max burst %u\n\n", max_burst); + + if (odp_pool_capability(&pool_capa)) { + printf("Error: Pool capa failed.\n"); + return -1; + } + + if (num_event > pool_capa.buf.max_num) { + printf("Max events supported %u\n", pool_capa.buf.max_num); + return -1; + } + + odp_pool_param_init(&pool_param); + pool_param.type = ODP_POOL_BUFFER; + pool_param.buf.num = num_event; + + pool = odp_pool_create("pool perf", &pool_param); + + if (pool == ODP_POOL_INVALID) { + printf("Error: Pool create failed.\n"); + return -1; + } + + global->pool = pool; + + return 0; +} + +static int test_pool(void *arg) +{ + int num, ret, thr; + uint32_t i, rounds; + uint64_t c1, c2, cycles, nsec; + uint64_t events, frees; + odp_time_t t1, t2; + test_global_t *global = arg; + test_options_t *test_options = &global->test_options; + uint32_t num_round = test_options->num_round; + uint32_t max_burst = test_options->max_burst; + odp_pool_t pool = global->pool; + odp_buffer_t buf[max_burst]; + + thr = odp_thread_id(); + + for (i = 0; i < max_burst; i++) + buf[i] = ODP_BUFFER_INVALID; + + events = 0; + frees = 0; + ret = 0; + + /* Start all workers at the same time */ + odp_barrier_wait(&global->barrier); + + t1 = odp_time_local(); + c1 = 
odp_cpu_cycles(); + + for (rounds = 0; rounds < num_round; rounds++) { + num = odp_buffer_alloc_multi(pool, buf, max_burst); + + if (odp_likely(num > 0)) { + events += num; + odp_buffer_free_multi(buf, num); + frees++; + continue; + } + + if (num < 0) { + printf("Error: Alloc failed. Round %u\n", rounds); + ret = -1; + break; + } + } + + c2 = odp_cpu_cycles(); + t2 = odp_time_local(); + + nsec = odp_time_diff_ns(t2, t1); + cycles = odp_cpu_cycles_diff(c2, c1); + + /* Update stats*/ + global->stat[thr].rounds = rounds; + global->stat[thr].frees = frees; + global->stat[thr].events = events; + global->stat[thr].nsec = nsec; + global->stat[thr].cycles = cycles; + + return ret; +} + +static int start_workers(test_global_t *global, odp_instance_t instance) +{ + odph_odpthread_params_t thr_params; + test_options_t *test_options = &global->test_options; + int num_cpu = test_options->num_cpu; + + memset(&thr_params, 0, sizeof(thr_params)); + thr_params.thr_type = ODP_THREAD_WORKER; + thr_params.instance = instance; + thr_params.start = test_pool; + thr_params.arg = global; + + if (odph_odpthreads_create(global->thread_tbl, &global->cpumask, + &thr_params) != num_cpu) + return -1; + + return 0; +} + +static void print_stat(test_global_t *global) +{ + int i, num; + double rounds_ave, frees_ave, events_ave, nsec_ave, cycles_ave; + test_options_t *test_options = &global->test_options; + int num_cpu = test_options->num_cpu; + uint64_t rounds_sum = 0; + uint64_t frees_sum = 0; + uint64_t events_sum = 0; + uint64_t nsec_sum = 0; + uint64_t cycles_sum = 0; + + /* Averages */ + for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { + rounds_sum += global->stat[i].rounds; + frees_sum += global->stat[i].frees; + events_sum += global->stat[i].events; + nsec_sum += global->stat[i].nsec; + cycles_sum += global->stat[i].cycles; + } + + if (rounds_sum == 0) { + printf("No results.\n"); + return; + } + + rounds_ave = rounds_sum / num_cpu; + frees_ave = frees_sum / num_cpu; + events_ave = events_sum / 
num_cpu; + nsec_ave = nsec_sum / num_cpu; + cycles_ave = cycles_sum / num_cpu; + num = 0; + + printf("RESULTS - per thread (Million events per sec):\n"); + printf("----------------------------------------------\n"); + printf(" 1 2 3 4 5 6 7 8 9 10"); + + for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { + if (global->stat[i].rounds) { + if ((num % 10) == 0) + printf("\n "); + + printf("%6.1f ", (1000.0 * global->stat[i].events) / + global->stat[i].nsec); + num++; + } + } + printf("\n\n"); + + printf("RESULTS - average over %i threads:\n", num_cpu); + printf("----------------------------------\n"); + printf(" alloc calls: %.3f\n", rounds_ave); + printf(" free calls: %.3f\n", frees_ave); + printf(" duration: %.3f msec\n", nsec_ave / 1000000); + printf(" num cycles: %.3f M\n", cycles_ave / 1000000); + printf(" cycles per round: %.3f\n", + cycles_ave / rounds_ave); + printf(" cycles per event: %.3f\n", + cycles_ave / events_ave); + printf(" ave events allocated: %.3f\n", + events_ave / rounds_ave); + printf(" operations per sec: %.3f M\n", + (1000.0 * rounds_ave) / nsec_ave); + printf(" events per sec: %.3f M\n\n", + (1000.0 * events_ave) / nsec_ave); +} + +int main(int argc, char **argv) +{ + odp_instance_t instance; + odp_init_t init; + test_global_t *global; + + global = &test_global; + memset(global, 0, sizeof(test_global_t)); + global->pool = ODP_POOL_INVALID; + + if (parse_options(argc, argv, &global->test_options)) + return -1; + + /* List features not to be used */ + odp_init_param_init(&init); + init.not_used.feat.cls = 1; + init.not_used.feat.crypto = 1; + init.not_used.feat.ipsec = 1; + init.not_used.feat.schedule = 1; + init.not_used.feat.timer = 1; + init.not_used.feat.tm = 1; + + /* Init ODP before calling anything else */ + if (odp_init_global(&instance, &init, NULL)) { + printf("Error: Global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local(instance, ODP_THREAD_CONTROL)) { + printf("Error: Local init failed.\n"); + return 
-1; + } + + if (set_num_cpu(global)) + return -1; + + if (create_pool(global)) + return -1; + + /* Start workers */ + start_workers(global, instance); + + /* Wait workers to exit */ + odph_odpthreads_join(global->thread_tbl); + + print_stat(global); + + if (odp_pool_destroy(global->pool)) { + printf("Error: Pool destroy failed.\n"); + return -1; + } + + if (odp_term_local()) { + printf("Error: term local failed.\n"); + return -1; + } + + if (odp_term_global(instance)) { + printf("Error: term global failed.\n"); + return -1; + } + + return 0; +}
-----------------------------------------------------------------------
Summary of changes: platform/linux-generic/include/odp_ring_internal.h | 43 +- test/performance/.gitignore | 2 + test/performance/Makefile.am | 6 +- test/performance/odp_pool_perf.c | 401 +++++++++++++++ test/performance/odp_sched_perf.c | 559 +++++++++++++++++++++ 5 files changed, 976 insertions(+), 35 deletions(-) create mode 100644 test/performance/odp_pool_perf.c create mode 100644 test/performance/odp_sched_perf.c
hooks/post-receive