This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "".
The branch, master has been updated
       via  0a94dd3322dcbbd791e45647b238997553bfd1bc (commit)
       via  b12bfc6ebd9e7785d069ebea2861d7ac6ca237dd (commit)
       via  43de90c46e321f00ed7bf3f66c0c3c2ad41b5afd (commit)
      from  8ddca7c5d2d1f6b4311501e3ce6eef6d22e40591 (commit)
Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below.
- Log -----------------------------------------------------------------
commit 0a94dd3322dcbbd791e45647b238997553bfd1bc
Author: Petri Savolainen <petri.savolainen@linaro.org>
Date:   Thu Apr 6 14:59:04 2017 +0300
linux-gen: sched: optimize group scheduling
Use separate priority queues for different groups. Sharing the same
priority queue over multiple groups caused multiple issues:

* latency and ordering issues when threads push back events (from
  wrong groups) to the tail of the priority queue
* unnecessary contention (scaling issues) when threads belong to
  different groups
Lowered the maximum number of groups from 256 to 32 (in the default
configuration) to limit the memory usage of the priority queues. This
should be enough for most users.
Signed-off-by: Petri Savolainen <petri.savolainen@linaro.org>
Reviewed-and-tested-by: Carl Wallén <carl.wallen@nokia.com>
Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
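[Editorial sketch, not part of the commit: a minimal, self-contained C
illustration of the new layout, with hypothetical stand-in types and
names; the real structures are in the odp_schedule.c diff below. The
group becomes the leading index of the priority queue array, and each
thread round robins only over the groups it has joined.]

    #include <stdint.h>

    #define NUM_SCHED_GRPS  32
    #define NUM_PRIO        8
    #define QUEUES_PER_PRIO 4

    /* Hypothetical stand-in for the per-queue ring type */
    typedef struct {
            uint32_t head;
            uint32_t tail;
    } ring_t;

    /* Group is now the leading index: queues of different groups
     * never share a ring */
    static ring_t prio_q[NUM_SCHED_GRPS][NUM_PRIO][QUEUES_PER_PRIO];

    /* Per-thread group table, rebuilt when group membership changes */
    typedef struct {
            int num_grp;
            int grp_rr;                  /* round robin position */
            uint8_t grp[NUM_SCHED_GRPS]; /* groups this thread joined */
    } thr_grps_t;

    /* Pick the next ring to poll: only rings of joined groups are
     * touched, so a thread never dequeues (and pushes back) events
     * that belong to a group it is not a member of. */
    static ring_t *next_ring(thr_grps_t *t, int prio, int id)
    {
            int grp = t->grp[t->grp_rr];

            t->grp_rr = (t->grp_rr + 1) % t->num_grp;
            return &prio_q[grp][prio][id];
    }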
diff --git a/platform/linux-generic/odp_schedule.c b/platform/linux-generic/odp_schedule.c
index e7079b95..f366e7ed 100644
--- a/platform/linux-generic/odp_schedule.c
+++ b/platform/linux-generic/odp_schedule.c
@@ -34,7 +34,7 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) &&
 		  "normal_prio_is_not_between_highest_and_lowest");
 
 /* Number of scheduling groups */
-#define NUM_SCHED_GRPS 256
+#define NUM_SCHED_GRPS 32
 
 /* Priority queues per priority */
 #define QUEUES_PER_PRIO 4
@@ -163,7 +163,11 @@ typedef struct {
 		ordered_stash_t stash[MAX_ORDERED_STASH];
 	} ordered;
 
+	uint32_t grp_epoch;
+	int num_grp;
+	uint8_t grp[NUM_SCHED_GRPS];
 	uint8_t weight_tbl[WEIGHT_TBL_SIZE];
+	uint8_t grp_weight[WEIGHT_TBL_SIZE];
 
 } sched_local_t;
 
@@ -199,7 +203,7 @@ typedef struct {
 	pri_mask_t pri_mask[NUM_PRIO];
 	odp_spinlock_t mask_lock;
 
-	prio_queue_t prio_q[NUM_PRIO][QUEUES_PER_PRIO];
+	prio_queue_t prio_q[NUM_SCHED_GRPS][NUM_PRIO][QUEUES_PER_PRIO];
 
 	odp_spinlock_t poll_cmd_lock;
 	/* Number of commands in a command queue */
@@ -214,8 +218,10 @@ typedef struct {
 	odp_shm_t shm;
 	uint32_t pri_count[NUM_PRIO][QUEUES_PER_PRIO];
 
-	odp_spinlock_t grp_lock;
-	odp_thrmask_t mask_all;
+	odp_thrmask_t mask_all;
+	odp_spinlock_t grp_lock;
+	odp_atomic_u32_t grp_epoch;
+
 	struct {
 		char name[ODP_SCHED_GROUP_NAME_LEN];
 		odp_thrmask_t mask;
@@ -223,6 +229,7 @@
 	} sched_grp[NUM_SCHED_GRPS];
 
 	struct {
+		int grp;
 		int prio;
 		int queue_per_prio;
 	} queue[ODP_CONFIG_QUEUES];
@@ -273,7 +280,7 @@ static void sched_local_init(void)
 static int schedule_init_global(void)
 {
 	odp_shm_t shm;
-	int i, j;
+	int i, j, grp;
 
 	ODP_DBG("Schedule init ... ");
 
@@ -293,15 +300,20 @@ static int schedule_init_global(void)
 	sched->shm = shm;
 	odp_spinlock_init(&sched->mask_lock);
 
-	for (i = 0; i < NUM_PRIO; i++) {
-		for (j = 0; j < QUEUES_PER_PRIO; j++) {
-			int k;
+	for (grp = 0; grp < NUM_SCHED_GRPS; grp++) {
+		for (i = 0; i < NUM_PRIO; i++) {
+			for (j = 0; j < QUEUES_PER_PRIO; j++) {
+				prio_queue_t *prio_q;
+				int k;
 
-			ring_init(&sched->prio_q[i][j].ring);
+				prio_q = &sched->prio_q[grp][i][j];
+				ring_init(&prio_q->ring);
 
-			for (k = 0; k < PRIO_QUEUE_RING_SIZE; k++)
-				sched->prio_q[i][j].queue_index[k] =
-				PRIO_QUEUE_EMPTY;
+				for (k = 0; k < PRIO_QUEUE_RING_SIZE; k++) {
+					prio_q->queue_index[k] =
+					PRIO_QUEUE_EMPTY;
+				}
+			}
 		}
 	}
 
@@ -317,12 +329,17 @@ static int schedule_init_global(void)
 		sched->pktio_cmd[i].cmd_index = PKTIO_CMD_FREE;
 
 	odp_spinlock_init(&sched->grp_lock);
+	odp_atomic_init_u32(&sched->grp_epoch, 0);
 
 	for (i = 0; i < NUM_SCHED_GRPS; i++) {
 		memset(sched->sched_grp[i].name, 0, ODP_SCHED_GROUP_NAME_LEN);
 		odp_thrmask_zero(&sched->sched_grp[i].mask);
 	}
 
+	sched->sched_grp[ODP_SCHED_GROUP_ALL].allocated = 1;
+	sched->sched_grp[ODP_SCHED_GROUP_WORKER].allocated = 1;
+	sched->sched_grp[ODP_SCHED_GROUP_CONTROL].allocated = 1;
+
 	odp_thrmask_setall(&sched->mask_all);
 
 	ODP_DBG("done\n");
@@ -330,29 +347,38 @@ static int schedule_init_global(void)
 	return 0;
 }
 
+static inline void queue_destroy_finalize(uint32_t qi)
+{
+	sched_cb_queue_destroy_finalize(qi);
+}
+
 static int schedule_term_global(void)
 {
 	int ret = 0;
 	int rc = 0;
-	int i, j;
+	int i, j, grp;
 
-	for (i = 0; i < NUM_PRIO; i++) {
-		for (j = 0; j < QUEUES_PER_PRIO; j++) {
-			ring_t *ring = &sched->prio_q[i][j].ring;
-			uint32_t qi;
+	for (grp = 0; grp < NUM_SCHED_GRPS; grp++) {
+		for (i = 0; i < NUM_PRIO; i++) {
+			for (j = 0; j < QUEUES_PER_PRIO; j++) {
+				ring_t *ring = &sched->prio_q[grp][i][j].ring;
+				uint32_t qi;
 
-			while ((qi = ring_deq(ring, PRIO_QUEUE_MASK)) !=
-			       RING_EMPTY) {
-				odp_event_t events[1];
-				int num;
+				while ((qi = ring_deq(ring, PRIO_QUEUE_MASK)) !=
+				       RING_EMPTY) {
+					odp_event_t events[1];
+					int num;
 
-				num = sched_cb_queue_deq_multi(qi, events, 1);
+					num = sched_cb_queue_deq_multi(qi,
+								       events,
+								       1);
 
-				if (num < 0)
-					sched_cb_queue_destroy_finalize(qi);
+					if (num < 0)
+						queue_destroy_finalize(qi);
 
-				if (num > 0)
-					ODP_ERR("Queue not empty\n");
+					if (num > 0)
+						ODP_ERR("Queue not empty\n");
+				}
 			}
 		}
 	}
@@ -383,6 +409,40 @@ static int schedule_term_local(void)
 	return 0;
 }
 
+static inline void grp_update_mask(int grp, const odp_thrmask_t *new_mask)
+{
+	odp_thrmask_copy(&sched->sched_grp[grp].mask, new_mask);
+	odp_atomic_add_rel_u32(&sched->grp_epoch, 1);
+}
+
+static inline int grp_update_tbl(void)
+{
+	int i;
+	int num = 0;
+	int thr = sched_local.thr;
+
+	odp_spinlock_lock(&sched->grp_lock);
+
+	for (i = 0; i < NUM_SCHED_GRPS; i++) {
+		if (sched->sched_grp[i].allocated == 0)
+			continue;
+
+		if (odp_thrmask_isset(&sched->sched_grp[i].mask, thr)) {
+			sched_local.grp[num] = i;
+			num++;
+		}
+	}
+
+	odp_spinlock_unlock(&sched->grp_lock);
+
+	/* Update group weights. Round robin over all thread's groups. */
+	for (i = 0; i < WEIGHT_TBL_SIZE; i++)
+		sched_local.grp_weight[i] = i % num;
+
+	sched_local.num_grp = num;
+	return num;
+}
+
 static unsigned schedule_max_ordered_locks(void)
 {
 	return MAX_ORDERED_LOCKS_PER_QUEUE;
@@ -433,6 +493,7 @@ static int schedule_init_queue(uint32_t queue_index,
 	int prio = sched_param->prio;
 
 	pri_set_queue(queue_index, prio);
+	sched->queue[queue_index].grp = sched_param->group;
 	sched->queue[queue_index].prio = prio;
 	sched->queue[queue_index].queue_per_prio = queue_per_prio(queue_index);
 
@@ -444,6 +505,7 @@ static void schedule_destroy_queue(uint32_t queue_index)
 	int prio = sched->queue[queue_index].prio;
 
 	pri_clr_queue(queue_index, prio);
+	sched->queue[queue_index].grp = 0;
 	sched->queue[queue_index].prio = 0;
 	sched->queue[queue_index].queue_per_prio = 0;
 }
@@ -535,9 +597,10 @@ static void schedule_release_atomic(void)
 	uint32_t qi = sched_local.queue_index;
 
 	if (qi != PRIO_QUEUE_EMPTY && sched_local.num == 0) {
-		int prio = sched->queue[qi].prio;
+		int grp = sched->queue[qi].grp;
+		int prio = sched->queue[qi].prio;
 		int queue_per_prio = sched->queue[qi].queue_per_prio;
-		ring_t *ring = &sched->prio_q[prio][queue_per_prio].ring;
+		ring_t *ring = &sched->prio_q[grp][prio][queue_per_prio].ring;
 
 		/* Release current atomic queue */
 		ring_enq(ring, PRIO_QUEUE_MASK, qi);
@@ -688,42 +751,14 @@ static int schedule_ord_enq_multi(uint32_t queue_index, void *buf_hdr[],
 	return 1;
 }
 
-/*
- * Schedule queues
- */
-static int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
-		       unsigned int max_num)
+static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[],
+				  unsigned int max_num, int grp, int first)
 {
 	int prio, i;
 	int ret;
-	int id, first;
+	int id;
 	unsigned int max_deq = MAX_DEQ;
 	uint32_t qi;
-	uint16_t round;
-
-	if (sched_local.num) {
-		ret = copy_events(out_ev, max_num);
-
-		if (out_queue)
-			*out_queue = sched_local.queue;
-
-		return ret;
-	}
-
-	schedule_release_context();
-
-	if (odp_unlikely(sched_local.pause))
-		return 0;
-
-	/* Each thread prefers a priority queue. Poll weight table avoids
-	 * starvation of other priority queues on low thread counts. */
-	round = sched_local.round + 1;
-
-	if (odp_unlikely(round == WEIGHT_TBL_SIZE))
-		round = 0;
-
-	sched_local.round = round;
-	first = sched_local.weight_tbl[round];
 
 	/* Schedule events */
 	for (prio = 0; prio < NUM_PRIO; prio++) {
@@ -736,7 +771,6 @@ static int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
 
 		for (i = 0; i < QUEUES_PER_PRIO;) {
 			int num;
-			int grp;
 			int ordered;
 			odp_queue_t handle;
 			ring_t *ring;
@@ -753,7 +787,7 @@ static int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
 			}
 
 			/* Get queue index from the priority queue */
-			ring = &sched->prio_q[prio][id].ring;
+			ring = &sched->prio_q[grp][prio][id].ring;
 			qi = ring_deq(ring, PRIO_QUEUE_MASK);
 
 			/* Priority queue empty */
@@ -763,21 +797,6 @@ static int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
 				continue;
 			}
 
-			grp = sched_cb_queue_grp(qi);
-
-			if (grp > ODP_SCHED_GROUP_ALL &&
-			    !odp_thrmask_isset(&sched->sched_grp[grp].mask,
-					       sched_local.thr)) {
-				/* This thread is not eligible for work from
-				 * this queue, so continue scheduling it.
-				 */
-				ring_enq(ring, PRIO_QUEUE_MASK, qi);
-
-				i++;
-				id++;
-				continue;
-			}
-
 			/* Low priorities have smaller batch size to limit
 			 * head of line blocking latency. */
 			if (odp_unlikely(prio > ODP_SCHED_PRIO_DEFAULT))
@@ -845,6 +864,70 @@ static int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
 		}
 	}
 
+	return 0;
+}
+
+/*
+ * Schedule queues
+ */
+static inline int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
+			      unsigned int max_num)
+{
+	int i, num_grp;
+	int ret;
+	int id, first, grp_id;
+	uint16_t round;
+	uint32_t epoch;
+
+	if (sched_local.num) {
+		ret = copy_events(out_ev, max_num);
+
+		if (out_queue)
+			*out_queue = sched_local.queue;
+
+		return ret;
+	}
+
+	schedule_release_context();
+
+	if (odp_unlikely(sched_local.pause))
+		return 0;
+
+	/* Each thread prefers a priority queue. Poll weight table avoids
+	 * starvation of other priority queues on low thread counts. */
+	round = sched_local.round + 1;
+
+	if (odp_unlikely(round == WEIGHT_TBL_SIZE))
+		round = 0;
+
+	sched_local.round = round;
+	first = sched_local.weight_tbl[round];
+
+	epoch = odp_atomic_load_acq_u32(&sched->grp_epoch);
+	num_grp = sched_local.num_grp;
+
+	if (odp_unlikely(sched_local.grp_epoch != epoch)) {
+		num_grp = grp_update_tbl();
+		sched_local.grp_epoch = epoch;
+	}
+
+	grp_id = sched_local.grp_weight[round];
+
+	/* Schedule queues per group and priority */
+	for (i = 0; i < num_grp; i++) {
+		int grp;
+
+		grp = sched_local.grp[grp_id];
+		ret = do_schedule_grp(out_queue, out_ev, max_num, grp, first);
+
+		if (odp_likely(ret))
+			return ret;
+
+		grp_id++;
+		if (odp_unlikely(grp_id >= num_grp))
+			grp_id = 0;
+	}
+
 	/*
 	 * Poll packet input when there are no events
 	 *   * Each thread starts the search for a poll command from its
@@ -1050,7 +1133,8 @@ static odp_schedule_group_t schedule_group_create(const char *name,
 				ODP_SCHED_GROUP_NAME_LEN - 1);
 			grp_name[ODP_SCHED_GROUP_NAME_LEN - 1] = 0;
 		}
-		odp_thrmask_copy(&sched->sched_grp[i].mask, mask);
+
+		grp_update_mask(i, mask);
 		group = (odp_schedule_group_t)i;
 		sched->sched_grp[i].allocated = 1;
 		break;
@@ -1063,13 +1147,16 @@ static odp_schedule_group_t schedule_group_create(const char *name,
 
 static int schedule_group_destroy(odp_schedule_group_t group)
 {
+	odp_thrmask_t zero;
 	int ret;
 
+	odp_thrmask_zero(&zero);
+
 	odp_spinlock_lock(&sched->grp_lock);
 
 	if (group < NUM_SCHED_GRPS && group >= SCHED_GROUP_NAMED &&
 	    sched->sched_grp[group].allocated) {
-		odp_thrmask_zero(&sched->sched_grp[group].mask);
+		grp_update_mask(group, &zero);
 		memset(sched->sched_grp[group].name, 0,
 		       ODP_SCHED_GROUP_NAME_LEN);
 		sched->sched_grp[group].allocated = 0;
@@ -1109,9 +1196,11 @@ static int schedule_group_join(odp_schedule_group_t group,
 
 	if (group < NUM_SCHED_GRPS && group >= SCHED_GROUP_NAMED &&
 	    sched->sched_grp[group].allocated) {
-		odp_thrmask_or(&sched->sched_grp[group].mask,
-			       &sched->sched_grp[group].mask,
-			       mask);
+		odp_thrmask_t new_mask;
+
+		odp_thrmask_or(&new_mask, &sched->sched_grp[group].mask, mask);
+		grp_update_mask(group, &new_mask);
+
 		ret = 0;
 	} else {
 		ret = -1;
@@ -1124,18 +1213,19 @@ static int schedule_group_join(odp_schedule_group_t group,
 static int schedule_group_leave(odp_schedule_group_t group,
 				const odp_thrmask_t *mask)
 {
+	odp_thrmask_t new_mask;
 	int ret;
 
+	odp_thrmask_xor(&new_mask, mask, &sched->mask_all);
+
 	odp_spinlock_lock(&sched->grp_lock);
 
 	if (group < NUM_SCHED_GRPS && group >= SCHED_GROUP_NAMED &&
 	    sched->sched_grp[group].allocated) {
-		odp_thrmask_t leavemask;
+		odp_thrmask_and(&new_mask, &sched->sched_grp[group].mask,
+				&new_mask);
+		grp_update_mask(group, &new_mask);
 
-		odp_thrmask_xor(&leavemask, mask, &sched->mask_all);
-		odp_thrmask_and(&sched->sched_grp[group].mask,
-				&sched->sched_grp[group].mask,
-				&leavemask);
 		ret = 0;
 	} else {
 		ret = -1;
@@ -1186,12 +1276,19 @@ static int schedule_group_info(odp_schedule_group_t group,
 
 static int schedule_thr_add(odp_schedule_group_t group, int thr)
 {
+	odp_thrmask_t mask;
+	odp_thrmask_t new_mask;
+
 	if (group < 0 || group >= SCHED_GROUP_NAMED)
 		return -1;
 
+	odp_thrmask_zero(&mask);
+	odp_thrmask_set(&mask, thr);
+
 	odp_spinlock_lock(&sched->grp_lock);
 
-	odp_thrmask_set(&sched->sched_grp[group].mask, thr);
+	odp_thrmask_or(&new_mask, &sched->sched_grp[group].mask, &mask);
+	grp_update_mask(group, &new_mask);
 
 	odp_spinlock_unlock(&sched->grp_lock);
 
@@ -1200,12 +1297,20 @@ static int schedule_thr_add(odp_schedule_group_t group, int thr)
 
 static int schedule_thr_rem(odp_schedule_group_t group, int thr)
 {
+	odp_thrmask_t mask;
+	odp_thrmask_t new_mask;
+
 	if (group < 0 || group >= SCHED_GROUP_NAMED)
 		return -1;
 
+	odp_thrmask_zero(&mask);
+	odp_thrmask_set(&mask, thr);
+	odp_thrmask_xor(&new_mask, &mask, &sched->mask_all);
+
 	odp_spinlock_lock(&sched->grp_lock);
 
-	odp_thrmask_clr(&sched->sched_grp[group].mask, thr);
+	odp_thrmask_and(&new_mask, &sched->sched_grp[group].mask, &new_mask);
+	grp_update_mask(group, &new_mask);
 
 	odp_spinlock_unlock(&sched->grp_lock);
 
@@ -1219,9 +1324,10 @@ static void schedule_prefetch(int num ODP_UNUSED)
 
 static int schedule_sched_queue(uint32_t queue_index)
 {
+	int grp = sched->queue[queue_index].grp;
 	int prio = sched->queue[queue_index].prio;
 	int queue_per_prio = sched->queue[queue_index].queue_per_prio;
-	ring_t *ring = &sched->prio_q[prio][queue_per_prio].ring;
+	ring_t *ring = &sched->prio_q[grp][prio][queue_per_prio].ring;
 
 	ring_enq(ring, PRIO_QUEUE_MASK, queue_index);
 	return 0;
commit b12bfc6ebd9e7785d069ebea2861d7ac6ca237dd
Author: Petri Savolainen <petri.savolainen@linaro.org>
Date:   Thu Apr 6 14:59:03 2017 +0300
linux-gen: sched: use weight table for preferences
A precalculated table is more flexible for tuning weights than hard
coding. As future development, the table may be updated with different
weights at init or run time.
Signed-off-by: Petri Savolainen <petri.savolainen@linaro.org>
Reviewed-and-tested-by: Carl Wallén <carl.wallen@nokia.com>
Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
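[Editorial sketch, not part of the commit: a standalone C program that
mirrors the table construction from the patch below. Out of every
PREFER_RATIO slots, all but one select the thread's preferred queue
index and the remaining slot cycles through the other queues; the
per-round lookup then replaces the old offset arithmetic.]

    #include <stdint.h>
    #include <stdio.h>

    #define QUEUES_PER_PRIO 4
    #define PREFER_RATIO    64
    #define WEIGHT_TBL_SIZE ((QUEUES_PER_PRIO - 1) * PREFER_RATIO)

    static uint8_t weight_tbl[WEIGHT_TBL_SIZE];

    /* Fill the poll weight table for a thread, as sched_local_init()
     * does in the patch */
    static void fill_weight_tbl(int thr)
    {
            uint8_t id = thr & (QUEUES_PER_PRIO - 1);
            uint8_t offset = 0;
            int i;

            for (i = 0; i < WEIGHT_TBL_SIZE; i++) {
                    weight_tbl[i] = id;

                    if (i % PREFER_RATIO == 0) {
                            offset++;
                            weight_tbl[i] = (id + offset) &
                                            (QUEUES_PER_PRIO - 1);
                            if (offset == QUEUES_PER_PRIO - 1)
                                    offset = 0;
                    }
            }
    }

    int main(void)
    {
            int round;

            fill_weight_tbl(0);

            /* Per scheduling round, a table lookup picks the first
             * ring to poll */
            for (round = 0; round < 8; round++)
                    printf("round %d -> first ring %d\n", round,
                           weight_tbl[round % WEIGHT_TBL_SIZE]);

            return 0;
    }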
diff --git a/platform/linux-generic/odp_schedule.c b/platform/linux-generic/odp_schedule.c
index cd5bf21b..e7079b95 100644
--- a/platform/linux-generic/odp_schedule.c
+++ b/platform/linux-generic/odp_schedule.c
@@ -39,6 +39,13 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) &&
 /* Priority queues per priority */
 #define QUEUES_PER_PRIO 4
 
+/* A thread polls a non preferred sched queue every this many polls
+ * of the prefer queue. */
+#define PREFER_RATIO 64
+
+/* Size of poll weight table */
+#define WEIGHT_TBL_SIZE ((QUEUES_PER_PRIO - 1) * PREFER_RATIO)
+
 /* Packet input poll cmd queues */
 #define PKTIO_CMD_QUEUES 4
 
@@ -142,7 +149,6 @@ typedef struct {
 	int index;
 	int pause;
 	uint16_t round;
-	uint16_t prefer_offset;
 	uint16_t pktin_polls;
 	uint32_t queue_index;
 	odp_queue_t queue;
@@ -157,6 +163,8 @@ typedef struct {
 		ordered_stash_t stash[MAX_ORDERED_STASH];
 	} ordered;
 
+	uint8_t weight_tbl[WEIGHT_TBL_SIZE];
+
 } sched_local_t;
 
 /* Priority queue */
@@ -237,11 +245,29 @@ static inline void schedule_release_context(void);
 
 static void sched_local_init(void)
 {
+	int i;
+	uint8_t id;
+	uint8_t offset = 0;
+
 	memset(&sched_local, 0, sizeof(sched_local_t));
 
 	sched_local.thr = odp_thread_id();
 	sched_local.queue = ODP_QUEUE_INVALID;
 	sched_local.queue_index = PRIO_QUEUE_EMPTY;
+
+	id = sched_local.thr & (QUEUES_PER_PRIO - 1);
+
+	for (i = 0; i < WEIGHT_TBL_SIZE; i++) {
+		sched_local.weight_tbl[i] = id;
+
+		if (i % PREFER_RATIO == 0) {
+			offset++;
+			sched_local.weight_tbl[i] = (id + offset) &
+						    (QUEUES_PER_PRIO - 1);
+			if (offset == QUEUES_PER_PRIO - 1)
+				offset = 0;
+		}
+	}
 }
 
 static int schedule_init_global(void)
@@ -670,10 +696,10 @@ static int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
 {
 	int prio, i;
 	int ret;
-	int id;
-	int offset = 0;
+	int id, first;
 	unsigned int max_deq = MAX_DEQ;
 	uint32_t qi;
+	uint16_t round;
 
 	if (sched_local.num) {
 		ret = copy_events(out_ev, max_num);
@@ -689,15 +715,15 @@ static int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
 	if (odp_unlikely(sched_local.pause))
 		return 0;
 
-	/* Each thread prefers a priority queue. This offset avoids starvation
-	 * of other priority queues on low thread counts. */
-	if (odp_unlikely((sched_local.round & 0x3f) == 0)) {
-		offset = sched_local.prefer_offset;
-		sched_local.prefer_offset = (offset + 1) &
-					    (QUEUES_PER_PRIO - 1);
-	}
+	/* Each thread prefers a priority queue. Poll weight table avoids
+	 * starvation of other priority queues on low thread counts. */
+	round = sched_local.round + 1;
+
+	if (odp_unlikely(round == WEIGHT_TBL_SIZE))
+		round = 0;
 
-	sched_local.round++;
+	sched_local.round = round;
+	first = sched_local.weight_tbl[round];
 
 	/* Schedule events */
 	for (prio = 0; prio < NUM_PRIO; prio++) {
@@ -705,7 +731,8 @@ static int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[],
 		if (sched->pri_mask[prio] == 0)
 			continue;
 
-		id = (sched_local.thr + offset) & (QUEUES_PER_PRIO - 1);
+		/* Select the first ring based on weights */
+		id = first;
 
 		for (i = 0; i < QUEUES_PER_PRIO;) {
 			int num;
commit 43de90c46e321f00ed7bf3f66c0c3c2ad41b5afd
Author: Petri Savolainen <petri.savolainen@linaro.org>
Date:   Thu Apr 6 14:59:02 2017 +0300
test: l2fwd: add group option
The user may give the number of scheduling groups to test scheduler
performance with groups other than the default (all threads) group.
Both pktios and threads are allocated to these groups in round robin.
The number of groups may not exceed the number of pktios or worker
threads.
Signed-off-by: Petri Savolainen <petri.savolainen@linaro.org>
Reviewed-and-tested-by: Carl Wallén <carl.wallen@nokia.com>
Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
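[Editorial sketch, not part of the commit: the create/join pattern the
patch uses, condensed into two helpers against the public ODP API.
Groups are created with an empty thread mask, and each worker later
joins with a mask containing only its own thread id; interfaces and
workers are then spread over the groups with index % num_groups.]

    #include <odp_api.h>

    /* Create a group with no members yet, as create_groups() does */
    static odp_schedule_group_t make_empty_group(void)
    {
            odp_thrmask_t zero;

            odp_thrmask_zero(&zero);
            return odp_schedule_group_create(NULL, &zero);
    }

    /* From a worker thread: join a group with only this thread's id
     * set, as run_worker_sched_mode() does. Returns 0 on success. */
    static int join_own_group(odp_schedule_group_t grp)
    {
            odp_thrmask_t mask;

            odp_thrmask_zero(&mask);
            odp_thrmask_set(&mask, odp_thread_id());
            return odp_schedule_group_join(grp, &mask);
    }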
diff --git a/test/common_plat/performance/odp_l2fwd.c b/test/common_plat/performance/odp_l2fwd.c
index 8f5c5e15..33efc022 100644
--- a/test/common_plat/performance/odp_l2fwd.c
+++ b/test/common_plat/performance/odp_l2fwd.c
@@ -104,6 +104,7 @@ typedef struct {
 	int src_change;		/**< Change source eth addresses */
 	int error_check;	/**< Check packet errors */
 	int sched_mode;		/**< Scheduler mode */
+	int num_groups;		/**< Number of scheduling groups */
 } appl_args_t;
 
 static int exit_threads;	/**< Break workers loop if set to 1 */
@@ -130,6 +131,7 @@ typedef union {
 typedef struct thread_args_t {
 	int thr_idx;
 	int num_pktio;
+	int num_groups;
 
 	struct {
 		odp_pktin_queue_t pktin;
@@ -142,7 +144,12 @@ typedef struct thread_args_t {
 		int tx_queue_idx;
 	} pktio[MAX_PKTIOS];
 
-	stats_t *stats;	/**< Pointer to per thread stats */
+	/* Groups to join */
+	odp_schedule_group_t group[MAX_PKTIOS];
+
+	/* Pointer to per thread stats */
+	stats_t *stats;
+
 } thread_args_t;
 
 /**
@@ -297,6 +304,22 @@ static int run_worker_sched_mode(void *arg)
 
 	thr = odp_thread_id();
 
+	if (gbl_args->appl.num_groups) {
+		odp_thrmask_t mask;
+
+		odp_thrmask_zero(&mask);
+		odp_thrmask_set(&mask, thr);
+
+		/* Join non-default groups */
+		for (i = 0; i < thr_args->num_groups; i++) {
+			if (odp_schedule_group_join(thr_args->group[i],
+						    &mask)) {
+				LOG_ERR("Join failed\n");
+				return -1;
+			}
+		}
+	}
+
 	num_pktio = thr_args->num_pktio;
 
 	if (num_pktio > MAX_PKTIOS) {
@@ -590,7 +613,7 @@ static int run_worker_direct_mode(void *arg)
  * @retval -1 on failure
  */
 static int create_pktio(const char *dev, int idx, int num_rx, int num_tx,
-			odp_pool_t pool)
+			odp_pool_t pool, odp_schedule_group_t group)
 {
 	odp_pktio_t pktio;
 	odp_pktio_param_t pktio_param;
@@ -650,7 +673,7 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx,
 
 		pktin_param.queue_param.sched.prio = ODP_SCHED_PRIO_DEFAULT;
 		pktin_param.queue_param.sched.sync = sync_mode;
-		pktin_param.queue_param.sched.group = ODP_SCHED_GROUP_ALL;
+		pktin_param.queue_param.sched.group = group;
 	}
 
 	if (num_rx > (int)capa.max_input_queues) {
@@ -1016,39 +1039,46 @@ static void usage(char *progname)
 	printf("\n"
 	       "OpenDataPlane L2 forwarding application.\n"
 	       "\n"
-	       "Usage: %s OPTIONS\n"
+	       "Usage: %s [options]\n"
+	       "\n"
 	       "  E.g. %s -i eth0,eth1,eth2,eth3 -m 0 -t 1\n"
-	       " In the above example,\n"
-	       " eth0 will send pkts to eth1 and vice versa\n"
-	       " eth2 will send pkts to eth3 and vice versa\n"
+	       "  In the above example,\n"
+	       "  eth0 will send pkts to eth1 and vice versa\n"
+	       "  eth2 will send pkts to eth3 and vice versa\n"
 	       "\n"
 	       "Mandatory OPTIONS:\n"
-	       "  -i, --interface Eth interfaces (comma-separated, no spaces)\n"
-	       "                  Interface count min 1, max %i\n"
+	       "  -i, --interface <name>  Eth interfaces (comma-separated, no spaces)\n"
+	       "                          Interface count min 1, max %i\n"
 	       "\n"
 	       "Optional OPTIONS:\n"
-	       "  -m, --mode      Packet input mode\n"
-	       "                  0: Direct mode: PKTIN_MODE_DIRECT (default)\n"
-	       "                  1: Scheduler mode with parallel queues: PKTIN_MODE_SCHED + SCHED_SYNC_PARALLEL\n"
-	       "                  2: Scheduler mode with atomic queues: PKTIN_MODE_SCHED + SCHED_SYNC_ATOMIC\n"
-	       "                  3: Scheduler mode with ordered queues: PKTIN_MODE_SCHED + SCHED_SYNC_ORDERED\n"
-	       "                  4: Plain queue mode: ODP_PKTIN_MODE_QUEUE\n"
-	       "  -o, --out_mode  Packet output mode\n"
-	       "                  0: Direct mode: PKTOUT_MODE_DIRECT (default)\n"
-	       "                  1: Queue mode: PKTOUT_MODE_QUEUE\n"
-	       "  -c, --count <number> CPU count.\n"
-	       "  -t, --time <number> Time in seconds to run.\n"
-	       "  -a, --accuracy <number> Time in seconds get print statistics\n"
+	       "  -m, --mode <arg>        Packet input mode\n"
+	       "                          0: Direct mode: PKTIN_MODE_DIRECT (default)\n"
+	       "                          1: Scheduler mode with parallel queues:\n"
+	       "                             PKTIN_MODE_SCHED + SCHED_SYNC_PARALLEL\n"
+	       "                          2: Scheduler mode with atomic queues:\n"
+	       "                             PKTIN_MODE_SCHED + SCHED_SYNC_ATOMIC\n"
+	       "                          3: Scheduler mode with ordered queues:\n"
+	       "                             PKTIN_MODE_SCHED + SCHED_SYNC_ORDERED\n"
+	       "                          4: Plain queue mode: PKTIN_MODE_QUEUE\n"
+	       "  -o, --out_mode <arg>    Packet output mode\n"
+	       "                          0: Direct mode: PKTOUT_MODE_DIRECT (default)\n"
+	       "                          1: Queue mode: PKTOUT_MODE_QUEUE\n"
+	       "  -c, --count <num>       CPU count.\n"
+	       "  -t, --time <sec>        Time in seconds to run.\n"
+	       "  -a, --accuracy <sec>    Time in seconds get print statistics\n"
 	       "                          (default is 1 second).\n"
-	       "  -d, --dst_change 0: Don't change packets' dst eth addresses\n"
-	       "                   1: Change packets' dst eth addresses (default)\n"
-	       "  -s, --src_change 0: Don't change packets' src eth addresses\n"
-	       "                   1: Change packets' src eth addresses (default)\n"
-	       "  -r, --dst_addr  Destination addresses (comma-separated, no spaces)\n"
-	       "                  Requires also the -d flag to be set\n"
-	       "  -e, --error_check 0: Don't check packet errors (default)\n"
-	       "                    1: Check packet errors\n"
-	       "  -h, --help      Display help and exit.\n\n"
+	       "  -d, --dst_change <arg>  0: Don't change packets' dst eth addresses\n"
+	       "                          1: Change packets' dst eth addresses (default)\n"
+	       "  -s, --src_change <arg>  0: Don't change packets' src eth addresses\n"
+	       "                          1: Change packets' src eth addresses (default)\n"
+	       "  -r, --dst_addr <addr>   Destination addresses (comma-separated, no spaces)\n"
+	       "                          Requires also the -d flag to be set\n"
+	       "  -e, --error_check <arg> 0: Don't check packet errors (default)\n"
+	       "                          1: Check packet errors\n"
+	       "  -g, --groups <num>      Number of groups to use: 0 ... num\n"
+	       "                          0: SCHED_GROUP_ALL (default)\n"
+	       "                          num: must not exceed number of interfaces or workers\n"
+	       "  -h, --help              Display help and exit.\n\n"
 	       "\n", NO_PATH(progname), NO_PATH(progname), MAX_PKTIOS
 	    );
 }
@@ -1079,11 +1109,12 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args)
 		{"dst_change", required_argument, NULL, 'd'},
 		{"src_change", required_argument, NULL, 's'},
 		{"error_check", required_argument, NULL, 'e'},
+		{"groups", required_argument, NULL, 'g'},
 		{"help", no_argument, NULL, 'h'},
 		{NULL, 0, NULL, 0}
 	};
 
-	static const char *shortopts = "+c:+t:+a:i:m:o:r:d:s:e:h";
+	static const char *shortopts = "+c:+t:+a:i:m:o:r:d:s:e:g:h";
 
 	/* let helper collect its own arguments (e.g. --odph_proc) */
 	odph_parse_options(argc, argv, shortopts, longopts);
@@ -1092,6 +1123,7 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args)
 	appl_args->accuracy = 1; /* get and print pps stats second */
 	appl_args->dst_change = 1; /* change eth dst address by default */
 	appl_args->src_change = 1; /* change eth src address by default */
+	appl_args->num_groups = 0; /* use default group */
 	appl_args->error_check = 0; /* don't check packet errors by default */
 
 	opterr = 0; /* do not issue errors on helper options */
@@ -1217,6 +1249,9 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args)
 		case 'e':
 			appl_args->error_check = atoi(optarg);
 			break;
+		case 'g':
+			appl_args->num_groups = atoi(optarg);
+			break;
 		case 'h':
 			usage(argv[0]);
 			exit(EXIT_SUCCESS);
@@ -1305,6 +1340,24 @@ static void gbl_args_init(args_t *args)
 	}
 }
 
+static void create_groups(int num, odp_schedule_group_t *group)
+{
+	int i;
+	odp_thrmask_t zero;
+
+	odp_thrmask_zero(&zero);
+
+	/* Create groups */
+	for (i = 0; i < num; i++) {
+		group[i] = odp_schedule_group_create(NULL, &zero);
+
+		if (group[i] == ODP_SCHED_GROUP_INVALID) {
+			LOG_ERR("Group create failed\n");
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
 /**
  * ODP L2 forwarding main function
  */
@@ -1325,6 +1378,8 @@ int main(int argc, char *argv[])
 	int if_count;
 	int (*thr_run_func)(void *);
 	odp_instance_t instance;
+	int num_groups;
+	odp_schedule_group_t group[MAX_PKTIOS];
 
 	/* Init ODP before calling anything else */
 	if (odp_init_global(&instance, NULL, NULL)) {
@@ -1374,10 +1429,23 @@ int main(int argc, char *argv[])
 
 	if_count = gbl_args->appl.if_count;
 
+	num_groups = gbl_args->appl.num_groups;
+
 	printf("num worker threads: %i\n", num_workers);
 	printf("first CPU:          %i\n", odp_cpumask_first(&cpumask));
 	printf("cpu mask:           %s\n", cpumaskstr);
 
+	if (num_groups)
+		printf("num groups:         %i\n", num_groups);
+
+	printf("\n");
+
+	if (num_groups > if_count || num_groups > num_workers) {
+		LOG_ERR("Too many groups. Number of groups may not exceed "
+			"number of interfaces or workers.\n");
+		exit(EXIT_FAILURE);
+	}
+
 	/* Create packet pool */
 	odp_pool_param_init(&params);
 	params.pkt.seg_len = SHM_PKT_POOL_BUF_SIZE;
@@ -1399,9 +1467,18 @@ int main(int argc, char *argv[])
 
 	bind_workers();
 
+	/* Default */
+	if (num_groups == 0) {
+		group[0] = ODP_SCHED_GROUP_ALL;
+		num_groups = 1;
+	} else {
+		create_groups(num_groups, group);
+	}
+
 	for (i = 0; i < if_count; ++i) {
 		const char *dev = gbl_args->appl.if_names[i];
 		int num_rx, num_tx;
+		odp_schedule_group_t grp;
 
 		/* A queue per worker in scheduled mode */
 		num_rx = num_workers;
@@ -1413,7 +1490,10 @@ int main(int argc, char *argv[])
 			num_tx = gbl_args->pktios[i].num_tx_thr;
 		}
 
-		if (create_pktio(dev, i, num_rx, num_tx, pool))
+		/* Round robin pktios to groups */
+		grp = group[i % num_groups];
+
+		if (create_pktio(dev, i, num_rx, num_tx, pool, grp))
 			exit(EXIT_FAILURE);
 
 		/* Save interface ethernet address */
@@ -1473,6 +1553,10 @@ int main(int argc, char *argv[])
 		thr_params.thr_type = ODP_THREAD_WORKER;
 		thr_params.instance = instance;
 
+		/* Round robin threads to groups */
+		gbl_args->thread[i].num_groups = 1;
+		gbl_args->thread[i].group[0] = group[i % num_groups];
+
 		gbl_args->thread[i].stats = &stats[i];
 
 		odp_cpumask_zero(&thd_mask);
-----------------------------------------------------------------------
Summary of changes:
 platform/linux-generic/odp_schedule.c    | 313 ++++++++++++++++++++++---------
 test/common_plat/performance/odp_l2fwd.c | 148 +++++++++++----
 2 files changed, 339 insertions(+), 122 deletions(-)
hooks/post-receive