This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "".
The branch, master has been updated via 577a58dddef4824f709f00b602543bde3f440ac7 (commit) via 0c6352e2d0f755c310692f5cf627801abf8ccb63 (commit) via 71b674a18a4655ab17bb9ab851492f8c51a9b950 (commit) via 3b11463dc8f9ce6bdb95490eeda933e6b6be3534 (commit) via 23e4a06ea025492e33a9a0f759386133e9eef6ad (commit) via 917a2bf7e8a41538f811889099738eb3979efc3a (commit) via f60d82e96b995e10217ab40b6ad9f9cba3a6cadb (commit) from 65f2959da164c4fed23b713c99842213c7ab04e1 (commit)
Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below.
- Log ----------------------------------------------------------------- commit 577a58dddef4824f709f00b602543bde3f440ac7 Author: Janne Peltonen janne.peltonen@nokia.com Date: Tue Sep 25 13:00:51 2018 +0300
linux-gen: ipsec: make IPv4 ID allocator scale better to multiple threads
Allocate IPv4 ID to threads in blocks to avoid updating shared IPv4 ID variable for every packet. Keep free ID blocks in a ring to maximize the time before reusing a block.
Signed-off-by: Janne Peltonen janne.peltonen@nokia.com Reviewed-by: Dmitry Eremin-Solenikov dmitry.ereminsolenikov@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/platform/linux-generic/odp_ipsec_sad.c b/platform/linux-generic/odp_ipsec_sad.c index 08e43993..fa0d7096 100644 --- a/platform/linux-generic/odp_ipsec_sad.c +++ b/platform/linux-generic/odp_ipsec_sad.c @@ -15,6 +15,7 @@ #include <odp_debug_internal.h> #include <odp_ipsec_internal.h> #include <odp_shm_internal.h> +#include <odp_ring_mpmc_internal.h>
#include <odp/api/plat/atomic_inlines.h> #include <odp/api/plat/cpu_inlines.h> @@ -25,10 +26,33 @@ #define IPSEC_SA_STATE_FREE 0xc0000000 #define IPSEC_SA_STATE_RESERVED 0x80000000
+/* + * We do not have global IPv4 ID counter that is accessed for every outbound + * packet. Instead, we split IPv4 ID space to fixed size blocks that we + * allocate to threads on demand. When a thread has used its block of IDs, + * it frees it and allocates a new block. Free blocks are kept in a ring so + * that the block last freed is the one to be allocated last to maximize + * the time before IPv4 ID reuse. + */ +#define IPV4_ID_BLOCK_SIZE 64 /* must be power of 2 */ +#define IPV4_ID_RING_SIZE (UINT16_MAX / IPV4_ID_BLOCK_SIZE) +#define IPV4_ID_RING_MASK (IPV4_ID_RING_SIZE - 1) + +#if IPV4_ID_RING_SIZE <= ODP_THREAD_COUNT_MAX +#warning IPV4_ID_RING_SIZE is too small for the maximum number of threads. +#endif + +typedef struct ODP_ALIGNED_CACHE ipsec_thread_local_s { + uint16_t first_ipv4_id; /* first ID of current block of IDs */ + uint16_t next_ipv4_id; /* next ID to be used */ +} ipsec_thread_local_t; + typedef struct ipsec_sa_table_t { ipsec_sa_t ipsec_sa[ODP_CONFIG_IPSEC_SAS]; + ipsec_thread_local_t per_thread[ODP_THREAD_COUNT_MAX]; struct ODP_ALIGNED_CACHE { - odp_atomic_u32_t ipv4_id; + ring_mpmc_t ipv4_id_ring; + uint32_t ODP_ALIGNED_CACHE ipv4_id_data[IPV4_ID_RING_SIZE]; } hot; odp_shm_t shm; } ipsec_sa_table_t; @@ -71,7 +95,31 @@ int _odp_ipsec_sad_init_global(void) ipsec_sa_tbl = odp_shm_addr(shm); memset(ipsec_sa_tbl, 0, sizeof(ipsec_sa_table_t)); ipsec_sa_tbl->shm = shm; - odp_atomic_init_u32(&ipsec_sa_tbl->hot.ipv4_id, 0); + + ring_mpmc_init(&ipsec_sa_tbl->hot.ipv4_id_ring); + for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { + /* + * Make the current ID block fully used, forcing allocation + * of a fresh block at first use. + */ + ipsec_sa_tbl->per_thread[i].first_ipv4_id = 0; + ipsec_sa_tbl->per_thread[i].next_ipv4_id = IPV4_ID_BLOCK_SIZE; + } + /* + * Initialize IPv4 ID ring with ID blocks. + * + * The last ID block is left unused since the ring can hold + * only IPV4_ID_RING_SIZE - 1 entries. 
+ */ + for (i = 0; i < IPV4_ID_RING_SIZE - 1; i++) { + uint32_t data = i * IPV4_ID_BLOCK_SIZE; + + ring_mpmc_enq_multi(&ipsec_sa_tbl->hot.ipv4_id_ring, + ipsec_sa_tbl->hot.ipv4_id_data, + IPV4_ID_RING_MASK, + &data, + 1); + }
for (i = 0; i < ODP_CONFIG_IPSEC_SAS; i++) { ipsec_sa_t *ipsec_sa = ipsec_sa_entry(i); @@ -737,8 +785,28 @@ int _odp_ipsec_sa_replay_update(ipsec_sa_t *ipsec_sa, uint32_t seq, uint16_t _odp_ipsec_sa_alloc_ipv4_id(ipsec_sa_t *ipsec_sa) { (void) ipsec_sa; + ipsec_thread_local_t *tl = &ipsec_sa_tbl->per_thread[odp_thread_id()]; + uint32_t data; + + if (odp_unlikely(tl->next_ipv4_id == + tl->first_ipv4_id + IPV4_ID_BLOCK_SIZE)) { + /* Return used ID block to the ring */ + data = tl->first_ipv4_id; + ring_mpmc_enq_multi(&ipsec_sa_tbl->hot.ipv4_id_ring, + ipsec_sa_tbl->hot.ipv4_id_data, + IPV4_ID_RING_MASK, + &data, + 1); + /* Get new ID block */ + ring_mpmc_deq_multi(&ipsec_sa_tbl->hot.ipv4_id_ring, + ipsec_sa_tbl->hot.ipv4_id_data, + IPV4_ID_RING_MASK, + &data, + 1); + tl->first_ipv4_id = data; + tl->next_ipv4_id = data; + }
/* No need to convert to BE: ID just should not be duplicated */ - return odp_atomic_fetch_add_u32(&ipsec_sa_tbl->hot.ipv4_id, 1) - & 0xffff; + return tl->next_ipv4_id++; }
commit 0c6352e2d0f755c310692f5cf627801abf8ccb63 Author: Janne Peltonen janne.peltonen@nokia.com Date: Mon Sep 24 11:29:19 2018 +0300
linux-gen: ipsec: use global IPv4 ID allocator for all tunnel SAs
Change the per-SA IPv4 ID allocator to a global one for IPsec to reduce the risk of duplicate IPv4 IDs when there are multiple SAs between the same endpoints.
Use zero IPv4 ID in atomic datagrams (RFC 6864).
Fixes: https://bugs.linaro.org/show_bug.cgi?id=4013
Signed-off-by: Janne Peltonen janne.peltonen@nokia.com Reviewed-by: Dmitry Eremin-Solenikov dmitry.ereminsolenikov@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/platform/linux-generic/include/odp_ipsec_internal.h b/platform/linux-generic/include/odp_ipsec_internal.h index d1185110..3d7d40a3 100644 --- a/platform/linux-generic/include/odp_ipsec_internal.h +++ b/platform/linux-generic/include/odp_ipsec_internal.h @@ -181,9 +181,6 @@ struct ipsec_sa_s { odp_ipsec_ipv4_param_t param; odp_u32be_t src_ip; odp_u32be_t dst_ip; - - /* 32-bit from which low 16 are used */ - odp_atomic_u32_t hdr_id; } tun_ipv4; struct { odp_ipsec_ipv6_param_t param; @@ -273,6 +270,12 @@ int _odp_ipsec_sa_replay_precheck(ipsec_sa_t *ipsec_sa, uint32_t seq, */ int _odp_ipsec_sa_replay_update(ipsec_sa_t *ipsec_sa, uint32_t seq, odp_ipsec_op_status_t *status); + +/** + * Allocate an IPv4 ID for an outgoing packet. + */ +uint16_t _odp_ipsec_sa_alloc_ipv4_id(ipsec_sa_t *ipsec_sa); + /** * Try inline IPsec processing of provided packet. * diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c index b7368c5a..8430d707 100644 --- a/platform/linux-generic/odp_ipsec.c +++ b/platform/linux-generic/odp_ipsec.c @@ -872,14 +872,18 @@ static int ipsec_out_tunnel_ipv4(odp_packet_t *pkt, state->ip_tot_len += _ODP_IPV4HDR_LEN;
out_ip.tot_len = odp_cpu_to_be_16(state->ip_tot_len); - /* No need to convert to BE: ID just should not be duplicated */ - out_ip.id = odp_atomic_fetch_add_u32(&ipsec_sa->out.tun_ipv4.hdr_id, - 1); if (ipsec_sa->copy_df) flags = state->out_tunnel.ip_df; else flags = ((uint16_t)ipv4_param->df) << 14; out_ip.frag_offset = odp_cpu_to_be_16(flags); + + /* Allocate unique IP ID only for non-atomic datagrams */ + if (out_ip.frag_offset == 0) + out_ip.id = _odp_ipsec_sa_alloc_ipv4_id(ipsec_sa); + else + out_ip.id = 0; + out_ip.ttl = ipv4_param->ttl; /* Will be filled later by packet checksum update */ out_ip.chksum = 0; diff --git a/platform/linux-generic/odp_ipsec_sad.c b/platform/linux-generic/odp_ipsec_sad.c index c33c1cc1..08e43993 100644 --- a/platform/linux-generic/odp_ipsec_sad.c +++ b/platform/linux-generic/odp_ipsec_sad.c @@ -27,6 +27,9 @@
typedef struct ipsec_sa_table_t { ipsec_sa_t ipsec_sa[ODP_CONFIG_IPSEC_SAS]; + struct ODP_ALIGNED_CACHE { + odp_atomic_u32_t ipv4_id; + } hot; odp_shm_t shm; } ipsec_sa_table_t;
@@ -68,6 +71,7 @@ int _odp_ipsec_sad_init_global(void) ipsec_sa_tbl = odp_shm_addr(shm); memset(ipsec_sa_tbl, 0, sizeof(ipsec_sa_table_t)); ipsec_sa_tbl->shm = shm; + odp_atomic_init_u32(&ipsec_sa_tbl->hot.ipv4_id, 0);
for (i = 0; i < ODP_CONFIG_IPSEC_SAS; i++) { ipsec_sa_t *ipsec_sa = ipsec_sa_entry(i); @@ -348,7 +352,6 @@ odp_ipsec_sa_t odp_ipsec_sa_create(const odp_ipsec_sa_param_t *param) memcpy(&ipsec_sa->out.tun_ipv4.dst_ip, param->outbound.tunnel.ipv4.dst_addr, sizeof(ipsec_sa->out.tun_ipv4.dst_ip)); - odp_atomic_init_u32(&ipsec_sa->out.tun_ipv4.hdr_id, 0); ipsec_sa->out.tun_ipv4.param.src_addr = &ipsec_sa->out.tun_ipv4.src_ip; ipsec_sa->out.tun_ipv4.param.dst_addr = @@ -730,3 +733,12 @@ int _odp_ipsec_sa_replay_update(ipsec_sa_t *ipsec_sa, uint32_t seq,
return 0; } + +uint16_t _odp_ipsec_sa_alloc_ipv4_id(ipsec_sa_t *ipsec_sa) +{ + (void) ipsec_sa; + + /* No need to convert to BE: ID just should not be duplicated */ + return odp_atomic_fetch_add_u32(&ipsec_sa_tbl->hot.ipv4_id, 1) + & 0xffff; +}
commit 71b674a18a4655ab17bb9ab851492f8c51a9b950 Author: Janne Peltonen janne.peltonen@nokia.com Date: Mon Oct 8 10:50:22 2018 +0300
validation: ipsec: make output checking accept any IP ID value
ODP implementation is free to choose the IP ID value in the outbound IP header. Make outbound validation check accept any IP ID value, not just the one in the test vector. Relax packet check for AH packets since IP ID is included in the ICV and the expected ICV cannot be easily calculated in the current api validation code.
Fixes: https://bugs.linaro.org/show_bug.cgi?id=4017
Signed-off-by: Janne Peltonen janne.peltonen@nokia.com Reviewed-by: Dmitry Eremin-Solenikov dmitry.ereminsolenikov@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/test/validation/api/ipsec/ipsec.c b/test/validation/api/ipsec/ipsec.c index 31a6f9b5..b50a5ef9 100644 --- a/test/validation/api/ipsec/ipsec.c +++ b/test/validation/api/ipsec/ipsec.c @@ -9,6 +9,7 @@ #include <odp_api.h> #include <odp_cunit_common.h> #include <unistd.h> +#include <odp/helper/odph_api.h>
#include "ipsec.h"
@@ -438,11 +439,14 @@ odp_packet_t ipsec_packet(const ipsec_test_packet *itp) /* * Compare packages ignoring everything before L3 header */ -static void ipsec_check_packet(const ipsec_test_packet *itp, odp_packet_t pkt) +static void ipsec_check_packet(const ipsec_test_packet *itp, odp_packet_t pkt, + odp_bool_t is_outbound) { uint32_t len = (ODP_PACKET_INVALID == pkt) ? 1 : odp_packet_len(pkt); uint32_t l3, l4; uint8_t data[len]; + const odph_ipv4hdr_t *itp_ip; + odph_ipv4hdr_t *ip;
if (NULL == itp) return; @@ -472,6 +476,38 @@ static void ipsec_check_packet(const ipsec_test_packet *itp, odp_packet_t pkt) if (l4 - l3 != itp->l4_offset - itp->l3_offset) return;
+ ip = (odph_ipv4hdr_t *) &data[l3]; + itp_ip = (const odph_ipv4hdr_t *) &itp->data[itp->l3_offset]; + if (ODPH_IPV4HDR_VER(ip->ver_ihl) == ODPH_IPV4 && + is_outbound && + ip->id != itp_ip->id) { + /* + * IP ID value chosen by the implementation differs + * from the IP value in our test vector. This requires + * special handling in outbound checks. + */ + /* + * Let's change IP ID and header checksum to same values + * as in the test vector to facilitate packet comparison. + */ + CU_ASSERT(odph_ipv4_csum_valid(pkt)); + ip->id = itp_ip->id; + ip->chksum = itp_ip->chksum; + + if (ip->proto == ODPH_IPPROTO_AH) { + /* + * ID field is included in the authentication so + * we cannot check ICV against our test vector. + * Check packet data before the first possible + * location of the AH ICV field. + */ + CU_ASSERT_EQUAL(0, memcmp(data + l3, + itp->data + itp->l3_offset, + ODPH_IPV4HDR_LEN + 12)); + return; + } + } + CU_ASSERT_EQUAL(0, memcmp(data + l3, itp->data + itp->l3_offset, len - l3)); @@ -701,7 +737,8 @@ void ipsec_check_in_one(const ipsec_test_part *part, odp_ipsec_sa_t sa) odp_ipsec_sa_context(sa)); } ipsec_check_packet(part->out[i].pkt_out, - pkto[i]); + pkto[i], + false); if (part->out[i].pkt_out != NULL && part->out[i].l3_type != _ODP_PROTO_L3_TYPE_UNDEF) CU_ASSERT_EQUAL(part->out[i].l3_type, @@ -746,7 +783,8 @@ void ipsec_check_out_one(const ipsec_test_part *part, odp_ipsec_sa_t sa) odp_ipsec_sa_context(sa)); } ipsec_check_packet(part->out[i].pkt_out, - pkto[i]); + pkto[i], + true); odp_packet_free(pkto[i]); } }
commit 3b11463dc8f9ce6bdb95490eeda933e6b6be3534 Author: Janne Peltonen janne.peltonen@nokia.com Date: Wed Sep 19 14:52:33 2018 +0300
linux-gen: ipsec: use sequence number counter for counter based IV
Reduce frequently updated SA state by reusing 64-bit sequence number as a counter based IV instead of having a separate counter for it.
Signed-off-by: Janne Peltonen janne.peltonen@nokia.com Reviewed-by: Dmitry Eremin-Solenikov dmitry.ereminsolenikov@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/platform/linux-generic/include/odp_ipsec_internal.h b/platform/linux-generic/include/odp_ipsec_internal.h index d9884f88..d1185110 100644 --- a/platform/linux-generic/include/odp_ipsec_internal.h +++ b/platform/linux-generic/include/odp_ipsec_internal.h @@ -109,8 +109,11 @@ struct ipsec_sa_s { } in;
struct { - odp_atomic_u64_t counter; /* for CTR/GCM */ - odp_atomic_u32_t seq; + /* + * 64-bit sequence number that is also used as + * CTR/GCM IV + */ + odp_atomic_u64_t seq; } out; }; } hot; diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c index 6a44277d..b7368c5a 100644 --- a/platform/linux-generic/odp_ipsec.c +++ b/platform/linux-generic/odp_ipsec.c @@ -811,9 +811,9 @@ err:
/* Generate sequence number */ static inline -uint32_t ipsec_seq_no(ipsec_sa_t *ipsec_sa) +uint64_t ipsec_seq_no(ipsec_sa_t *ipsec_sa) { - return odp_atomic_fetch_add_u32(&ipsec_sa->hot.out.seq, 1); + return odp_atomic_fetch_add_u64(&ipsec_sa->hot.out.seq, 1); }
/* Helper for calculating encode length using data length and block size */ @@ -1002,22 +1002,19 @@ static int ipsec_random_data(uint8_t *data, uint32_t len) }
static int ipsec_out_iv(ipsec_state_t *state, - ipsec_sa_t *ipsec_sa) + ipsec_sa_t *ipsec_sa, + uint64_t seq_no) { if (ipsec_sa->use_counter_iv) { - uint64_t ctr; - /* Both GCM and CTR use 8-bit counters */ - ODP_ASSERT(sizeof(ctr) == ipsec_sa->esp_iv_len); + ODP_ASSERT(sizeof(seq_no) == ipsec_sa->esp_iv_len);
- ctr = odp_atomic_fetch_add_u64(&ipsec_sa->hot.out.counter, - 1); /* Check for overrun */ - if (ctr == 0) + if (seq_no == 0) return -1;
memcpy(state->iv, ipsec_sa->salt, ipsec_sa->salt_length); - memcpy(state->iv + ipsec_sa->salt_length, &ctr, + memcpy(state->iv + ipsec_sa->salt_length, &seq_no, ipsec_sa->esp_iv_len);
if (ipsec_sa->aes_ctr_iv) { @@ -1056,6 +1053,7 @@ static int ipsec_out_esp(odp_packet_t *pkt, unsigned trl_len; unsigned pkt_len, new_len; uint8_t proto = _ODP_IPPROTO_ESP; + uint64_t seq_no;
if (odp_unlikely(opt->flag.tfc_dummy)) { ip_data_len = 0; @@ -1089,7 +1087,9 @@ static int ipsec_out_esp(odp_packet_t *pkt, return -1; }
- if (ipsec_out_iv(state, ipsec_sa) < 0) { + seq_no = ipsec_seq_no(ipsec_sa); + + if (ipsec_out_iv(state, ipsec_sa, seq_no) < 0) { status->error.alg = 1; return -1; } @@ -1099,7 +1099,7 @@ static int ipsec_out_esp(odp_packet_t *pkt,
memset(&esp, 0, sizeof(esp)); esp.spi = odp_cpu_to_be_32(ipsec_sa->spi); - esp.seq_no = odp_cpu_to_be_32(ipsec_seq_no(ipsec_sa)); + esp.seq_no = odp_cpu_to_be_32(seq_no & 0xffffffff);
state->esp.aad.spi = esp.spi; state->esp.aad.seq_no = esp.seq_no; @@ -1221,15 +1221,18 @@ static int ipsec_out_ah(odp_packet_t *pkt, ipsec_sa->icv_len; uint16_t ipsec_offset = state->ip_offset + state->ip_hdr_len; uint8_t proto = _ODP_IPPROTO_AH; + uint64_t seq_no;
if (state->ip_tot_len + hdr_len > mtu) { status->error.mtu = 1; return -1; }
+ seq_no = ipsec_seq_no(ipsec_sa); + memset(&ah, 0, sizeof(ah)); ah.spi = odp_cpu_to_be_32(ipsec_sa->spi); - ah.seq_no = odp_cpu_to_be_32(ipsec_seq_no(ipsec_sa)); + ah.seq_no = odp_cpu_to_be_32(seq_no & 0xffffffff); ah.next_header = state->ip_next_hdr;
odp_packet_copy_from_mem(*pkt, state->ip_next_hdr_offset, 1, &proto); @@ -1265,7 +1268,7 @@ static int ipsec_out_ah(odp_packet_t *pkt, ah.ah_len = hdr_len / 4 - 2;
/* For GMAC */ - if (ipsec_out_iv(state, ipsec_sa) < 0) { + if (ipsec_out_iv(state, ipsec_sa, seq_no) < 0) { status->error.alg = 1; return -1; } diff --git a/platform/linux-generic/odp_ipsec_sad.c b/platform/linux-generic/odp_ipsec_sad.c index 31a7ac92..c33c1cc1 100644 --- a/platform/linux-generic/odp_ipsec_sad.c +++ b/platform/linux-generic/odp_ipsec_sad.c @@ -321,7 +321,7 @@ odp_ipsec_sa_t odp_ipsec_sa_create(const odp_ipsec_sa_param_t *param) odp_atomic_init_u64(&ipsec_sa->hot.in.antireplay, 0); } else { ipsec_sa->lookup_mode = ODP_IPSEC_LOOKUP_DISABLED; - odp_atomic_store_u32(&ipsec_sa->hot.out.seq, 1); + odp_atomic_store_u64(&ipsec_sa->hot.out.seq, 1); ipsec_sa->out.frag_mode = param->outbound.frag_mode; ipsec_sa->out.mtu = param->outbound.mtu; } @@ -470,10 +470,6 @@ odp_ipsec_sa_t odp_ipsec_sa_create(const odp_ipsec_sa_param_t *param) break; }
- if (1 == ipsec_sa->use_counter_iv && - ODP_IPSEC_DIR_OUTBOUND == param->dir) - odp_atomic_init_u64(&ipsec_sa->hot.out.counter, 1); - ipsec_sa->icv_len = crypto_param.auth_digest_len;
if (param->crypto.cipher_key_extra.length) {
commit 23e4a06ea025492e33a9a0f759386133e9eef6ad Author: Janne Peltonen janne.peltonen@nokia.com Date: Tue Sep 11 11:37:49 2018 +0300
linux-gen: ipsec: separate hot r/w data from r/o data in an SA
Group very frequently updated SA state together, separately from read-only data to reduce false sharing of cache lines and resulting cache misses (not done for tun_ipv4.hdr_id which should be removed).
Signed-off-by: Janne Peltonen janne.peltonen@nokia.com Reviewed-by: Dmitry Eremin-Solenikov dmitry.ereminsolenikov@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/platform/linux-generic/include/odp_ipsec_internal.h b/platform/linux-generic/include/odp_ipsec_internal.h index 4941fbba..d9884f88 100644 --- a/platform/linux-generic/include/odp_ipsec_internal.h +++ b/platform/linux-generic/include/odp_ipsec_internal.h @@ -94,6 +94,27 @@ int _odp_ipsec_status_send(odp_queue_t queue, struct ipsec_sa_s { odp_atomic_u32_t ODP_ALIGNED_CACHE state;
+ /* + * State that gets updated very frequently. Grouped separately + * to avoid false cache line sharing with other data. + */ + struct ODP_ALIGNED_CACHE { + /* Statistics for soft/hard expiration */ + odp_atomic_u64_t bytes; + odp_atomic_u64_t packets; + + union { + struct { + odp_atomic_u64_t antireplay; + } in; + + struct { + odp_atomic_u64_t counter; /* for CTR/GCM */ + odp_atomic_u32_t seq; + } out; + }; + } hot; + uint32_t ipsec_sa_idx; odp_ipsec_sa_t ipsec_sa_hdl;
@@ -108,10 +129,6 @@ struct ipsec_sa_s { uint64_t hard_limit_bytes; uint64_t hard_limit_packets;
- /* Statistics for soft/hard expiration */ - odp_atomic_u64_t bytes; - odp_atomic_u64_t packets; - odp_crypto_session_t session; void *context; odp_queue_t queue; @@ -150,12 +167,9 @@ struct ipsec_sa_s { odp_u32be_t lookup_dst_ipv4; uint8_t lookup_dst_ipv6[_ODP_IPV6ADDR_LEN]; }; - odp_atomic_u64_t antireplay; } in;
struct { - odp_atomic_u64_t counter; /* for CTR/GCM */ - odp_atomic_u32_t seq; odp_ipsec_frag_mode_t frag_mode; uint32_t mtu;
diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c index a71efffe..6a44277d 100644 --- a/platform/linux-generic/odp_ipsec.c +++ b/platform/linux-generic/odp_ipsec.c @@ -813,7 +813,7 @@ err: static inline uint32_t ipsec_seq_no(ipsec_sa_t *ipsec_sa) { - return odp_atomic_fetch_add_u32(&ipsec_sa->out.seq, 1); + return odp_atomic_fetch_add_u32(&ipsec_sa->hot.out.seq, 1); }
/* Helper for calculating encode length using data length and block size */ @@ -1010,7 +1010,7 @@ static int ipsec_out_iv(ipsec_state_t *state, /* Both GCM and CTR use 8-bit counters */ ODP_ASSERT(sizeof(ctr) == ipsec_sa->esp_iv_len);
- ctr = odp_atomic_fetch_add_u64(&ipsec_sa->out.counter, + ctr = odp_atomic_fetch_add_u64(&ipsec_sa->hot.out.counter, 1); /* Check for overrun */ if (ctr == 0) diff --git a/platform/linux-generic/odp_ipsec_sad.c b/platform/linux-generic/odp_ipsec_sad.c index aa1c337d..31a7ac92 100644 --- a/platform/linux-generic/odp_ipsec_sad.c +++ b/platform/linux-generic/odp_ipsec_sad.c @@ -75,8 +75,8 @@ int _odp_ipsec_sad_init_global(void) ipsec_sa->ipsec_sa_hdl = ipsec_sa_index_to_handle(i); ipsec_sa->ipsec_sa_idx = i; odp_atomic_init_u32(&ipsec_sa->state, IPSEC_SA_STATE_FREE); - odp_atomic_init_u64(&ipsec_sa->bytes, 0); - odp_atomic_init_u64(&ipsec_sa->packets, 0); + odp_atomic_init_u64(&ipsec_sa->hot.bytes, 0); + odp_atomic_init_u64(&ipsec_sa->hot.packets, 0); }
return 0; @@ -318,10 +318,10 @@ odp_ipsec_sa_t odp_ipsec_sa_create(const odp_ipsec_sa_param_t *param) if (param->inbound.antireplay_ws > IPSEC_ANTIREPLAY_WS) goto error; ipsec_sa->antireplay = (param->inbound.antireplay_ws != 0); - odp_atomic_init_u64(&ipsec_sa->in.antireplay, 0); + odp_atomic_init_u64(&ipsec_sa->hot.in.antireplay, 0); } else { ipsec_sa->lookup_mode = ODP_IPSEC_LOOKUP_DISABLED; - odp_atomic_store_u32(&ipsec_sa->out.seq, 1); + odp_atomic_store_u32(&ipsec_sa->hot.out.seq, 1); ipsec_sa->out.frag_mode = param->outbound.frag_mode; ipsec_sa->out.mtu = param->outbound.mtu; } @@ -331,8 +331,8 @@ odp_ipsec_sa_t odp_ipsec_sa_create(const odp_ipsec_sa_param_t *param) ipsec_sa->copy_flabel = param->opt.copy_flabel; ipsec_sa->udp_encap = param->opt.udp_encap;
- odp_atomic_store_u64(&ipsec_sa->bytes, 0); - odp_atomic_store_u64(&ipsec_sa->packets, 0); + odp_atomic_store_u64(&ipsec_sa->hot.bytes, 0); + odp_atomic_store_u64(&ipsec_sa->hot.packets, 0); ipsec_sa->soft_limit_bytes = param->lifetime.soft_limit.bytes; ipsec_sa->soft_limit_packets = param->lifetime.soft_limit.packets; ipsec_sa->hard_limit_bytes = param->lifetime.hard_limit.bytes; @@ -472,7 +472,7 @@ odp_ipsec_sa_t odp_ipsec_sa_create(const odp_ipsec_sa_param_t *param)
if (1 == ipsec_sa->use_counter_iv && ODP_IPSEC_DIR_OUTBOUND == param->dir) - odp_atomic_init_u64(&ipsec_sa->out.counter, 1); + odp_atomic_init_u64(&ipsec_sa->hot.out.counter, 1);
ipsec_sa->icv_len = crypto_param.auth_digest_len;
@@ -636,13 +636,13 @@ int _odp_ipsec_sa_stats_precheck(ipsec_sa_t *ipsec_sa, int rc = 0;
if (ipsec_sa->hard_limit_bytes > 0 && - odp_atomic_load_u64(&ipsec_sa->bytes) > + odp_atomic_load_u64(&ipsec_sa->hot.bytes) > ipsec_sa->hard_limit_bytes) { status->error.hard_exp_bytes = 1; rc = -1; } if (ipsec_sa->hard_limit_packets > 0 && - odp_atomic_load_u64(&ipsec_sa->packets) > + odp_atomic_load_u64(&ipsec_sa->hot.packets) > ipsec_sa->hard_limit_packets) { status->error.hard_exp_packets = 1; rc = -1; @@ -654,8 +654,8 @@ int _odp_ipsec_sa_stats_precheck(ipsec_sa_t *ipsec_sa, int _odp_ipsec_sa_stats_update(ipsec_sa_t *ipsec_sa, uint32_t len, odp_ipsec_op_status_t *status) { - uint64_t bytes = odp_atomic_fetch_add_u64(&ipsec_sa->bytes, len) + len; - uint64_t packets = odp_atomic_fetch_add_u64(&ipsec_sa->packets, 1) + 1; + uint64_t bytes = odp_atomic_fetch_add_u64(&ipsec_sa->hot.bytes, len) + len; + uint64_t packets = odp_atomic_fetch_add_u64(&ipsec_sa->hot.packets, 1) + 1; int rc = 0;
if (ipsec_sa->soft_limit_bytes > 0 && @@ -686,7 +686,7 @@ int _odp_ipsec_sa_replay_precheck(ipsec_sa_t *ipsec_sa, uint32_t seq, /* Try to be as quick as possible, we will discard packets later */ if (ipsec_sa->antireplay && seq + IPSEC_ANTIREPLAY_WS <= - (odp_atomic_load_u64(&ipsec_sa->in.antireplay) & 0xffffffff)) { + (odp_atomic_load_u64(&ipsec_sa->hot.in.antireplay) & 0xffffffff)) { status->error.antireplay = 1; return -1; } @@ -703,7 +703,7 @@ int _odp_ipsec_sa_replay_update(ipsec_sa_t *ipsec_sa, uint32_t seq, if (!ipsec_sa->antireplay) return 0;
- state = odp_atomic_load_u64(&ipsec_sa->in.antireplay); + state = odp_atomic_load_u64(&ipsec_sa->hot.in.antireplay);
while (0 == cas) { uint32_t max_seq = state & 0xffffffff; @@ -728,7 +728,7 @@ int _odp_ipsec_sa_replay_update(ipsec_sa_t *ipsec_sa, uint32_t seq,
new_state = (((uint64_t)mask) << 32) | max_seq;
- cas = odp_atomic_cas_acq_rel_u64(&ipsec_sa->in.antireplay, + cas = odp_atomic_cas_acq_rel_u64(&ipsec_sa->hot.in.antireplay, &state, new_state); }
commit 917a2bf7e8a41538f811889099738eb3979efc3a Author: Janne Peltonen janne.peltonen@nokia.com Date: Mon Sep 17 14:55:53 2018 +0300
linux-gen: ipsec: remove SA reference counting from outbound processing
SA reference counting in IPsec output has a severe performance penalty when the same SA is used in multiple threads. Remove SA reference counting from odp_ipsec_out() and odp_ipsec_out_enq() as it is actually unnecessary with applications adhering to the ODP API.
The reference counting would prevent odp_ipsec_sa_disable() from completing if IPsec outbound processing for the same SA were still in progress in some other thread. With a correctly behaving ODP application such a situation never occurs because the ODP API requires that odp_ipsec_sa_disable() must not be called at the same time as odp_ipsec_out() or odp_ipsec_out_enq() for the same SA. The disable call must thus happen after (in the C11 thread model sense, including memory ordering) any conflicting IPsec output call, which means that the ODP application must use appropriate synchronization mechanisms to ensure that all odp_ipsec_out()/odp_ipsec_out_enq() calls (for the SA) have completed before odp_ipsec_sa_disable() is called.
Similarly, when an SA is created, the handle must not be used in other threads for output before the creation is complete and visible. This means that an ODP application must use a proper synchronization mechanism when passing the handle of a newly created SA to another thread and before using it there. This in turn makes the SA state check in ipsec_sa_lock() unnecessary when indirectly called through the IPsec output routines.
Signed-off-by: Janne Peltonen janne.peltonen@nokia.com Reviewed-by: Dmitry Eremin-Solenikov dmitry.ereminsolenikov@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/platform/linux-generic/include/odp_ipsec_internal.h b/platform/linux-generic/include/odp_ipsec_internal.h index dfde4d57..4941fbba 100644 --- a/platform/linux-generic/include/odp_ipsec_internal.h +++ b/platform/linux-generic/include/odp_ipsec_internal.h @@ -207,6 +207,11 @@ uint32_t _odp_ipsec_cipher_iv_len(odp_cipher_alg_t cipher); /* Return digest length required for the cipher for IPsec use */ uint32_t _odp_ipsec_auth_digest_len(odp_auth_alg_t auth);
+/* + * Get SA entry from handle without obtaining a reference + */ +ipsec_sa_t *_odp_ipsec_sa_entry_from_hdl(odp_ipsec_sa_t sa); + /** * Obtain SA reference */ diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c index a62266ec..a71efffe 100644 --- a/platform/linux-generic/odp_ipsec.c +++ b/platform/linux-generic/odp_ipsec.c @@ -1380,7 +1380,14 @@ static ipsec_sa_t *ipsec_out_single(odp_packet_t pkt, odp_ipsec_frag_mode_t frag_mode; uint32_t mtu;
- ipsec_sa = _odp_ipsec_sa_use(sa); + /* + * No need to do _odp_ipsec_sa_use() here since an ODP application + * is not allowed to do call IPsec output before SA creation has + * completed nor call odp_ipsec_sa_disable() before IPsec output + * has completed. IOW, the needed sychronization between threads + * is done by the application. + */ + ipsec_sa = _odp_ipsec_sa_entry_from_hdl(sa); ODP_ASSERT(NULL != ipsec_sa);
if (opt->flag.tfc_dummy) { @@ -1482,6 +1489,18 @@ static ipsec_sa_t *ipsec_out_single(odp_packet_t pkt,
param.session = ipsec_sa->session;
+ /* + * NOTE: Do not change to an asynchronous design without thinking + * concurrency and what changes are required to guarantee that + * used SAs are not destroyed when asynchronous operations are in + * progress. + * + * The containing code does not hold a reference to the SA but + * completes outbound processing synchronously and makes use of + * the fact that the application may not disable (and then destroy) + * the SA before this output routine returns (and all its side + * effects are visible to the disabling thread). + */ rc = odp_crypto_op(&pkt, &pkt, ¶m, 1); if (rc < 0) { ODP_DBG("Crypto failed\n"); @@ -1632,9 +1651,6 @@ int odp_ipsec_out(const odp_packet_t pkt_in[], int num_in, out_pkt++; sa_idx += sa_inc; opt_idx += opt_inc; - - /* Last thing */ - _odp_ipsec_sa_unuse(ipsec_sa); }
*num_out = out_pkt; @@ -1742,9 +1758,6 @@ int odp_ipsec_out_enq(const odp_packet_t pkt_in[], int num_in, in_pkt++; sa_idx += sa_inc; opt_idx += opt_inc; - - /* Last thing */ - _odp_ipsec_sa_unuse(ipsec_sa); }
return in_pkt; @@ -1884,9 +1897,6 @@ err: in_pkt++; sa_idx += sa_inc; opt_idx += opt_inc; - - /* Last thing */ - _odp_ipsec_sa_unuse(ipsec_sa); }
return in_pkt; diff --git a/platform/linux-generic/odp_ipsec_sad.c b/platform/linux-generic/odp_ipsec_sad.c index 6dd7ec71..aa1c337d 100644 --- a/platform/linux-generic/odp_ipsec_sad.c +++ b/platform/linux-generic/odp_ipsec_sad.c @@ -47,6 +47,12 @@ static inline odp_ipsec_sa_t ipsec_sa_index_to_handle(uint32_t ipsec_sa_idx) return _odp_cast_scalar(odp_ipsec_sa_t, ipsec_sa_idx + 1); }
+ipsec_sa_t *_odp_ipsec_sa_entry_from_hdl(odp_ipsec_sa_t sa) +{ + ODP_ASSERT(ODP_IPSEC_SA_INVALID != sa); + return ipsec_sa_entry_from_hdl(sa); +} + int _odp_ipsec_sad_init_global(void) { odp_shm_t shm;
commit f60d82e96b995e10217ab40b6ad9f9cba3a6cadb Author: Janne Peltonen janne.peltonen@nokia.com Date: Fri Sep 7 13:14:03 2018 +0300
linux-gen: ipsec: speed up random IV generation by thread-local buffering
Outbound IPsec processing for SAs that require random IV is slow since the OpenSSL-based odp_random_data() is slow and not multi-thread scalable.
Improve performance by not calling odp_random_data() for every packet but by getting random data for IVs from a thread local buffer that is filled by less frequent but larger odp_random_data requests.
Signed-off-by: Janne Peltonen janne.peltonen@nokia.com Reviewed-by: Dmitry Eremin-Solenikov dmitry.ereminsolenikov@linaro.org Reviewed-by: Bill Fischofer bill.fischofer@linaro.org Signed-off-by: Maxim Uvarov maxim.uvarov@linaro.org
diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c index 63121744..a62266ec 100644 --- a/platform/linux-generic/odp_ipsec.c +++ b/platform/linux-generic/odp_ipsec.c @@ -976,6 +976,31 @@ static int ipsec_out_tunnel_ipv6(odp_packet_t *pkt, return 0; }
+#define IPSEC_RANDOM_BUF_SIZE 256 + +static int ipsec_random_data(uint8_t *data, uint32_t len) +{ + static __thread uint8_t buffer[IPSEC_RANDOM_BUF_SIZE]; + static __thread uint32_t buffer_used = IPSEC_RANDOM_BUF_SIZE; + + if (odp_likely(buffer_used + len <= IPSEC_RANDOM_BUF_SIZE)) { + memcpy(data, &buffer[buffer_used], len); + buffer_used += len; + } else if (odp_likely(len <= IPSEC_RANDOM_BUF_SIZE)) { + uint32_t rnd_len; + + rnd_len = odp_random_data(buffer, IPSEC_RANDOM_BUF_SIZE, + odp_global_ro.ipsec_rand_kind); + if (odp_unlikely(rnd_len != IPSEC_RANDOM_BUF_SIZE)) + return -1; + memcpy(data, &buffer[0], len); + buffer_used = len; + } else { + return -1; + } + return 0; +} + static int ipsec_out_iv(ipsec_state_t *state, ipsec_sa_t *ipsec_sa) { @@ -1002,12 +1027,7 @@ static int ipsec_out_iv(ipsec_state_t *state, state->iv[15] = 1; } } else if (ipsec_sa->esp_iv_len) { - uint32_t len; - - len = odp_random_data(state->iv, ipsec_sa->esp_iv_len, - odp_global_ro.ipsec_rand_kind); - - if (len != ipsec_sa->esp_iv_len) + if (ipsec_random_data(state->iv, ipsec_sa->esp_iv_len)) return -1; }
-----------------------------------------------------------------------
Summary of changes: .../linux-generic/include/odp_ipsec_internal.h | 45 ++++++-- platform/linux-generic/odp_ipsec.c | 103 ++++++++++++------ platform/linux-generic/odp_ipsec_sad.c | 118 +++++++++++++++++---- test/validation/api/ipsec/ipsec.c | 44 +++++++- 4 files changed, 246 insertions(+), 64 deletions(-)
hooks/post-receive