This patchset introduces a new per-port bonding option: `actor_port_prio`.
It allows users to configure the actor's port priority, which can then be used by the bonding driver for aggregator selection based on port priority.
This provides finer control over LACP aggregator choice, especially in setups with multiple eligible aggregators over 2 switches.
v3: a) add comments when init slave port_priority (Jonas Gorski) b) rename ad_lacp_port_prio to lacp_port_prio (Jay Vosburgh)
v2: a) set default bond option value for port priority (Nikolay Aleksandrov) b) fix __agg_ports_priority coding style (Nikolay Aleksandrov) c) fix shellcheck warns
Hangbin Liu (3): bonding: add support for per-port LACP actor priority bonding: support aggregator selection based on port priority selftests: bonding: add test for LACP actor port priority
Documentation/networking/bonding.rst | 18 +++- drivers/net/bonding/bond_3ad.c | 31 +++++++ drivers/net/bonding/bond_netlink.c | 16 ++++ drivers/net/bonding/bond_options.c | 37 ++++++++ include/net/bond_3ad.h | 2 + include/net/bond_options.h | 1 + include/uapi/linux/if_link.h | 1 + .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_lacp_prio.sh | 93 +++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 24 ----- tools/testing/selftests/net/lib.sh | 24 +++++ 11 files changed, 224 insertions(+), 26 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh
Introduce a new per-slave bonding option 'actor_port_prio', exposed through the new netlink attribute IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, to allow setting the LACP actor port priority on a per-slave basis. This extends the existing bonding infrastructure to support more granular control over LACP negotiations.
The priority value is embedded in LACPDU packets and will be used by subsequent patches to influence aggregator selection policies.
Signed-off-by: Hangbin Liu liuhangbin@gmail.com --- Documentation/networking/bonding.rst | 9 +++++++ drivers/net/bonding/bond_3ad.c | 4 ++++ drivers/net/bonding/bond_netlink.c | 16 +++++++++++++ drivers/net/bonding/bond_options.c | 36 ++++++++++++++++++++++++++++ include/net/bond_3ad.h | 1 + include/net/bond_options.h | 1 + include/uapi/linux/if_link.h | 1 + 7 files changed, 68 insertions(+)
diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index f8f5766703d4..1ca7830c24ea 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -193,6 +193,15 @@ ad_actor_sys_prio This parameter has effect only in 802.3ad mode and is available through SysFs interface.
+actor_port_prio + + In an AD system, this specifies the port priority. The allowed range + is 1 - 65535. If the value is not specified, it takes 255 as the + default value. + + This parameter has effect only in 802.3ad mode and is available through + netlink interface. + ad_actor_system
In an AD system, this specifies the mac-address for the actor in diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 2fca8e84ab10..eb0fb7374391 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -436,6 +436,7 @@ static void __ad_actor_update_port(struct port *port)
port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; + port->actor_port_priority = SLAVE_AD_INFO(port->slave)->port_priority; }
/* Conversions */ @@ -2203,6 +2204,9 @@ void bond_3ad_bind_slave(struct slave *slave)
ad_initialize_port(port, bond->params.lacp_fast);
+ /* Port priority is initialized. Update it to slave's ad info */ + SLAVE_AD_INFO(slave)->port_priority = port->actor_port_priority; + port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; /* key is determined according to the link speed, duplex and diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 57fff2421f1b..16ef13ddcf22 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */ 0; }
@@ -77,6 +78,10 @@ static int bond_fill_slave_info(struct sk_buff *skb, ad_port->partner_oper.port_state)) goto nla_put_failure; } + + if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, + SLAVE_AD_INFO(slave)->port_priority)) + goto nla_put_failure; }
return 0; @@ -130,6 +135,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 }, + [IFLA_BOND_SLAVE_ACTOR_PORT_PRIO] = { .type = NLA_U16 }, };
static int bond_validate(struct nlattr *tb[], struct nlattr *data[], @@ -180,6 +186,16 @@ static int bond_slave_changelink(struct net_device *bond_dev, return err; }
+ if (data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]) { + u16 ad_prio = nla_get_u16(data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]); + + bond_opt_slave_initval(&newval, &slave_dev, ad_prio); + err = __bond_opt_set(bond, BOND_OPT_ACTOR_PORT_PRIO, &newval, + data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO], extack); + if (err) + return err; + } + return 0; }
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 1d639a3be6ba..8f78f83fb29c 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -79,6 +79,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_actor_port_prio_set(struct bonding *bond, + const struct bond_opt_value *newval); static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_user_port_key_set(struct bonding *bond, @@ -223,6 +225,13 @@ static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { { NULL, -1, 0}, };
+static const struct bond_opt_value bond_actor_port_prio_tbl[] = { + { "minval", 1, BOND_VALFLAG_MIN}, + { "maxval", 65535, BOND_VALFLAG_MAX}, + { "default", 255, BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0}, +}; + static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", 1023, BOND_VALFLAG_MAX}, @@ -484,6 +493,13 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_ad_actor_sys_prio_tbl, .set = bond_option_ad_actor_sys_prio_set, }, + [BOND_OPT_ACTOR_PORT_PRIO] = { + .id = BOND_OPT_ACTOR_PORT_PRIO, + .name = "actor_port_prio", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .values = bond_actor_port_prio_tbl, + .set = bond_option_actor_port_prio_set, + }, [BOND_OPT_AD_ACTOR_SYSTEM] = { .id = BOND_OPT_AD_ACTOR_SYSTEM, .name = "ad_actor_system", @@ -1816,6 +1832,26 @@ static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, return 0; }
+static int bond_option_actor_port_prio_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + struct slave *slave; + + slave = bond_slave_get_rtnl(newval->slave_dev); + if (!slave) { + netdev_dbg(bond->dev, "%s called on NULL slave\n", __func__); + return -ENODEV; + } + + netdev_dbg(newval->slave_dev, "Setting actor_port_prio to %llu\n", + newval->value); + + SLAVE_AD_INFO(slave)->port_priority = newval->value; + bond_3ad_update_ad_actor_settings(bond); + + return 0; +} + static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 2053cd8e788a..bf551ca70359 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -274,6 +274,7 @@ struct ad_slave_info { struct port port; /* 802.3ad port structure */ struct bond_3ad_stats stats; u16 id; + u16 port_priority; };
static inline const char *bond_3ad_churn_desc(churn_state_t state) diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 022b122a9fb6..e6eedf23aea1 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -78,6 +78,7 @@ enum { BOND_OPT_PRIO, BOND_OPT_COUPLED_CONTROL, BOND_OPT_BROADCAST_NEIGH, + BOND_OPT_ACTOR_PORT_PRIO, BOND_OPT_LAST };
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 784ace3a519c..45f56c9f95d9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1564,6 +1564,7 @@ enum { IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, IFLA_BOND_SLAVE_PRIO, + IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, __IFLA_BOND_SLAVE_MAX, };
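Add a new ad_select policy 'prio' that uses the per-port actor priority values (set via actor_port_prio) to determine aggregator selection. The aggregator with the highest total priority across its enabled ports is preferred; if the totals are equal, selection falls back to the port count.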
This allows administrators to influence which ports are preferred for aggregation by assigning different priority values, providing more flexible load balancing control in LACP configurations.
Signed-off-by: Hangbin Liu liuhangbin@gmail.com --- Documentation/networking/bonding.rst | 9 ++++++++- drivers/net/bonding/bond_3ad.c | 27 +++++++++++++++++++++++++++ drivers/net/bonding/bond_options.c | 1 + include/net/bond_3ad.h | 1 + 4 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 1ca7830c24ea..10d952c3c225 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -250,7 +250,14 @@ ad_select ports (slaves). Reselection occurs as described under the "bandwidth" setting, above.
- The bandwidth and count selection policies permit failover of + prio or 3 + + The active aggregator is chosen by the highest total sum of + actor port priorities across its active ports. Note this + priority is actor_port_prio, not per port prio, which is + used for primary reselect. + + The bandwidth, count and prio selection policies permit failover of 802.3ad aggregations when partial failure of the active aggregator occurs. This keeps the aggregator with the highest availability (either in bandwidth or in number of ports) active at all times. diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index eb0fb7374391..7a0fe057cb6b 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -747,6 +747,18 @@ static int __agg_active_ports(struct aggregator *agg) return active; }
+static unsigned int __agg_ports_priority(const struct aggregator *agg) +{ + struct port *port = agg->lag_ports; + unsigned int prio = 0; + + for (; port; port = port->next_port_in_aggregator) + if (port->is_enabled) + prio += port->actor_port_priority; + + return prio; +} + /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at @@ -1707,6 +1719,9 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, * BOND_AD_COUNT: Select by count of ports. If count is equal, * select by bandwidth. * + * BOND_AD_PRIO: Select by total priority of ports. If priority + * is equal, select by count. + * * BOND_AD_STABLE, BOND_AD_BANDWIDTH: Select by bandwidth. */ if (!best) @@ -1725,6 +1740,14 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, return best;
switch (__get_agg_selection_mode(curr->lag_ports)) { + case BOND_AD_PRIO: + if (__agg_ports_priority(curr) > __agg_ports_priority(best)) + return curr; + + if (__agg_ports_priority(curr) < __agg_ports_priority(best)) + return best; + + fallthrough; case BOND_AD_COUNT: if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; @@ -1790,6 +1813,10 @@ static int agg_device_up(const struct aggregator *agg) * (slaves), and reselect whenever a link state change takes place or the * set of slaves in the bond changes. * + * BOND_AD_PRIO: select the aggregator with highest total priority of ports + * (slaves), and reselect whenever a link state change takes place or the + * set of slaves in the bond changes. + * * FIXME: this function MUST be called with the first agg in the bond, or * __get_active_agg() won't work correctly. This function should be better * called with the bond itself, and retrieve the first agg from it. diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 8f78f83fb29c..16448ee2cef4 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -165,6 +165,7 @@ static const struct bond_opt_value bond_ad_select_tbl[] = { { "stable", BOND_AD_STABLE, BOND_VALFLAG_DEFAULT}, { "bandwidth", BOND_AD_BANDWIDTH, 0}, { "count", BOND_AD_COUNT, 0}, + { "prio", BOND_AD_PRIO, 0}, { NULL, -1, 0}, };
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index bf551ca70359..34495df965f0 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -26,6 +26,7 @@ enum { BOND_AD_STABLE = 0, BOND_AD_BANDWIDTH = 1, BOND_AD_COUNT = 2, + BOND_AD_PRIO = 3, };
/* rx machine states(43.4.11 in the 802.3ad standard) */
Add a selftest to verify: - Per-port actor priority setting via actor_port_prio - Aggregator selection behavior with the 'prio' ad_select policy
Also move cmd_jq helper from forwarding/lib.sh to net/lib.sh for broader reusability across network selftests.
Here is the test output: # ./bond_lacp_prio.sh TEST: bond 802.3ad (actor_port_prio setting) [ OK ] TEST: bond 802.3ad (actor_port_prio select) [ OK ] TEST: bond 802.3ad (actor_port_prio switch) [ OK ]
Signed-off-by: Hangbin Liu liuhangbin@gmail.com --- .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_lacp_prio.sh | 93 +++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 24 ----- tools/testing/selftests/net/lib.sh | 24 +++++ 4 files changed, 119 insertions(+), 25 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 2b10854e4b1e..32617a834a6b 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -10,7 +10,8 @@ TEST_PROGS := \ mode-2-recovery-updelay.sh \ bond_options.sh \ bond-eth-type-change.sh \ - bond_macvlan_ipvlan.sh + bond_macvlan_ipvlan.sh \ + bond_lacp_prio.sh
TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh new file mode 100755 index 000000000000..491ad9be3a93 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing if bond lacp per port priority works +# +# Switch (s_ns) Backup Switch (b_ns) +# +-------------------------+ +-------------------------+ +# | bond0 | | bond0 | +# | + | | + | +# | eth0 | eth1 | | eth0 | eth1 | +# | +---+---+ | | +---+---+ | +# | | | | | | | | +# +-------------------------+ +-------------------------+ +# | | | | +# +-----------------------------------------------------+ +# | | | | | | +# | +-------+---------+---------+-------+ | +# | eth0 eth1 | eth2 eth3 | +# | + | +# | bond0 | +# +-----------------------------------------------------+ +# Client (c_ns) + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh +RET=0 + +trap cleanup_all_ns EXIT +# create client, switch, backup switch netns +setup_ns c_ns s_ns b_ns + +# setup links +# shellcheck disable=SC2154 +ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" +ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" +# shellcheck disable=SC2154 +ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}" +ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}" + +ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 lacp_rate fast ad_select prio +ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 lacp_rate fast +ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 lacp_rate fast + +ip -n "${c_ns}" link set eth0 master bond0 +ip -n "${c_ns}" link set eth1 master bond0 +ip -n "${c_ns}" link set eth2 master bond0 +ip -n "${c_ns}" link set eth3 master bond0 +ip -n "${s_ns}" link set eth0 master 
bond0 +ip -n "${s_ns}" link set eth1 master bond0 +ip -n "${b_ns}" link set eth0 master bond0 +ip -n "${b_ns}" link set eth1 master bond0 + +ip -n "${c_ns}" link set bond0 up +ip -n "${s_ns}" link set bond0 up +ip -n "${b_ns}" link set bond0 up + +# set ad actor port priority, default 255 +ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000 +prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" ".[].linkinfo.info_slave_data.actor_port_prio") +[ "$prio" -ne 1000 ] && RET=1 +ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10 +prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" ".[].linkinfo.info_slave_data.actor_port_prio") +[ "$prio" -ne 10 ] && RET=1 +log_test "bond 802.3ad" "actor_port_prio setting" + +# Trigger link state change to reselect the aggregator +ip -n "${c_ns}" link set eth1 down +sleep 1 +ip -n "${c_ns}" link set eth1 up +# the active agg should be connect to switch +bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" ".[].linkinfo.info_data.ad_info.aggregator") +eth0_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" ".[].linkinfo.info_slave_data.ad_aggregator_id") +[ "${bond_agg_id}" -ne "${eth0_agg_id}" ] && RET=1 +log_test "bond 802.3ad" "actor_port_prio select" + +# Change the actor port prio and re-test +ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10 +ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000 +# Trigger link state change to reselect the aggregator +ip -n "${c_ns}" link set eth1 down +sleep 1 +ip -n "${c_ns}" link set eth1 up +# now the active agg should be connect to backup switch +bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" ".[].linkinfo.info_data.ad_info.aggregator") +eth2_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" ".[].linkinfo.info_slave_data.ad_aggregator_id") +# shellcheck disable=SC2034 +if [ "${bond_agg_id}" -ne "${eth2_agg_id}" ]; then + RET=1 +fi +log_test "bond 802.3ad" "actor_port_prio switch" + +exit "${EXIT_STATUS}" diff --git 
a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 890b3374dacd..08121cb9dc26 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -571,30 +571,6 @@ wait_for_dev() fi }
-cmd_jq() -{ - local cmd=$1 - local jq_exp=$2 - local jq_opts=$3 - local ret - local output - - output="$($cmd)" - # it the command fails, return error right away - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - output=$(echo $output | jq -r $jq_opts "$jq_exp") - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output - # return success only in case of non-empty output - [ ! -z "$output" ] -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index c7add0dc4c60..4dca6893aa8a 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -645,3 +645,27 @@ wait_local_port_listen() sleep 0.1 done } + +cmd_jq() +{ + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + output="$($cmd)" + # it the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ ! -z "$output" ] +}
linux-kselftest-mirror@lists.linaro.org