2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

Merge branch 'bonding-fix-negotiation-flapping-in-802-3ad-passive-mode'

Hangbin Liu says:

====================
bonding: fix negotiation flapping in 802.3ad passive mode

This patch fixes unstable LACP negotiation when bonding is configured in
passive mode (`lacp_active=off`).

Previously, the actor would stop sending LACPDUs after initial negotiation
succeeded, leading to the partner timing out and restarting the negotiation
cycle. This resulted in continuous LACP state flapping.

The fix ensures the passive actor starts sending periodic LACPDUs after
receiving the first LACPDU from the partner, in accordance with IEEE
802.1AX-2020 section 6.4.1.
====================

Link: https://patch.msgid.link/20250815062000.22220-1-liuhangbin@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2025-08-21 09:35:24 +02:00
commit 184fa9d704
6 changed files with 159 additions and 19 deletions

View File

@ -95,13 +95,13 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker);
static void ad_mux_machine(struct port *port, bool *update_slave_arr);
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
static void ad_tx_machine(struct port *port);
static void ad_periodic_machine(struct port *port, struct bond_params *bond_params);
static void ad_periodic_machine(struct port *port);
static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
static void ad_agg_selection_logic(struct aggregator *aggregator,
bool *update_slave_arr);
static void ad_clear_agg(struct aggregator *aggregator);
static void ad_initialize_agg(struct aggregator *aggregator);
static void ad_initialize_port(struct port *port, int lacp_fast);
static void ad_initialize_port(struct port *port, const struct bond_params *bond_params);
static void ad_enable_collecting(struct port *port);
static void ad_disable_distributing(struct port *port,
bool *update_slave_arr);
@ -1307,10 +1307,16 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
* case of EXPIRED even if LINK_DOWN didn't arrive for
* the port.
*/
port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION;
port->sm_vars &= ~AD_PORT_MATCHED;
/* Based on IEEE 802.1AX-2014, Figure 6-18 - Receive
* machine state diagram, the state should be
* Partner_Oper_Port_State.Synchronization = FALSE;
* Partner_Oper_Port_State.LACP_Timeout = Short Timeout;
* start current_while_timer(Short Timeout);
* Actor_Oper_Port_State.Expired = TRUE;
*/
port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION;
port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT;
port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY;
port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT));
port->actor_oper_port_state |= LACP_STATE_EXPIRED;
port->sm_vars |= AD_PORT_CHURNED;
@ -1417,11 +1423,10 @@ static void ad_tx_machine(struct port *port)
/**
* ad_periodic_machine - handle a port's periodic state machine
* @port: the port we're looking at
* @bond_params: bond parameters we will use
*
* Turn ntt flag on periodically to perform periodic transmission of lacpdu's.
*/
static void ad_periodic_machine(struct port *port, struct bond_params *bond_params)
static void ad_periodic_machine(struct port *port)
{
periodic_states_t last_state;
@ -1430,8 +1435,7 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
/* check if port was reinitialized */
if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
(!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
!bond_params->lacp_active) {
(!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))) {
port->sm_periodic_state = AD_NO_PERIODIC;
}
/* check if state machine should change state */
@ -1955,16 +1959,16 @@ static void ad_initialize_agg(struct aggregator *aggregator)
/**
* ad_initialize_port - initialize a given port's parameters
* @port: the port we're looking at
* @lacp_fast: boolean. whether fast periodic should be used
* @bond_params: bond parameters we will use
*/
static void ad_initialize_port(struct port *port, int lacp_fast)
static void ad_initialize_port(struct port *port, const struct bond_params *bond_params)
{
static const struct port_params tmpl = {
.system_priority = 0xffff,
.key = 1,
.port_number = 1,
.port_priority = 0xff,
.port_state = 1,
.port_state = 0,
};
static const struct lacpdu lacpdu = {
.subtype = 0x01,
@ -1982,12 +1986,14 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
port->actor_port_priority = 0xff;
port->actor_port_aggregator_identifier = 0;
port->ntt = false;
port->actor_admin_port_state = LACP_STATE_AGGREGATION |
LACP_STATE_LACP_ACTIVITY;
port->actor_oper_port_state = LACP_STATE_AGGREGATION |
LACP_STATE_LACP_ACTIVITY;
port->actor_admin_port_state = LACP_STATE_AGGREGATION;
port->actor_oper_port_state = LACP_STATE_AGGREGATION;
if (bond_params->lacp_active) {
port->actor_admin_port_state |= LACP_STATE_LACP_ACTIVITY;
port->actor_oper_port_state |= LACP_STATE_LACP_ACTIVITY;
}
if (lacp_fast)
if (bond_params->lacp_fast)
port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT;
memcpy(&port->partner_admin, &tmpl, sizeof(tmpl));
@ -2201,7 +2207,7 @@ void bond_3ad_bind_slave(struct slave *slave)
/* port initialization */
port = &(SLAVE_AD_INFO(slave)->port);
ad_initialize_port(port, bond->params.lacp_fast);
ad_initialize_port(port, &bond->params);
port->slave = slave;
port->actor_port_number = SLAVE_AD_INFO(slave)->id;
@ -2513,7 +2519,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
}
ad_rx_machine(NULL, port);
ad_periodic_machine(port, &bond->params);
ad_periodic_machine(port);
ad_port_selection_logic(port, &update_slave_arr);
ad_mux_machine(port, &update_slave_arr);
ad_tx_machine(port);
@ -2883,6 +2889,31 @@ void bond_3ad_update_lacp_rate(struct bonding *bond)
spin_unlock_bh(&bond->mode_lock);
}
/**
 * bond_3ad_update_lacp_active - apply the lacp_active setting to every port
 * @bond: bonding struct
 *
 * Walk all slaves of @bond and set or clear LACP_STATE_LACP_ACTIVITY in each
 * port's actor_oper_port_state so that it matches bond->params.lacp_active.
 * The parameter is read once, before bond->mode_lock is taken; the per-port
 * updates are performed with that lock held.
 */
void bond_3ad_update_lacp_active(struct bonding *bond)
{
	int active = bond->params.lacp_active;
	struct list_head *pos;
	struct slave *slave;

	spin_lock_bh(&bond->mode_lock);
	bond_for_each_slave(bond, slave, pos) {
		struct port *port = &(SLAVE_AD_INFO(slave)->port);

		if (!active)
			port->actor_oper_port_state &= ~LACP_STATE_LACP_ACTIVITY;
		else
			port->actor_oper_port_state |= LACP_STATE_LACP_ACTIVITY;
	}
	spin_unlock_bh(&bond->mode_lock);
}
size_t bond_3ad_stats_size(void)
{
return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */

View File

@ -1660,6 +1660,7 @@ static int bond_option_lacp_active_set(struct bonding *bond,
netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n",
newval->string, newval->value);
bond->params.lacp_active = newval->value;
bond_3ad_update_lacp_active(bond);
return 0;
}

View File

@ -307,6 +307,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
struct slave *slave);
int bond_3ad_set_carrier(struct bonding *bond);
void bond_3ad_update_lacp_rate(struct bonding *bond);
void bond_3ad_update_lacp_active(struct bonding *bond);
void bond_3ad_update_ad_actor_settings(struct bonding *bond);
int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
size_t bond_3ad_stats_size(void);

View File

@ -10,7 +10,8 @@ TEST_PROGS := \
mode-2-recovery-updelay.sh \
bond_options.sh \
bond-eth-type-change.sh \
bond_macvlan_ipvlan.sh
bond_macvlan_ipvlan.sh \
bond_passive_lacp.sh
TEST_FILES := \
lag_lib.sh \

View File

@ -0,0 +1,105 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Test if a bond interface works with lacp_active=off.
# REQUIRE_MZ and NUM_NETIFS are not read anywhere in this script; they are
# presumably consumed by the sourced forwarding lib.sh (hence the SC2034
# "unused variable" suppression) -- confirm against lib.sh.
# shellcheck disable=SC2034
REQUIRE_MZ=no
NUM_NETIFS=0
# Resolve the helper library relative to this script's own directory.
lib_dir=$(dirname "$0")
# shellcheck disable=SC1091
source "$lib_dir"/../../../net/forwarding/lib.sh
# Succeed (exit status 0) when the ad_actor_oper_port_state_str list reported
# for a bond slave contains the given flag name.
#   $1 - network namespace to query
#   $2 - slave interface name
#   $3 - state flag to look for (e.g. "active", "distributing", "expired")
# shellcheck disable=SC2317
check_port_state()
{
	local ns_name=$1
	local slave_dev=$2
	local flag=$3
	local jq_expr

	# jq -e turns a false/null result into a non-zero exit status.
	jq_expr=".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${flag}\") != null"

	ip -n "${ns_name}" -d -j link show "$slave_dev" | \
		jq -e "${jq_expr}" > /dev/null
}
# Poll the egress tc filter with handle 101 on an interface and report, via
# the exit status of slowwait_for_counter, whether its ".packets" counter
# moved. Also resets the global RET to 0 before polling.
#   $1 - network namespace holding the interface
#   $2 - interface whose egress filter is inspected
check_pkt_count()
{
	local netns="$1"
	local dev="$2"

	RET=0

	# wait 65s, one per 30s
	# (65 and 2 are presumably the timeout in seconds and the required
	# counter delta -- confirm against slowwait_for_counter in lib.sh)
	slowwait_for_counter 65 2 tc_rule_handle_stats_get \
		"dev ${dev} egress" 101 ".packets" "-n ${netns}" > /dev/null 2>&1
}
# Build the test topology: two namespaces joined by two veth pairs
# (eth0<->eth0, eth1<->eth1), with an 802.3ad bond on each side.
# s_ns runs lacp_active on, c_ns runs lacp_active off; both use lacp_rate fast.
setup() {
	local port

	setup_ns c_ns s_ns

	# shellcheck disable=SC2154
	for port in eth0 eth1; do
		ip -n "${c_ns}" link add "${port}" type veth peer name "${port}" netns "${s_ns}"
	done

	# Add tc filter to count the pkts: protocol 0x8809 frames leaving
	# c_ns/eth0 and s_ns/eth1 are matched by filter handle 101.
	tc -n "${c_ns}" qdisc add dev eth0 clsact
	tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass
	tc -n "${s_ns}" qdisc add dev eth1 clsact
	tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass

	# Active side.
	ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast
	for port in eth0 eth1; do
		ip -n "${s_ns}" link set "${port}" master bond0
	done

	# Passive side.
	ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast
	for port in eth0 eth1; do
		ip -n "${c_ns}" link set "${port}" master bond0
	done
}
# Run cleanup_all_ns on every exit path (presumably provided by the sourced
# lib.sh to remove the namespaces -- confirm), then build the topology.
trap cleanup_all_ns EXIT
setup
# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s
# after interface up
ip -n "${c_ns}" link set bond0 up
sleep 2
# 1. The passive side shouldn't send LACPDU.
# check_pkt_count resets RET to 0 itself; success here means the counter
# moved, which is the failure case for the passive side.
check_pkt_count "${c_ns}" "eth0" && RET=1
log_test "802.3ad lacp_active off" "init port"
# Only now bring up the active side so that negotiation can start.
ip -n "${s_ns}" link set bond0 up
# 2. The passive side should not have the 'active' flag.
RET=0
slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1
log_test "802.3ad lacp_active off" "port state active"
# 3. The active side should have the 'active' flag.
RET=0
slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1
log_test "802.3ad lacp_active on" "port state active"
# 4. Make sure the connection is not expired.
# The first slowwait only gives the port time to reach 'distributing'; its
# result is not checked. The test fails if 'expired' shows up afterwards.
RET=0
slowwait 5 check_port_state "${s_ns}" "eth0" "distributing"
slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1
log_test "bond 802.3ad lacp_active off" "port connection"
# After testing, disconnect one port on each side to check the state.
# s_ns releases eth0 and c_ns releases eth1, so neither veth pair still has
# a bond slave on both ends.
ip -n "${s_ns}" link set eth0 nomaster
ip -n "${s_ns}" link set eth0 up
ip -n "${c_ns}" link set eth1 nomaster
ip -n "${c_ns}" link set eth1 up
# Due to Periodic Machine and Rx Machine state change, the bond will still
# send lacpdu pkts in a few seconds. sleep at least 5s to make sure
# negotiation finished
sleep 5
# 5. The active side should keep sending LACPDU.
# No explicit RET=0 here or in step 6: check_pkt_count resets RET on entry.
check_pkt_count "${s_ns}" "eth1" || RET=1
log_test "bond 802.3ad lacp_active on" "port pkt after disconnect"
# 6. The passive side shouldn't send LACPDU anymore.
check_pkt_count "${c_ns}" "eth0" && RET=1
log_test "bond 802.3ad lacp_active off" "port pkt after disconnect"
# EXIT_STATUS is not set in this script; presumably maintained by log_test
# in lib.sh -- confirm.
exit "$EXIT_STATUS"

View File

@ -6,6 +6,7 @@ CONFIG_MACVLAN=y
CONFIG_IPVLAN=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_CLS_FLOWER=y
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_SCH_INGRESS=y
CONFIG_NLMON=y
CONFIG_VETH=y