net: bonding: fix possible peer notify event loss or dup issue

If the send_peer_notif counter and the peer event notify are not synchronized.
It may cause problems such as the loss or dup of peer notify event.

Before this patch:
- If should_notify_peers is true and the lock for send_peer_notif-- fails, peer
  event may be sent again in next mii_monitor loop, because should_notify_peers
  is still true.
- If should_notify_peers is true and the lock for send_peer_notif-- succeeded,
  but the lock for peer event fails, the peer event will be lost.

This patch locks the RTNL for send_peer_notif, events, and commit simultaneously.

Fixes: 07a4ddec3c ("bonding: add an option to specify a delay between peer notifications")
Cc: Jay Vosburgh <jv@jvosburgh.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Hangbin Liu <liuhangbin@gmail.com>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Cc: Vincent Bernat <vincent@bernat.ch>
Cc: <stable@vger.kernel.org>
Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
Acked-by: Jay Vosburgh <jv@jvosburgh.net>
Link: https://patch.msgid.link/20251021050933.46412-1-tonghao@bamaicloud.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Tonghao Zhang
2025-10-21 13:09:33 +08:00
committed by Paolo Abeni
parent c0178eec88
commit 10843e1492

View File

@@ -2874,7 +2874,7 @@ static void bond_mii_monitor(struct work_struct *work)
{
struct bonding *bond = container_of(work, struct bonding,
mii_work.work);
bool should_notify_peers = false;
bool should_notify_peers;
bool commit;
unsigned long delay;
struct slave *slave;
@@ -2886,30 +2886,33 @@ static void bond_mii_monitor(struct work_struct *work)
goto re_arm;
rcu_read_lock();
should_notify_peers = bond_should_notify_peers(bond);
commit = !!bond_miimon_inspect(bond);
if (bond->send_peer_notif) {
rcu_read_unlock();
if (rtnl_trylock()) {
bond->send_peer_notif--;
rtnl_unlock();
}
} else {
rcu_read_unlock();
}
if (commit) {
rcu_read_unlock();
if (commit || bond->send_peer_notif) {
/* Race avoidance with bond_close cancel of workqueue */
if (!rtnl_trylock()) {
delay = 1;
should_notify_peers = false;
goto re_arm;
}
bond_for_each_slave(bond, slave, iter) {
bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER);
if (commit) {
bond_for_each_slave(bond, slave, iter) {
bond_commit_link_state(slave,
BOND_SLAVE_NOTIFY_LATER);
}
bond_miimon_commit(bond);
}
if (bond->send_peer_notif) {
bond->send_peer_notif--;
if (should_notify_peers)
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
}
bond_miimon_commit(bond);
rtnl_unlock(); /* might sleep, hold no other locks */
}
@@ -2917,13 +2920,6 @@ static void bond_mii_monitor(struct work_struct *work)
re_arm:
if (bond->params.miimon)
queue_delayed_work(bond->wq, &bond->mii_work, delay);
if (should_notify_peers) {
if (!rtnl_trylock())
return;
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
rtnl_unlock();
}
}
static int bond_upper_dev_walk(struct net_device *upper,