From 549d8994447f2f628c6cedd139d53926bdfee881 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 22:38:55 +0100 Subject: [PATCH 001/239] media: vivid: fix FB dependency It's not enough to have a dependency on CONFIG_FB, as that can be in a loadable module when vivid itself is builtin: drivers/media/test-drivers/vivid/vivid-osd.o: in function `vivid_fb_init': vivid-osd.c:(.text+0xdc0): undefined reference to `fb_alloc_cmap' vivid-osd.c:(.text+0xe26): undefined reference to `register_framebuffer' Change the dependency to only allow configurations that can be built, but change the FB to FB_CORE so this is also possible when using DRM with FB compatibility rather than full fbdev. Fixes: 20889ddede38 ("media: vivid: Introduce VIDEO_VIVID_OSD") Signed-off-by: Arnd Bergmann Reviewed-by: Ricardo Ribalda Signed-off-by: Hans Verkuil --- drivers/media/test-drivers/vivid/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/test-drivers/vivid/Kconfig b/drivers/media/test-drivers/vivid/Kconfig index e95edc0f22bf..cc470070a7a5 100644 --- a/drivers/media/test-drivers/vivid/Kconfig +++ b/drivers/media/test-drivers/vivid/Kconfig @@ -32,7 +32,8 @@ config VIDEO_VIVID_CEC config VIDEO_VIVID_OSD bool "Enable Framebuffer for testing Output Overlay" - depends on VIDEO_VIVID && FB + depends on VIDEO_VIVID && FB_CORE + depends on VIDEO_VIVID=m || FB_CORE=y default y select FB_IOMEM_HELPERS help From 9df181c8de1b6b285556f80bfd02584f3457f32e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 16:46:30 +0100 Subject: [PATCH 002/239] media: i2c: lt6911uxe: Fix Kconfig dependencies: The new driver fails to build if I2C is disabled: drivers/media/i2c/lt6911uxe.c:703:1: error: data definition has no type or storage class [-Werror] 703 | module_i2c_driver(lt6911uxe_i2c_driver); or if I2C is on but V4L2_CCI_I2C is not: ERROR: modpost: "cci_write" [drivers/media/i2c/lt6911uxe.ko] undefined! ERROR: modpost: "cci_read" [drivers/media/i2c/lt6911uxe.ko] undefined! For both by adding a dependency on I2C and selecting V4L2_CCI_I2C, which follows the common practice for these. Fixes: e49563c3be09 ("media: i2c: add lt6911uxe hdmi bridge driver") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil --- drivers/media/i2c/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index e576b213084d..b06365d02ef1 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1149,8 +1149,9 @@ config VIDEO_ISL7998X config VIDEO_LT6911UXE tristate "Lontium LT6911UXE decoder" - depends on ACPI && VIDEO_DEV + depends on ACPI && VIDEO_DEV && I2C select V4L2_FWNODE + select V4L2_CCI_I2C help This is a Video4Linux2 sensor-level driver for the Lontium LT6911UXE HDMI to MIPI CSI-2 bridge. From 0dce5b44bd38af20b0383ae4cabeead37b4b9a9a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 18 Mar 2025 15:03:27 +0100 Subject: [PATCH 003/239] media: platform: synopsys: VIDEO_SYNOPSYS_HDMIRX should depend on ARCH_ROCKCHIP For now, the Synopsys HDMI HDMI RX Controller is only supported on Rockchip RK3588 SoCs. Hence add a dependency on ARCH_ROCKCHIP, to prevent asking the user about this driver when configuring a kernel without Rockchip SoC support. Fixes: 7b59b132ad4398f9 ("media: platform: synopsys: Add support for HDMI input driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Shreeya Patel Signed-off-by: Hans Verkuil --- drivers/media/platform/synopsys/hdmirx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/synopsys/hdmirx/Kconfig b/drivers/media/platform/synopsys/hdmirx/Kconfig index 27e6706f84a3..4321f985f632 100644 --- a/drivers/media/platform/synopsys/hdmirx/Kconfig +++ b/drivers/media/platform/synopsys/hdmirx/Kconfig @@ -2,6 +2,7 @@ config VIDEO_SYNOPSYS_HDMIRX tristate "Synopsys DesignWare HDMI Receiver driver" + depends on ARCH_ROCKCHIP || COMPILE_TEST depends on VIDEO_DEV select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API From 118b34092e37da1d3c4808e8cd1dd0246ac3f97e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 19 Mar 2025 11:32:56 +0100 Subject: [PATCH 004/239] media: i2c: lt6911uxe: add two selects to Kconfig In order to get the v4l2_subdev functions you need to select MEDIA_CONTROLLER and VIDEO_V4L2_SUBDEV_API. Signed-off-by: Hans Verkuil Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503151002.HacBN2LO-lkp@intel.com/ Fixes: e49563c3be09 ("media: i2c: add lt6911uxe hdmi bridge driver") --- drivers/media/i2c/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index b06365d02ef1..e45ba127069f 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1152,6 +1152,8 @@ config VIDEO_LT6911UXE depends on ACPI && VIDEO_DEV && I2C select V4L2_FWNODE select V4L2_CCI_I2C + select MEDIA_CONTROLLER + select VIDEO_V4L2_SUBDEV_API help This is a Video4Linux2 sensor-level driver for the Lontium LT6911UXE HDMI to MIPI CSI-2 bridge. From d51adf038ebe59b592005166209b70218b1da849 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 27 Feb 2025 15:02:46 +0100 Subject: [PATCH 005/239] media: cec: tda9950: add back i2c dependency drivers/media/cec/i2c/tda9950.c: In function 'tda9950_write_range': drivers/media/cec/i2c/tda9950.c:92:15: error: implicit declaration of function 'i2c_transfer' [-Wimplicit-function-declaration] 92 | ret = i2c_transfer(client->adapter, &msg, 1); | ^~~~~~~~~~~~ drivers/media/cec/i2c/tda9950.c: In function 'tda9950_probe': drivers/media/cec/i2c/tda9950.c:391:14: error: implicit declaration of function 'i2c_check_functionality' [-Wimplicit-function-declaration] 391 | if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { | ^~~~~~~~~~~~~~~~~~~~~~~ Fixes: caa6f4a75e9f ("media: cec: move driver for TDA9950 from drm/i2c") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil --- drivers/media/cec/i2c/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/cec/i2c/Kconfig b/drivers/media/cec/i2c/Kconfig index b9d21643eef1..c31abc26f602 100644 --- a/drivers/media/cec/i2c/Kconfig +++ b/drivers/media/cec/i2c/Kconfig @@ -16,6 +16,7 @@ config CEC_CH7322 config CEC_NXP_TDA9950 tristate "NXP Semiconductors TDA9950/TDA998X HDMI CEC" + depends on I2C select CEC_NOTIFIER select CEC_CORE default DRM_I2C_NXP_TDA998X From 9ca67840c0ddf3f39407339624cef824a4f27599 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 6 Mar 2025 18:54:47 +0000 Subject: [PATCH 006/239] firmware: arm_scmi: Balance device refcount when destroying devices Using device_find_child() to lookup the proper SCMI device to destroy causes an unbalance in device refcount, since device_find_child() calls an implicit get_device(): this, in turns, inhibits the call of the provided release methods upon devices destruction. As a consequence, one of the structures that is not freed properly upon destruction is the internal struct device_private dev->p populated by the drivers subsystem core. KMemleak detects this situation since loading/unloding some SCMI driver causes related devices to be created/destroyed without calling any device_release method. unreferenced object 0xffff00000f583800 (size 512): comm "insmod", pid 227, jiffies 4294912190 hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 60 36 1d 8a 00 80 ff ff ........`6...... backtrace (crc 114e2eed): kmemleak_alloc+0xbc/0xd8 __kmalloc_cache_noprof+0x2dc/0x398 device_add+0x954/0x12d0 device_register+0x28/0x40 __scmi_device_create.part.0+0x1bc/0x380 scmi_device_create+0x2d0/0x390 scmi_create_protocol_devices+0x74/0xf8 scmi_device_request_notifier+0x1f8/0x2a8 notifier_call_chain+0x110/0x3b0 blocking_notifier_call_chain+0x70/0xb0 scmi_driver_register+0x350/0x7f0 0xffff80000a3b3038 do_one_initcall+0x12c/0x730 do_init_module+0x1dc/0x640 load_module+0x4b20/0x5b70 init_module_from_file+0xec/0x158 $ ./scripts/faddr2line ./vmlinux device_add+0x954/0x12d0 device_add+0x954/0x12d0: kmalloc_noprof at include/linux/slab.h:901 (inlined by) kzalloc_noprof at include/linux/slab.h:1037 (inlined by) device_private_init at drivers/base/core.c:3510 (inlined by) device_add at drivers/base/core.c:3561 Balance device refcount by issuing a put_device() on devices found via device_find_child(). Reported-by: Alice Ryhl Closes: https://lore.kernel.org/linux-arm-kernel/Z8nK3uFkspy61yjP@arm.com/T/#mc1f73a0ea5e41014fa145147b7b839fc988ada8f CC: Sudeep Holla CC: Catalin Marinas Fixes: d4f9dddd21f3 ("firmware: arm_scmi: Add dynamic scmi devices creation") Signed-off-by: Cristian Marussi Tested-by: Alice Ryhl Message-Id: <20250306185447.2039336-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 7af01664ce7e..3a5474015f7d 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -255,6 +255,9 @@ static struct scmi_device *scmi_child_dev_find(struct device *parent, if (!dev) return NULL; + /* Drop the refcnt bumped implicitly by device_find_child */ + put_device(dev); + return to_scmi_dev(dev); } From c23c03bf1faa1e76be1eba35bad6da6a2a7c95ee Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 10 Mar 2025 17:58:00 +0000 Subject: [PATCH 007/239] firmware: arm_scmi: Fix timeout checks on polling path Polling mode transactions wait for a reply busy-looping without holding a spinlock, but currently the timeout checks are based only on elapsed time: as a result we could hit a false positive whenever our busy-looping thread is pre-empted and scheduled out for a time greater than the polling timeout. Change the checks at the end of the busy-loop to make sure that the polling wasn't indeed successful or an out-of-order reply caused the polling to be forcibly terminated. Fixes: 31d2f803c19c ("firmware: arm_scmi: Add sync_cmds_completed_on_ret transport flag") Reported-by: Huangjie Closes: https://lore.kernel.org/arm-scmi/20250123083323.2363749-1-jackhuang021@gmail.com/ Signed-off-by: Cristian Marussi Cc: stable@vger.kernel.org # 5.18.x Message-Id: <20250310175800.1444293-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 1c75a4c9c371..0390d5ff195e 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1248,7 +1248,8 @@ static void xfer_put(const struct scmi_protocol_handle *ph, } static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, - struct scmi_xfer *xfer, ktime_t stop) + struct scmi_xfer *xfer, ktime_t stop, + bool *ooo) { struct scmi_info *info = handle_to_scmi_info(cinfo->handle); @@ -1257,7 +1258,7 @@ static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, * in case of out-of-order receptions of delayed responses */ return info->desc->ops->poll_done(cinfo, xfer) || - try_wait_for_completion(&xfer->done) || + (*ooo = try_wait_for_completion(&xfer->done)) || ktime_after(ktime_get(), stop); } @@ -1274,15 +1275,17 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, * itself to support synchronous commands replies. */ if (!desc->sync_cmds_completed_on_ret) { + bool ooo = false; + /* * Poll on xfer using transport provided .poll_done(); * assumes no completion interrupt was available. */ ktime_t stop = ktime_add_ms(ktime_get(), timeout_ms); - spin_until_cond(scmi_xfer_done_no_timeout(cinfo, - xfer, stop)); - if (ktime_after(ktime_get(), stop)) { + spin_until_cond(scmi_xfer_done_no_timeout(cinfo, xfer, + stop, &ooo)); + if (!ooo && !info->desc->ops->poll_done(cinfo, xfer)) { dev_err(dev, "timed out in resp(caller: %pS) - polling\n", (void *)_RET_IP_); From 4567bdaaaaa1744da3d7da07d9aca2f941f5b4e5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 21 Mar 2025 11:57:00 +0000 Subject: [PATCH 008/239] firmware: arm_ffa: Skip Rx buffer ownership release if not acquired MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completion of the FFA_PARTITION_INFO_GET ABI transfers the ownership of the caller’s Rx buffer from the producer(typically partition mnager) to the consumer(this driver/OS). FFA_RX_RELEASE transfers the ownership from the consumer back to the producer. However, when we set the flag to just return the count of partitions deployed in the system corresponding to the specified UUID while invoking FFA_PARTITION_INFO_GET, the Rx buffer ownership shouldn't be transferred to this driver. We must be able to skip transferring back the ownership to the partition manager when we request just to get the count of the partitions as the buffers are not acquired in this case. Firmware may return FFA_RET_DENIED or other error for the ffa_rx_release() in such cases. Fixes: bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support") Message-Id: <20250321115700.3525197-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 19295282de24..fe55613a8ea9 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -299,7 +299,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, import_uuid(&buf->uuid, (u8 *)&rx_buf->uuid); } - ffa_rx_release(); + if (!(flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)) + ffa_rx_release(); mutex_unlock(&drv_info->rx_lock); From 0c562281199f225a849dbb5b9a40b079ee31dc0e Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 3 Apr 2025 21:59:24 -0500 Subject: [PATCH 009/239] arm64: dts: morello: Fix-up cache nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no need include the CPU number in the L2 cache node names as the names are local to the CPU nodes. The documented node name is also just "l2-cache". The L3 cache is not part of cpu@0/l2-cache as it is shared among all cores. Move it to /cpus node which is the typical place for shared caches. Signed-off-by: Rob Herring (Arm) Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20250403-dt-cpu-schema-v1-3-076be7171a85@kernel.org> Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/morello.dtsi | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/boot/dts/arm/morello.dtsi b/arch/arm64/boot/dts/arm/morello.dtsi index 0bab0b3ea969..5bc1c725dc86 100644 --- a/arch/arm64/boot/dts/arm/morello.dtsi +++ b/arch/arm64/boot/dts/arm/morello.dtsi @@ -44,7 +44,7 @@ next-level-cache = <&l2_0>; clocks = <&scmi_dvfs 0>; - l2_0: l2-cache-0 { + l2_0: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -53,13 +53,6 @@ cache-sets = <2048>; cache-unified; next-level-cache = <&l3_0>; - - l3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; - cache-size = <0x100000>; - cache-unified; - }; }; }; @@ -78,7 +71,7 @@ next-level-cache = <&l2_1>; clocks = <&scmi_dvfs 0>; - l2_1: l2-cache-1 { + l2_1: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -105,7 +98,7 @@ next-level-cache = <&l2_2>; clocks = <&scmi_dvfs 1>; - l2_2: l2-cache-2 { + l2_2: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -132,7 +125,7 @@ next-level-cache = <&l2_3>; clocks = <&scmi_dvfs 1>; - l2_3: l2-cache-3 { + l2_3: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -143,6 +136,13 @@ next-level-cache = <&l3_0>; }; }; + + l3_0: l3-cache { + compatible = "cache"; + cache-level = <3>; + cache-size = <0x100000>; + cache-unified; + }; }; firmware { From 1aa495a6572f8641da4ec4cd32210deca61bed64 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 11 Apr 2025 13:36:06 +0100 Subject: [PATCH 010/239] kunit: configs: Add some Cirrus Logic modules to all_tests Add CONFIG_I2C and CONFIG_SND_SOC_CS35L56_I2C to all_tests.config so that Cirrus Logic modules with KUnit tests will be built. The CS35L56 driver doesn't currently have any KUnit tests itself, but it enables two other libraries that have KUnit tests: cs_dsp and cs-amp-lib. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-2-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- tools/testing/kunit/configs/all_tests.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646..43d3c31ab53f 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -20,6 +20,7 @@ CONFIG_VFAT_FS=y CONFIG_PCI=y CONFIG_USB4=y +CONFIG_I2C=y CONFIG_NET=y CONFIG_MCTP=y @@ -51,3 +52,4 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y CONFIG_SND_SOC_TOPOLOGY_BUILD=y +CONFIG_SND_SOC_CS35L56_I2C=y From 96014d91cffb335d3b396771524ff2aba3549865 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 11 Apr 2025 13:36:07 +0100 Subject: [PATCH 011/239] ASoC: cs-amp-lib-test: Don't select SND_SOC_CS_AMP_LIB Depend on SND_SOC_CS_AMP_LIB instead of selecting it. KUNIT_ALL_TESTS should only build tests for components that are already being built, it should not cause other stuff to be added to the build. Fixes: 177862317a98 ("ASoC: cs-amp-lib: Add KUnit test for calibration helpers") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-3-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 40bb7a1d44bc..20f99cbee29b 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -776,10 +776,9 @@ config SND_SOC_CS_AMP_LIB tristate config SND_SOC_CS_AMP_LIB_TEST - tristate "KUnit test for Cirrus Logic cs-amp-lib" - depends on KUNIT + tristate "KUnit test for Cirrus Logic cs-amp-lib" if !KUNIT_ALL_TESTS + depends on SND_SOC_CS_AMP_LIB && KUNIT default KUNIT_ALL_TESTS - select SND_SOC_CS_AMP_LIB help This builds KUnit tests for the Cirrus Logic common amplifier library. From a0b887f6eb9a0d1be3c57d00b0f3ba8408d3018a Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 11 Apr 2025 13:36:08 +0100 Subject: [PATCH 012/239] firmware: cs_dsp: tests: Depend on FW_CS_DSP rather then enabling it FW_CS_DSP gets enabled if KUNIT is enabled. The test should rather depend on if the feature is enabled. Fix this by moving FW_CS_DSP to the depends on clause. Fixes: dd0b6b1f29b9 ("firmware: cs_dsp: Add KUnit testing of bin file download") Signed-off-by: Nico Pache Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-4-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- drivers/firmware/cirrus/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index 0a883091259a..e3c2e38b746d 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -6,14 +6,11 @@ config FW_CS_DSP config FW_CS_DSP_KUNIT_TEST_UTILS tristate - depends on KUNIT && REGMAP - select FW_CS_DSP config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS - depends on KUNIT && REGMAP + depends on KUNIT && REGMAP && FW_CS_DSP default KUNIT_ALL_TESTS - select FW_CS_DSP select FW_CS_DSP_KUNIT_TEST_UTILS help This builds KUnit tests for cs_dsp. From a9a69c3b38c89d7992fb53db4abb19104b531d32 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Sun, 6 Apr 2025 16:08:54 -0500 Subject: [PATCH 013/239] ASoC: imx-card: Adjust over allocation of memory in imx_card_parse_of() Incorrect types are used as sizeof() arguments in devm_kcalloc(). It should be sizeof(dai_link_data) for link_data instead of sizeof(snd_soc_dai_link). This is found by our static analysis tool. Signed-off-by: Chenyuan Yang Link: https://patch.msgid.link/20250406210854.149316-1-chenyuan0y@gmail.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-card.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 3686d468506b..45e000f61ecc 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -544,7 +544,7 @@ static int imx_card_parse_of(struct imx_card_data *data) if (!card->dai_link) return -ENOMEM; - data->link_data = devm_kcalloc(dev, num_links, sizeof(*link), GFP_KERNEL); + data->link_data = devm_kcalloc(dev, num_links, sizeof(*link_data), GFP_KERNEL); if (!data->link_data) return -ENOMEM; From 9aff2e8df240e84a36f2607f98a0a9924a24e65d Mon Sep 17 00:00:00 2001 From: Sheetal Date: Fri, 4 Apr 2025 10:59:53 +0000 Subject: [PATCH 014/239] ASoC: soc-pcm: Fix hw_params() and DAPM widget sequence Issue: When multiple audio streams share a common BE DAI, the BE DAI widget can be powered up before its hardware parameters are configured. This incorrect sequence leads to intermittent pcm_write errors. For example, the below Tegra use-case throws an error: aplay(2 streams) -> AMX(mux) -> ADX(demux) -> arecord(2 streams), here, 'AMX TX' and 'ADX RX' are common BE DAIs. For above usecase when failure happens below sequence is observed: aplay(1) FE open() - BE DAI callbacks added to the list - BE DAI state = SND_SOC_DPCM_STATE_OPEN aplay(2) FE open() - BE DAI callbacks are not added to the list as the state is already SND_SOC_DPCM_STATE_OPEN during aplay(1) FE open(). aplay(2) FE hw_params() - BE DAI hw_params() callback ignored aplay(2) FE prepare() - Widget is powered ON without BE DAI hw_params() call aplay(1) FE hw_params() - BE DAI hw_params() is now called Fix: Add BE DAIs in the list if its state is either SND_SOC_DPCM_STATE_OPEN or SND_SOC_DPCM_STATE_HW_PARAMS as well. It ensures the widget is powered ON after BE DAI hw_params() callback. Fixes: 0c25db3f7621 ("ASoC: soc-pcm: Don't reconnect an already active BE") Signed-off-by: Sheetal Link: https://patch.msgid.link/20250404105953.2784819-1-sheetal@nvidia.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 4308a6cbb2e6..43835197d1fe 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1584,10 +1584,13 @@ int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, /* * Filter for systems with 'component_chaining' enabled. * This helps to avoid unnecessary re-configuration of an - * already active BE on such systems. + * already active BE on such systems and ensures the BE DAI + * widget is powered ON after hw_params() BE DAI callback. */ if (fe->card->component_chaining && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_NEW) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_CLOSE)) continue; From e64c0ff0d5d85791fbcd126ee558100a06a24a97 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 27 Mar 2025 11:16:00 +0800 Subject: [PATCH 015/239] pinctrl: imx: Return NULL if no group is matched and found Currently if no group is matched and found, this function will return the last grp to the caller, this is not expected, it is supposed to return NULL in this case. Fixes: e566fc11ea76 ("pinctrl: imx: use generic pinctrl helpers for managing groups") Signed-off-by: Hui Wang Reviewed-by: Frank Li Link: https://lore.kernel.org/20250327031600.99723-1-hui.wang@canonical.com Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-imx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 842a1e6cbfc4..18de31328540 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -37,16 +37,16 @@ static inline const struct group_desc *imx_pinctrl_find_group_by_name( struct pinctrl_dev *pctldev, const char *name) { - const struct group_desc *grp = NULL; + const struct group_desc *grp; int i; for (i = 0; i < pctldev->num_groups; i++) { grp = pinctrl_generic_get_group(pctldev, i); if (grp && !strcmp(grp->grp.name, name)) - break; + return grp; } - return grp; + return NULL; } static void imx_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, From e56088a13708757da68ad035269d69b93ac8c389 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 29 Mar 2025 20:01:32 +0100 Subject: [PATCH 016/239] pinctrl: meson: define the pull up/down resistor value as 60 kOhm The public datasheets of the following Amlogic SoCs describe a typical resistor value for the built-in pull up/down resistor: - Meson8/8b/8m2: not documented - GXBB (S905): 60 kOhm - GXL (S905X): 60 kOhm - GXM (S912): 60 kOhm - G12B (S922X): 60 kOhm - SM1 (S905D3): 60 kOhm The public G12B and SM1 datasheets additionally state min and max values: - min value: 50 kOhm for both, pull-up and pull-down - max value for the pull-up: 70 kOhm - max value for the pull-down: 130 kOhm Use 60 kOhm in the pinctrl-meson driver as well so it's shown in the debugfs output. It may not be accurate for Meson8/8b/8m2 but in reality 60 kOhm is closer to the actual value than 1 Ohm. Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/20250329190132.855196-1-martin.blumenstingl@googlemail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 253a0cc57e39..e5a32a0532ee 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -487,7 +487,7 @@ static int meson_pinconf_get(struct pinctrl_dev *pcdev, unsigned int pin, case PIN_CONFIG_BIAS_PULL_DOWN: case PIN_CONFIG_BIAS_PULL_UP: if (meson_pinconf_get_pull(pc, pin) == param) - arg = 1; + arg = 60000; else return -EINVAL; break; From 457d9772e8a5cdae64f66b5f7d5b0247365191ec Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 1 Apr 2025 15:50:21 +0200 Subject: [PATCH 017/239] pinctrl: airoha: fix wrong PHY LED mapping and PHY2 LED defines The current PHY2 LED define are wrong and actually set BITs outside the related mask. Fix it and set the correct value. While at it, also use FIELD_PREP_CONST macro to make it simple to understand what values are actually applied for the mask. Also fix wrong PHY LED mapping. The SoC Switch supports up to 4 port but the register define mapping for 5 PHY port, starting from 0. The mapping was wrongly defined starting from PHY1. Reorder the function group to start from PHY0. PHY4 is actually never supported as we don't have a GPIO pin to assign. Cc: stable@vger.kernel.org Fixes: 1c8ace2d0725 ("pinctrl: airoha: Add support for EN7581 SoC") Reviewed-by: Benjamin Larsson Signed-off-by: Christian Marangi Acked-by: Lorenzo Bianconi Link: https://lore.kernel.org/20250401135026.18018-1-ansuelsmth@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-airoha.c | 159 ++++++++++------------ 1 file changed, 70 insertions(+), 89 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 547a798b71c8..5d84a778683d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -112,39 +113,19 @@ #define REG_LAN_LED1_MAPPING 0x0280 #define LAN4_LED_MAPPING_MASK GENMASK(18, 16) -#define LAN4_PHY4_LED_MAP BIT(18) -#define LAN4_PHY2_LED_MAP BIT(17) -#define LAN4_PHY1_LED_MAP BIT(16) -#define LAN4_PHY0_LED_MAP 0 -#define LAN4_PHY3_LED_MAP GENMASK(17, 16) +#define LAN4_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN4_LED_MAPPING_MASK, (_n)) #define LAN3_LED_MAPPING_MASK GENMASK(14, 12) -#define LAN3_PHY4_LED_MAP BIT(14) -#define LAN3_PHY2_LED_MAP BIT(13) -#define LAN3_PHY1_LED_MAP BIT(12) -#define LAN3_PHY0_LED_MAP 0 -#define LAN3_PHY3_LED_MAP GENMASK(13, 12) +#define LAN3_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN3_LED_MAPPING_MASK, (_n)) #define LAN2_LED_MAPPING_MASK GENMASK(10, 8) -#define LAN2_PHY4_LED_MAP BIT(12) -#define LAN2_PHY2_LED_MAP BIT(11) -#define LAN2_PHY1_LED_MAP BIT(10) -#define LAN2_PHY0_LED_MAP 0 -#define LAN2_PHY3_LED_MAP GENMASK(11, 10) +#define LAN2_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN2_LED_MAPPING_MASK, (_n)) #define LAN1_LED_MAPPING_MASK GENMASK(6, 4) -#define LAN1_PHY4_LED_MAP BIT(6) -#define LAN1_PHY2_LED_MAP BIT(5) -#define LAN1_PHY1_LED_MAP BIT(4) -#define LAN1_PHY0_LED_MAP 0 -#define LAN1_PHY3_LED_MAP GENMASK(5, 4) +#define LAN1_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN1_LED_MAPPING_MASK, (_n)) #define LAN0_LED_MAPPING_MASK GENMASK(2, 0) -#define LAN0_PHY4_LED_MAP BIT(3) -#define LAN0_PHY2_LED_MAP BIT(2) -#define LAN0_PHY1_LED_MAP BIT(1) -#define LAN0_PHY0_LED_MAP 0 -#define LAN0_PHY3_LED_MAP GENMASK(2, 1) +#define LAN0_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN0_LED_MAPPING_MASK, (_n)) /* CONF */ #define REG_I2C_SDA_E2 0x001c @@ -1476,8 +1457,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1491,8 +1472,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1506,8 +1487,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1521,8 +1502,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1540,8 +1521,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1555,8 +1536,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1570,8 +1551,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1585,8 +1566,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1604,8 +1585,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1619,8 +1600,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1634,8 +1615,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1649,8 +1630,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1668,8 +1649,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1683,8 +1664,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1698,8 +1679,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1713,8 +1694,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, @@ -1732,8 +1713,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1747,8 +1728,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1762,8 +1743,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1777,8 +1758,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1796,8 +1777,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1811,8 +1792,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1826,8 +1807,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1841,8 +1822,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1860,8 +1841,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1875,8 +1856,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1890,8 +1871,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1905,8 +1886,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1924,8 +1905,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1939,8 +1920,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1954,8 +1935,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1969,8 +1950,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, From 63ec4baf725cbde506f0a9640ae6751622b81b0a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 15 Apr 2025 13:29:27 +0100 Subject: [PATCH 018/239] ASoC: Add Cirrus and Wolfson headers to ASoC section of MAINTAINERS Specifically list various Cirrus Logic and Wolfson Micro codec header files under include/sound/ within the ASoC section of MAINTAINERS. Note that not all the include/sound/cs* files are part of ASoC, so more-specific patterns are needed. These files are all part of ASoC codec drivers, and are owned by specific Cirrus Logic and Wolfson Micro sections of MAINTAINERS. But the overall include/sound/* maintainership is only Takashi Iwai and Jaroslav Kysela. So by default get_maintainer.pl would only show Takashi and Jaroslav as maintainers for any patch that changes these files without changing any code under sound/soc. There is a separate MAINTAINERS section for ASoC, so the headers must be added there to make the ASoC maintainers show up in get_maintainer.pl. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250415122927.512200-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad08ca0f423b..4878b77f71ee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22656,9 +22656,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git F: Documentation/devicetree/bindings/sound/ F: Documentation/sound/soc/ F: include/dt-bindings/sound/ +F: include/sound/cs-amp-lib.h +F: include/sound/cs35l* +F: include/sound/cs4271.h +F: include/sound/cs42l* +F: include/sound/madera-pdata.h F: include/sound/soc* F: include/sound/sof.h F: include/sound/sof/ +F: include/sound/wm*.h F: include/trace/events/sof*.h F: include/uapi/sound/asoc.h F: sound/soc/ From 68715cb5c0e00284d93f976c6368809f64131b0b Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Tue, 15 Apr 2025 14:41:34 -0500 Subject: [PATCH 019/239] ASoC: Intel: sof_sdw: Add NULL check in asoc_sdw_rt_dmic_rtd_init() mic_name returned by devm_kasprintf() could be NULL. Add a check for it. Signed-off-by: Chenyuan Yang Fixes: bee2fe44679f ("ASoC: Intel: sof_sdw: use generic rtd_init function for Realtek SDW DMICs") Link: https://patch.msgid.link/20250415194134.292830-1-chenyuan0y@gmail.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_rt_dmic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_rt_dmic.c b/sound/soc/sdw_utils/soc_sdw_rt_dmic.c index 46d917a99c51..97be110a59b6 100644 --- a/sound/soc/sdw_utils/soc_sdw_rt_dmic.c +++ b/sound/soc/sdw_utils/soc_sdw_rt_dmic.c @@ -29,6 +29,8 @@ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_da mic_name = devm_kasprintf(card->dev, GFP_KERNEL, "rt715-sdca"); else mic_name = devm_kasprintf(card->dev, GFP_KERNEL, "%s", component->name_prefix); + if (!mic_name) + return -ENOMEM; card->components = devm_kasprintf(card->dev, GFP_KERNEL, "%s mic:%s", card->components, From fe412e3a6c97cfe49ecf4564a278122f7d78e3f0 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 15 Apr 2025 19:23:37 +0800 Subject: [PATCH 020/239] pinctrl: mediatek: common-v1: Fix EINT breakage on older controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When EINT support for multiple addresses was introduced, the driver library for the older generations (pinctrl-mtk-common) was not fixed together. This resulted in invalid pointer accesses. Fix up the filled in |struct mtk_eint| in pinctrl-mtk-common to match what is now expected by the mtk-eint library. Reported-by: Uwe Kleine-König Tested-by: Uwe Kleine-König Closes: https://lore.kernel.org/all/43nd5jxpk7b7fv46frqlfjnqfh5jlpqsemeoakqzd4wdi3df6y@w7ycd3k5ezvn/ Fixes: 3ef9f710efcb ("pinctrl: mediatek: Add EINT support for multiple addresses") Cc: Hao Chang Cc: Qingliang Li Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/20250415112339.2385454-1-wenst@chromium.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 91edb539925a..7585de11854b 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1015,7 +1015,13 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) if (!pctl->eint) return -ENOMEM; - pctl->eint->base = devm_platform_ioremap_resource(pdev, 0); + pctl->eint->nbase = 1; + /* mtk-eint expects an array */ + pctl->eint->base = devm_kzalloc(pctl->dev, sizeof(pctl->eint->base), GFP_KERNEL); + if (IS_ERR(pctl->eint->base)) + return -ENOMEM; + + pctl->eint->base[0] = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pctl->eint->base)) return PTR_ERR(pctl->eint->base); From 45e00e36718902d81bdaebb37b3a8244e685bc48 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V (Arm)" Date: Tue, 8 Apr 2025 09:03:51 +0530 Subject: [PATCH 021/239] iommu/arm-smmu-v3: Add missing S2FWB feature detection Commit 67e4fe398513 ("iommu/arm-smmu-v3: Use S2FWB for NESTED domains") introduced S2FWB usage but omitted the corresponding feature detection. As a result, vIOMMU allocation fails on FVP in arm_vsmmu_alloc(), due to the following check: if (!arm_smmu_master_canwbs(master) && !(smmu->features & ARM_SMMU_FEAT_S2FWB)) return ERR_PTR(-EOPNOTSUPP); This patch adds the missing detection logic to prevent allocation failure when S2FWB is supported. Fixes: 67e4fe398513 ("iommu/arm-smmu-v3: Use S2FWB for NESTED domains") Signed-off-by: Aneesh Kumar K.V (Arm) Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20250408033351.1012411-1-aneesh.kumar@kernel.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b4c21aaed126..5467f85dd463 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4429,6 +4429,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); if (FIELD_GET(IDR3_RIL, reg)) smmu->features |= ARM_SMMU_FEAT_RANGE_INV; + if (FIELD_GET(IDR3_FWB, reg)) + smmu->features |= ARM_SMMU_FEAT_S2FWB; /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); From 12f78021973ae422564b234136c702a305932d73 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Sat, 12 Apr 2025 10:23:54 +1000 Subject: [PATCH 022/239] iommu/arm-smmu-v3: Fix pgsize_bit for sva domains UBSan caught a bug with IOMMU SVA domains, where the reported exponent value in __arm_smmu_tlb_inv_range() was >= 64. __arm_smmu_tlb_inv_range() uses the domain's pgsize_bitmap to compute the number of pages to invalidate and the invalidation range. Currently arm_smmu_sva_domain_alloc() does not setup the iommu domain's pgsize_bitmap. This leads to __ffs() on the value returning 64 and that leads to undefined behaviour w.r.t. shift operations Fix this by initializing the iommu_domain's pgsize_bitmap to PAGE_SIZE. Effectively the code needs to use the smallest page size for invalidation Cc: stable@vger.kernel.org Fixes: eb6c97647be2 ("iommu/arm-smmu-v3: Avoid constructing invalid range commands") Suggested-by: Jason Gunthorpe Signed-off-by: Balbir Singh Cc: Jean-Philippe Brucker Cc: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Cc: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250412002354.3071449-1-balbirs@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 9ba596430e7c..980cc6b33c43 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -411,6 +411,12 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_SVA; smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + + /* + * Choose page_size as the leaf page size for invalidation when + * ARM_SMMU_FEAT_RANGE_INV is present + */ + smmu_domain->domain.pgsize_bitmap = PAGE_SIZE; smmu_domain->smmu = smmu; ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, From b00d24997a11c10d3e420614f0873b83ce358a34 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 15 Apr 2025 11:56:20 -0700 Subject: [PATCH 023/239] iommu/arm-smmu-v3: Fix iommu_device_probe bug due to duplicated stream ids ASPEED VGA card has two built-in devices: 0008:06:00.0 PCI bridge: ASPEED Technology, Inc. AST1150 PCI-to-PCI Bridge (rev 06) 0008:07:00.0 VGA compatible controller: ASPEED Technology, Inc. ASPEED Graphics Family (rev 52) Its toplogy looks like this: +-[0008:00]---00.0-[01-09]--+-00.0-[02-09]--+-00.0-[03]----00.0 Sandisk Corp Device 5017 | +-01.0-[04]-- | +-02.0-[05]----00.0 NVIDIA Corporation Device | +-03.0-[06-07]----00.0-[07]----00.0 ASPEED Technology, Inc. ASPEED Graphics Family | +-04.0-[08]----00.0 Renesas Technology Corp. uPD720201 USB 3.0 Host Controller | \-05.0-[09]----00.0 Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller \-00.1 PMC-Sierra Inc. Device 4028 The IORT logic populaties two identical IDs into the fwspec->ids array via DMA aliasing in iort_pci_iommu_init() called by pci_for_each_dma_alias(). Though the SMMU driver had been able to handle this situation since commit 563b5cbe334e ("iommu/arm-smmu-v3: Cope with duplicated Stream IDs"), that got broken by the later commit cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure"), which ended up with allocating separate streams with the same stuffing. On a kernel prior to v6.15-rc1, there has been an overlooked warning: pci 0008:07:00.0: vgaarb: setting as boot VGA device pci 0008:07:00.0: vgaarb: bridge control possible pci 0008:07:00.0: vgaarb: VGA device added: decodes=io+mem,owns=none,locks=none pcieport 0008:06:00.0: Adding to iommu group 14 ast 0008:07:00.0: stream 67328 already in tree <===== WARNING ast 0008:07:00.0: enabling device (0002 -> 0003) ast 0008:07:00.0: Using default configuration ast 0008:07:00.0: AST 2600 detected ast 0008:07:00.0: [drm] Using analog VGA ast 0008:07:00.0: [drm] dram MCLK=396 Mhz type=1 bus_width=16 [drm] Initialized ast 0.1.0 for 0008:07:00.0 on minor 0 ast 0008:07:00.0: [drm] fb0: astdrmfb frame buffer device With v6.15-rc, since the commit bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path"), the error returned with the warning is moved to the SMMU device probe flow: arm_smmu_probe_device+0x15c/0x4c0 __iommu_probe_device+0x150/0x4f8 probe_iommu_group+0x44/0x80 bus_for_each_dev+0x7c/0x100 bus_iommu_probe+0x48/0x1a8 iommu_device_register+0xb8/0x178 arm_smmu_device_probe+0x1350/0x1db0 which then fails the entire SMMU driver probe: pci 0008:06:00.0: Adding to iommu group 21 pci 0008:07:00.0: stream 67328 already in tree arm-smmu-v3 arm-smmu-v3.9.auto: Failed to register iommu arm-smmu-v3 arm-smmu-v3.9.auto: probe with driver arm-smmu-v3 failed with error -22 Since SMMU driver had been already expecting a potential duplicated Stream ID in arm_smmu_install_ste_for_dev(), change the arm_smmu_insert_master() routine to ignore a duplicated ID from the fwspec->sids array as well. Note: this has been failing the iommu_device_probe() since 2021, although a recent iommu commit in v6.15-rc1 that moves iommu_device_probe() started to fail the SMMU driver probe. Since nobody has cared about DMA Alias support, leave that as it was but fix the fundamental iommu_device_probe() breakage. Fixes: cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure") Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20250415185620.504299-1-nicolinc@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5467f85dd463..0826b6bdf327 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3388,6 +3388,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -3398,10 +3399,20 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "stream %u already in tree from dev %s\n", sid, + dev_name(existing_master->dev)); ret = -EINVAL; break; } From 2d00c34d665bc23f5200962dbc4ac1919317036c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 11 Apr 2025 15:09:14 +0100 Subject: [PATCH 024/239] iommu/arm-smmu-v3: Fail aliasing StreamIDs more gracefully We've never supported StreamID aliasing between devices, and as such they will never have had functioning DMA, but this is not fatal to the SMMU itself. Although aliasing between hard-wired platform device StreamIDs would tend to raise questions about the whole system, in practice it's far more likely to occur relatively innocently due to legacy PCI bridges, where the underlying StreamID mappings are still perfectly reasonable. As such, return a more benign -ENODEV when failing probe for such an unsupported device (and log a more obvious error message), so that it doesn't break the entire SMMU probe now that bus_iommu_probe() runs in the right order and can propagate that error back. The end result is still that the device doesn't get an IOMMU group and probably won't work, same as before. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/39d54e49c8476efc4653e352150d44b185d6d50f.1744380554.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 0826b6bdf327..48d910399a1b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3411,9 +3411,9 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, continue; dev_warn(master->dev, - "stream %u already in tree from dev %s\n", sid, - dev_name(existing_master->dev)); - ret = -EINVAL; + "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n", + sid, dev_name(existing_master->dev)); + ret = -ENODEV; break; } } From 8dee308e4c01dea48fc104d37f92d5b58c50b96c Mon Sep 17 00:00:00 2001 From: Pavel Paklov Date: Tue, 25 Mar 2025 09:22:44 +0000 Subject: [PATCH 025/239] iommu/amd: Fix potential buffer overflow in parse_ivrs_acpihid There is a string parsing logic error which can lead to an overflow of hid or uid buffers. Comparing ACPIID_LEN against a total string length doesn't take into account the lengths of individual hid and uid buffers so the check is insufficient in some cases. For example if the length of hid string is 4 and the length of the uid string is 260, the length of str will be equal to ACPIID_LEN + 1 but uid string will overflow uid buffer which size is 256. The same applies to the hid string with length 13 and uid string with length 250. Check the length of hid and uid strings separately to prevent buffer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Signed-off-by: Pavel Paklov Link: https://lore.kernel.org/r/20250325092259.392844-1-Pavel.Paklov@cyberprotect.ru Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index dd9e26b7b718..14aa0d77df26 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3664,6 +3664,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); From 30a3f2f3e4bd6335b727c83c08a982d969752bc1 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 14 Apr 2025 12:16:35 -0700 Subject: [PATCH 026/239] iommu: Fix two issues in iommu_copy_struct_from_user() In the review for iommu_copy_struct_to_user() helper, Matt pointed out that a NULL pointer should be rejected prior to dereferencing it: https://lore.kernel.org/all/86881827-8E2D-461C-BDA3-FA8FD14C343C@nvidia.com And Alok pointed out a typo at the same time: https://lore.kernel.org/all/480536af-6830-43ce-a327-adbd13dc3f1d@oracle.com Since both issues were copied from iommu_copy_struct_from_user(), fix them first in the current header. Fixes: e9d36c07bb78 ("iommu: Add iommu_copy_struct_from_user helper") Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Acked-by: Alok Tiwari Reviewed-by: Matthew R. Ochs Link: https://lore.kernel.org/r/20250414191635.450472-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ccce8a751e2a..3a8d35d41fda 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -440,10 +440,10 @@ static inline int __iommu_copy_struct_from_user( void *dst_data, const struct iommu_user_data *src_data, unsigned int data_type, size_t data_len, size_t min_len) { - if (src_data->type != data_type) - return -EINVAL; if (WARN_ON(!dst_data || !src_data)) return -EINVAL; + if (src_data->type != data_type) + return -EINVAL; if (src_data->len < min_len || data_len < src_data->len) return -EINVAL; return copy_struct_from_user(dst_data, data_len, src_data->uptr, @@ -456,8 +456,8 @@ static inline int __iommu_copy_struct_from_user( * include/uapi/linux/iommufd.h * @user_data: Pointer to a struct iommu_user_data for user space data info * @data_type: The data type of the @kdst. Must match with @user_data->type - * @min_last: The last memember of the data structure @kdst points in the - * initial version. + * @min_last: The last member of the data structure @kdst points in the initial + * version. * Return 0 for success, otherwise -error. */ #define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ From 4f1492efb495bcef34c9ee8a94af81e6cea5abf4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Apr 2025 15:36:08 +0800 Subject: [PATCH 027/239] iommu/vt-d: Revert ATS timing change to fix boot failure Commit <5518f239aff1> ("iommu/vt-d: Move scalable mode ATS enablement to probe path") changed the PCI ATS enablement logic to run earlier, specifically before the default domain attachment. On some client platforms, this change resulted in boot failures, causing the kernel to panic with the following message and call trace: Kernel panic - not syncing: DMAR hardware is malfunctioning CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc3+ #175 Call Trace: dump_stack_lvl+0x6f/0xb0 dump_stack+0x10/0x16 panic+0x10a/0x2b7 iommu_enable_translation.cold+0xc/0xc intel_iommu_init+0xe39/0xec0 ? trace_hardirqs_on+0x1e/0xd0 ? __pfx_pci_iommu_init+0x10/0x10 pci_iommu_init+0xd/0x40 do_one_initcall+0x5b/0x390 kernel_init_freeable+0x26d/0x2b0 ? __pfx_kernel_init+0x10/0x10 kernel_init+0x15/0x120 ret_from_fork+0x35/0x60 ? __pfx_kernel_init+0x10/0x10 ret_from_fork_asm+0x1a/0x30 RIP: 1f0f:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0000:0000000000000000 EFLAGS: 841f0f2e66 ORIG_RAX: 1f0f2e6600000000 RAX: 0000000000000000 RBX: 1f0f2e6600000000 RCX: 2e66000000000084 RDX: 0000000000841f0f RSI: 000000841f0f2e66 RDI: 00841f0f2e660000 RBP: 00841f0f2e660000 R08: 00841f0f2e660000 R09: 000000841f0f2e66 R10: 0000000000841f0f R11: 2e66000000000084 R12: 000000841f0f2e66 R13: 0000000000841f0f R14: 2e66000000000084 R15: 1f0f2e6600000000 ---[ end Kernel panic - not syncing: DMAR hardware is malfunctioning ]--- Fix this by reverting the timing change for ATS enablement introduced by the offending commit and restoring the previous behavior. Fixes: 5518f239aff1 ("iommu/vt-d: Move scalable mode ATS enablement to probe path") Reported-by: Jarkko Nikula Closes: https://lore.kernel.org/linux-iommu/01b9c72f-460d-4f77-b696-54c6825babc9@linux.intel.com/ Signed-off-by: Lu Baolu Tested-by: Jarkko Nikula Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250416073608.1799578-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b29da2d96d0b..e60b699e7b8c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3785,20 +3785,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); - /* - * The PCIe spec, in its wisdom, declares that the behaviour of the - * device is undefined if you enable PASID support after ATS support. - * So always enable PASID support on devices which have it, even if - * we can't yet know if we're ever going to use it. - */ - if (info->pasid_supported && - !pci_enable_pasid(pdev, info->pasid_supported & ~1)) - info->pasid_enabled = 1; - - if (sm_supported(iommu)) - iommu_enable_pci_ats(info); - iommu_enable_pci_pri(info); - return &iommu->iommu; free_table: intel_pasid_free_table(dev); @@ -3810,6 +3796,26 @@ free: return ERR_PTR(ret); } +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + /* + * The PCIe spec, in its wisdom, declares that the behaviour of the + * device is undefined if you enable PASID support after ATS support. + * So always enable PASID support on devices which have it, even if + * we can't yet know if we're ever going to use it. + */ + if (info->pasid_supported && + !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) + info->pasid_enabled = 1; + + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) + iommu_enable_pci_ats(info); + iommu_enable_pci_pri(info); +} + static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); @@ -4391,6 +4397,7 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc_sva = intel_svm_domain_alloc, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, From 494d0939b1bda4d4ddca7d52a6ce6f808ff2c9a5 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 1 Apr 2025 15:04:02 +0800 Subject: [PATCH 028/239] ALSA: hda/realtek - Enable speaker for HP platform The speaker doesn't mute when plugged headphone. This platform support 4ch speakers. The speaker pin 0x14 wasn't fill verb table. After assigned model ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX. The speaker can mute when headphone was plugged. Fixes: aa8e3ef4fe53 ("ALSA: hda/realtek: Add quirks for various HP ENVY models") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/eb4c14a4d85740069c909e756bbacb0e@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 877137cb09ac..8ed613932c5b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -441,6 +441,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); fallthrough; case 0x10ec0215: + case 0x10ec0236: + case 0x10ec0245: + case 0x10ec0256: + case 0x10ec0257: case 0x10ec0285: case 0x10ec0289: alc_update_coef_idx(codec, 0x36, 1<<13, 0); @@ -448,12 +452,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0233: case 0x10ec0235: - case 0x10ec0236: - case 0x10ec0245: case 0x10ec0255: - case 0x10ec0256: case 0x19e58326: - case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: case 0x10ec0286: @@ -10768,8 +10768,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8caf, "HP Elite mt645 G8 Mobile Thin Client", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8cbd, "HP Pavilion Aero Laptop 13-bg0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), - SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), + SND_PCI_QUIRK(0x103c, 0x8cde, "HP OmniBook Ultra Flip Laptop 14t", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), From f406005e162b660dc405b4f18bf7bcb93a515608 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Thu, 17 Apr 2025 04:19:23 +0930 Subject: [PATCH 029/239] ALSA: usb-audio: Add retry on -EPROTO from usb_set_interface() During initialisation of Focusrite USB audio interfaces, -EPROTO is sometimes returned from usb_set_interface(), which sometimes prevents the device from working: subsequent usb_set_interface() and uac_clock_source_is_valid() calls fail. This patch adds up to 5 retries in endpoint_set_interface(), with a delay starting at 5ms and doubling each time. 5 retries was chosen to allow for longer than expected waits for the interface to start responding correctly; in testing, a single 5ms delay was sufficient to fix the issue. Closes: https://github.com/geoffreybennett/fcp-support/issues/2 Cc: stable@vger.kernel.org Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/Z//7s9dKsmVxHzY2@m.b4.vu Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index a29f28eb7d0c..f36ec98da460 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -926,6 +926,8 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, { int altset = set ? ep->altsetting : 0; int err; + int retries = 0; + const int max_retries = 5; if (ep->iface_ref->altset == altset) return 0; @@ -935,8 +937,13 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, usb_audio_dbg(chip, "Setting usb interface %d:%d for EP 0x%x\n", ep->iface, altset, ep->ep_num); +retry: err = usb_set_interface(chip->dev, ep->iface, altset); if (err < 0) { + if (err == -EPROTO && ++retries <= max_retries) { + msleep(5 * (1 << (retries - 1))); + goto retry; + } usb_audio_err_ratelimited( chip, "%d:%d: usb_set_interface failed (%d)\n", ep->iface, altset, err); From 4b98bf3bff7353d94824c4d874ff2d7f38acc49a Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 11 Mar 2025 20:41:12 +0100 Subject: [PATCH 030/239] arm64: dts: imx8mp: configure GPU and NPU clocks in nominal DTSI Commit 255fbd9eabe7 ("arm64: dts: imx8mp: Add optional nominal drive mode DTSI") added imx8mp-nominal.dtsi, which overrides all overdrive clock rates in imx8mp.dtsi to the nominal rates. At the same time, commit 9f7595b3e5ae ("arm64: dts: imx8mp: configure GPU and NPU clocks to overdrive rate") went in, which changed some clock rates away from the nominal values. Resolve the discrepancy by effectively reverting the changes in the latter commit inside imx8mp-nominal.dtsi. This is required for proper operation of the imx8mp-skov boards, which are currently imx8mp-nominal.dtsi's only users and lets all other boards that don't include it benefit from the new higher frequencies. Signed-off-by: Ahmad Fatoum Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mp-nominal.dtsi | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi index a1b75c9068b2..dc0ccd723c6d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi @@ -24,6 +24,20 @@ fsl,operating-mode = "nominal"; }; +&gpu2d { + assigned-clocks = <&clk IMX8MP_CLK_GPU2D_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>; +}; + +&gpu3d { + assigned-clocks = <&clk IMX8MP_CLK_GPU3D_CORE>, + <&clk IMX8MP_CLK_GPU3D_SHADER_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, <800000000>; +}; + &pgc_hdmimix { assigned-clocks = <&clk IMX8MP_CLK_HDMI_AXI>, <&clk IMX8MP_CLK_HDMI_APB>; @@ -46,6 +60,18 @@ assigned-clock-rates = <600000000>, <300000000>; }; +&pgc_mlmix { + assigned-clocks = <&clk IMX8MP_CLK_ML_CORE>, + <&clk IMX8MP_CLK_ML_AXI>, + <&clk IMX8MP_CLK_ML_AHB>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, + <800000000>, + <300000000>; +}; + &media_blk_ctrl { assigned-clocks = <&clk IMX8MP_CLK_MEDIA_AXI>, <&clk IMX8MP_CLK_MEDIA_APB>, From 02e4232998db357bb8199778722d81ffcff0cb98 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Fri, 14 Mar 2025 14:01:04 +0800 Subject: [PATCH 031/239] arm64: dts: imx95: Correct the range of PCIe app-reg region Correct the range of PCIe app-reg region from 0x2000 to 0x4000 refer to SerDes_SS memory map of i.MX95 Rerference Manual. Fixes: 3b1d5deb29ff ("arm64: dts: imx95: add pcie[0,1] and pcie-ep[0,1] support") Signed-off-by: Richard Zhu Reviewed-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx95.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 9bb26b466a06..59f057ba6fa7 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1626,7 +1626,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x60100000 0 0xfe00000>, <0 0x4c360000 0 0x10000>, - <0 0x4c340000 0 0x2000>; + <0 0x4c340000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0x0 0x00000000 0x0 0x6ff00000 0 0x00100000>, <0x82000000 0x0 0x10000000 0x9 0x10000000 0 0x10000000>; @@ -1673,7 +1673,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x4c360000 0 0x1000>, <0 0x4c320000 0 0x1000>, - <0 0x4c340000 0 0x2000>, + <0 0x4c340000 0 0x4000>, <0 0x4c370000 0 0x10000>, <0x9 0 1 0>; reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space"; @@ -1700,7 +1700,7 @@ reg = <0 0x4c380000 0 0x10000>, <8 0x80100000 0 0xfe00000>, <0 0x4c3e0000 0 0x10000>, - <0 0x4c3c0000 0 0x2000>; + <0 0x4c3c0000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0 0x00000000 0x8 0x8ff00000 0 0x00100000>, <0x82000000 0 0x10000000 0xa 0x10000000 0 0x10000000>; @@ -1749,7 +1749,7 @@ reg = <0 0x4c380000 0 0x10000>, <0 0x4c3e0000 0 0x1000>, <0 0x4c3a0000 0 0x1000>, - <0 0x4c3c0000 0 0x2000>, + <0 0x4c3c0000 0 0x4000>, <0 0x4c3f0000 0 0x10000>, <0xa 0 1 0>; reg-names = "dbi", "atu", "dbi2", "app", "dma", "addr_space"; From 6e1a7bc8382b0d4208258f7d2a4474fae788dd90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Fri, 14 Mar 2025 17:20:38 +0100 Subject: [PATCH 032/239] ARM: dts: opos6ul: add ksz8081 phy properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") removed a PHY fixup that setted the clock mode and the LED mode. Make the Ethernet interface work again by doing as advised in the commit's log, set clock mode and the LED mode in the device tree. Fixes: c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") Signed-off-by: Sébastien Szymanski Reviewed-by: Oleksij Rempel Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi index f2386dcb9ff2..dda4fa91b2f2 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi @@ -40,6 +40,9 @@ reg = <1>; interrupt-parent = <&gpio4>; interrupts = <16 IRQ_TYPE_LEVEL_LOW>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; status = "okay"; }; }; From 7e21ea8149a0e41c3666ee52cc063a6f797a7a2a Mon Sep 17 00:00:00 2001 From: Chen Linxuan Date: Tue, 15 Apr 2025 12:06:16 +0300 Subject: [PATCH 033/239] drm/i915/pxp: fix undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' On x86_64 with gcc version 13.3.0, I compile kernel with: make defconfig ./scripts/kconfig/merge_config.sh .config <( echo CONFIG_COMPILE_TEST=y ) make KCFLAGS="-fno-inline-functions -fno-inline-small-functions -fno-inline-functions-called-once" Then I get a linker error: ld: vmlinux.o: in function `pxp_fw_dependencies_completed': kintel_pxp.c:(.text+0x95728f): undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' This is caused by not having a intel_pxp_gsccs_is_ready_for_sessions() header stub for CONFIG_DRM_I915_PXP=n. Add it. Signed-off-by: Chen Linxuan Fixes: 99afb7cc8c44 ("drm/i915/pxp: Add ARB session creation and cleanup") Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250415090616.2649889-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit b484c1e225a6a582fc78c4d7af7b286408bb7d41) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 9aae779c4da3..4969d3de2bac 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -23,6 +23,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -34,8 +35,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} + #endif -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); - #endif /*__INTEL_PXP_GSCCS_H__ */ From 460b14b0929fa9f658a7e159ef646ce456962ab0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 18 Apr 2025 13:27:53 +0200 Subject: [PATCH 034/239] spi: stm32-ospi: Fix an error handling path in stm32_ospi_probe() If an error occurs after a successful stm32_ospi_dma_setup() call, some dma_release_channel() calls are needed to release some resources, as already done in the remove function. Fixes: 79b8a705e26c ("spi: stm32: Add OSPI driver") Signed-off-by: Christophe JAILLET Reviewed-by: Patrice Chotard Link: https://patch.msgid.link/2674c8df1d05ab312826b69bfe9559f81d125a0b.1744975624.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-ospi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index 668022098b1e..9ec9823409cc 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -960,6 +960,10 @@ err_pm_resume: err_pm_enable: pm_runtime_force_suspend(ospi->dev); mutex_destroy(&ospi->lock); + if (ospi->dma_chtx) + dma_release_channel(ospi->dma_chtx); + if (ospi->dma_chrx) + dma_release_channel(ospi->dma_chrx); return ret; } From 34024cf69c51b11c2b608f5d578626b9b1c484f5 Mon Sep 17 00:00:00 2001 From: Hao Chang Date: Tue, 22 Apr 2025 15:52:05 +0800 Subject: [PATCH 035/239] pinctrl: mediatek: Fix new design debounce issue Calculate the true offset of eint according to index. Fixes: 3ef9f710efcb ("pinctrl: mediatek: Add EINT support for multiple addresses") Signed-off-by: Hao Chang Signed-off-by: Qingliang Li Link: https://lore.kernel.org/20250422075216.14073-1-ot_chhao.chang@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index ced4ee509b5b..b4eb2beab691 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -449,7 +449,7 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_num, return -EOPNOTSUPP; virq = irq_find_mapping(eint->domain, eint_num); - eint_offset = (eint_num % 4) * 8; + eint_offset = (idx % 4) * 8; d = irq_get_irq_data(virq); set_offset = (idx / 4) * 4 + eint->regs->dbnc_set; From 446d28584723e5d4bcdb18cf6ef87cb2bb597dd8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Apr 2025 11:23:15 +0300 Subject: [PATCH 036/239] pinctrl: mediatek: common-v1: Fix error checking in mtk_eint_init() The devm_kzalloc() function doesn't return error pointers, it returns NULL on error. Then on the next line it checks the same pointer again by mistake, "->base" instead of "->base[0]". Fixes: fe412e3a6c97 ("pinctrl: mediatek: common-v1: Fix EINT breakage on older controllers") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/aAijc10fHka1WAMX@stanley.mountain Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 7585de11854b..8596f3541265 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1018,12 +1018,12 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) pctl->eint->nbase = 1; /* mtk-eint expects an array */ pctl->eint->base = devm_kzalloc(pctl->dev, sizeof(pctl->eint->base), GFP_KERNEL); - if (IS_ERR(pctl->eint->base)) + if (!pctl->eint->base) return -ENOMEM; pctl->eint->base[0] = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(pctl->eint->base)) - return PTR_ERR(pctl->eint->base); + if (IS_ERR(pctl->eint->base[0])) + return PTR_ERR(pctl->eint->base[0]); pctl->eint->irq = irq_of_parse_and_map(np, 0); if (!pctl->eint->irq) From 70ad2e6bd180f94be030aef56e59693e36d945f3 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 23 Apr 2025 10:09:44 +0100 Subject: [PATCH 037/239] ASoC: cs42l43: Disable headphone clamps during type detection The headphone clamps cause fairly loud pops during type detect because they sink current from the detection process itself. Disable the clamps whilst the type detect runs, to improve the detection pop performance. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250423090944.1504538-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 20e6ab6f0d4a..6165ac16c3a9 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -654,6 +654,10 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) reinit_completion(&priv->type_detect); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK); + cs42l43_start_hs_bias(priv, true); regmap_update_bits(cs42l43->regmap, CS42L43_HS2, CS42L43_HSDET_MODE_MASK, 0x3 << CS42L43_HSDET_MODE_SHIFT); @@ -665,6 +669,9 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) CS42L43_HSDET_MODE_MASK, 0x2 << CS42L43_HSDET_MODE_SHIFT); cs42l43_stop_hs_bias(priv); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK, 0); + if (!time_left) return -ETIMEDOUT; From bbe5679f30d7690a9b6838a583b9690ea73fe0e9 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Tue, 15 Apr 2025 14:19:00 +0200 Subject: [PATCH 038/239] drm/nouveau: Fix WARN_ON in nouveau_fence_context_kill() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nouveau is mostly designed in a way that it's expected that fences only ever get signaled through nouveau_fence_signal(). However, in at least one other place, nouveau_fence_done(), can signal fences, too. If that happens (race) a signaled fence remains in the pending list for a while, until it gets removed by nouveau_fence_update(). Should nouveau_fence_context_kill() run in the meantime, this would be a bug because the function would attempt to set an error code on an already signaled fence. Have nouveau_fence_context_kill() check for a fence being signaled. Cc: stable@vger.kernel.org # v5.10+ Fixes: ea13e5abf807 ("drm/nouveau: signal pending fences when channel has been killed") Suggested-by: Christian König Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250415121900.55719-3-phasta@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 7cc84472cece..edddfc036c6d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -90,7 +90,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); - if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) From da6d7db8b1620521d093a973a0110898f6585ff9 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 23 Apr 2025 13:57:22 +0800 Subject: [PATCH 039/239] ASoC: soc-acpi-intel-ptl-match: add empty item to ptl_cs42l43_l3[] An empty item is required to terminate the look up loop. Fixes: ac5b4a24f16f ("ASoC: Intel: soc-acpi-intel-ptl-match: Add cs42l43 support") Signed-off-by: Bard Liao Reviewed-by: Naveen Manohar Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250423055722.6920-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 6603d8de501c..c599eb43eeb1 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -431,7 +431,8 @@ static const struct snd_soc_acpi_link_adr ptl_cs42l43_l3[] = { .mask = BIT(3), .num_adr = ARRAY_SIZE(cs42l43_3_adr), .adr_d = cs42l43_3_adr, - } + }, + {} }; static const struct snd_soc_acpi_link_adr ptl_rt722_only[] = { From bfb713ea53c746b07ae69fe97fa9b5388e4f34f9 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 17 Apr 2025 14:55:50 +0100 Subject: [PATCH 040/239] perf tools: Fix arm64 build by generating unistd_64.h Since pulling in the kernel changes in commit 22f72088ffe6 ("tools headers: Update the syscall table with the kernel sources"), arm64 is no longer using a generic syscall header and generates one from the syscall table. Therefore we must also generate the syscall header for arm64 before building Perf. Add it as a dependency to libperf which uses one syscall number. Perf uses more, but as libperf is a dependency of Perf it will be generated for both. Future platforms that need this will have to add their own syscall-y targets in libperf manually. Unfortunately the arch specific files that do this (e.g. arch/arm64/include/asm/Kbuild) can't easily be imported into the Perf build. But Perf only needs a subset of the generated files anyway, so redefining them is probably the correct thing to do. Fixes: 22f72088ffe6 ("tools headers: Update the syscall table with the kernel sources") Signed-off-by: James Clark Tested-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20250417-james-perf-fix-gen-syscall-v1-1-1d268c923901@linaro.org Signed-off-by: Namhyung Kim --- tools/lib/perf/Makefile | 12 +++++++++++- tools/perf/Makefile.config | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index ffcfd777c451..1a19b5013f45 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,6 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ +-I$(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -99,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eea95c6c0c71..a52482654d4b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,6 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated +CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) From c1b0f5183a4488b6b7790f834ce3a786725b3583 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 10 Apr 2025 17:15:25 +0300 Subject: [PATCH 041/239] ASoC: renesas: rz-ssi: Use NOIRQ_SYSTEM_SLEEP_PM_OPS() In the latest kernel versions system crashes were noticed occasionally during suspend/resume. This occurs because the RZ SSI suspend trigger (called from snd_soc_suspend()) is executed after rz_ssi_pm_ops->suspend() and it accesses IP registers. After the rz_ssi_pm_ops->suspend() is executed the IP clocks are disabled and its reset line is asserted. Since snd_soc_suspend() is invoked through snd_soc_pm_ops->suspend(), snd_soc_pm_ops is associated with soc_driver (defined in sound/soc/soc-core.c), and there is no parent-child relationship between soc_driver and rz_ssi_driver the power management subsystem does not enforce a specific suspend/resume order between the RZ SSI platform driver and soc_driver. To ensure that the suspend/resume function of rz-ssi is executed after snd_soc_suspend(), use NOIRQ_SYSTEM_SLEEP_PM_OPS(). Fixes: 1fc778f7c833 ("ASoC: renesas: rz-ssi: Add suspend to RAM support") Cc: stable@vger.kernel.org Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20250410141525.4126502-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Mark Brown --- sound/soc/renesas/rz-ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c index 3a0af4ca7ab6..0f7458a43901 100644 --- a/sound/soc/renesas/rz-ssi.c +++ b/sound/soc/renesas/rz-ssi.c @@ -1244,7 +1244,7 @@ static int rz_ssi_runtime_resume(struct device *dev) static const struct dev_pm_ops rz_ssi_pm_ops = { RUNTIME_PM_OPS(rz_ssi_runtime_suspend, rz_ssi_runtime_resume, NULL) - SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; static struct platform_driver rz_ssi_driver = { From 722a6ad4867ce8c4cb131a3371d0b5389a75dee0 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 23 Apr 2025 21:31:57 +0200 Subject: [PATCH 042/239] spi: spi-qpic-snand: propagate errors from qcom_spi_block_erase() The qcom_spi_block_erase() function returns with error in case of failure. Change the qcom_spi_send_cmdaddr() function to propagate these errors to the callers instead of returning with success. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Reviewed-by: Abel Vesa Link: https://patch.msgid.link/20250423-qpic-snand-propagate-error-v1-1-4b26ed45fdb5@gmail.com Reviewed-by: Md Sadre Alam Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 17eb67e19132..ae32c452d0bc 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -1307,8 +1307,7 @@ static int qcom_spi_send_cmdaddr(struct qcom_nand_controller *snandc, snandc->qspi->addr1 = cpu_to_le32(s_op.addr1_reg << 16); snandc->qspi->addr2 = cpu_to_le32(s_op.addr2_reg); snandc->qspi->cmd = cpu_to_le32(cmd); - qcom_spi_block_erase(snandc); - return 0; + return qcom_spi_block_erase(snandc); default: break; } From be8250786ca94952a19ce87f98ad9906448bc9ef Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Mon, 21 Apr 2025 15:52:32 +0800 Subject: [PATCH 043/239] mm, slab: clean up slab->obj_exts always When memory allocation profiling is disabled at runtime or due to an error, shutdown_mem_profiling() is called: slab->obj_exts which previously allocated remains. It won't be cleared by unaccount_slab() because of mem_alloc_profiling_enabled() not true. It's incorrect, slab->obj_exts should always be cleaned up in unaccount_slab() to avoid following error: [...]BUG: Bad page state in process... .. [...]page dumped because: page still charged to cgroup [andriy.shevchenko@linux.intel.com: fold need_slab_obj_ext() into its only user] Fixes: 21c690a349ba ("mm: introduce slabobj_ext to support slab object extensions") Cc: stable@vger.kernel.org Signed-off-by: Zhenhua Huang Acked-by: David Rientjes Acked-by: Harry Yoo Tested-by: Harry Yoo Acked-by: Suren Baghdasaryan Link: https://patch.msgid.link/20250421075232.2165527-1-quic_zhenhuah@quicinc.com Signed-off-by: Vlastimil Babka --- mm/slub.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index dc9e729e1d26..be8b09e09d30 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2028,8 +2028,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, return 0; } -/* Should be called only if mem_alloc_profiling_enabled() */ -static noinline void free_slab_obj_exts(struct slab *slab) +static inline void free_slab_obj_exts(struct slab *slab) { struct slabobj_ext *obj_exts; @@ -2049,18 +2048,6 @@ static noinline void free_slab_obj_exts(struct slab *slab) slab->obj_exts = 0; } -static inline bool need_slab_obj_ext(void) -{ - if (mem_alloc_profiling_enabled()) - return true; - - /* - * CONFIG_MEMCG creates vector of obj_cgroup objects conditionally - * inside memcg_slab_post_alloc_hook. No other users for now. - */ - return false; -} - #else /* CONFIG_SLAB_OBJ_EXT */ static inline void init_slab_obj_exts(struct slab *slab) @@ -2077,11 +2064,6 @@ static inline void free_slab_obj_exts(struct slab *slab) { } -static inline bool need_slab_obj_ext(void) -{ - return false; -} - #endif /* CONFIG_SLAB_OBJ_EXT */ #ifdef CONFIG_MEM_ALLOC_PROFILING @@ -2129,7 +2111,7 @@ __alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) static inline void alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) { - if (need_slab_obj_ext()) + if (mem_alloc_profiling_enabled()) __alloc_tagging_slab_alloc_hook(s, object, flags); } @@ -2601,8 +2583,12 @@ static __always_inline void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (memcg_kmem_online() || need_slab_obj_ext()) - free_slab_obj_exts(slab); + /* + * The slab object extensions should now be freed regardless of + * whether mem_alloc_profiling_enabled() or not because profiling + * might have been disabled after slab->obj_exts got allocated. + */ + free_slab_obj_exts(slab); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); From 1526a735a7620db8b22ea3d24d3ba7ac262b2aaf Mon Sep 17 00:00:00 2001 From: Michael Riesch Date: Thu, 10 Apr 2025 21:41:30 +0200 Subject: [PATCH 044/239] MAINTAINERS: add exclude for dt-bindings to imx entry Since the IMX (as in i.MX, the NXP SoCs) MAINTAINERS entry claims everything that contains the name "imx", hanges to device tree bindings for any Sony IMX image sensor are likely to be sent to the maintainers listed therein. Add the missing exclude to fix that. Fixes: da8b7f0fb02b ("MAINTAINERS: add all files matching "imx" and "mxs" to the IMX entry") Suggested-by: Sebastian Reichel Signed-off-by: Michael Riesch Reviewed-by: Sakari Ailus Signed-off-by: Shawn Guo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 96b827049501..c87b26eada7b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2519,6 +2519,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git F: arch/arm/boot/dts/nxp/imx/ F: arch/arm/boot/dts/nxp/mxs/ F: arch/arm64/boot/dts/freescale/ +X: Documentation/devicetree/bindings/media/i2c/ X: arch/arm64/boot/dts/freescale/fsl-* X: arch/arm64/boot/dts/freescale/qoriq-* X: drivers/media/i2c/ From e53e004e346062e15df9511bd4b5a19e34701384 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Wed, 16 Apr 2025 12:26:16 +0200 Subject: [PATCH 045/239] accel/ivpu: Correct DCT interrupt handling Fix improper use of dct_active_percent field in DCT interrupt handler causing DCT to never get enabled. Set dct_active_percent internally before IPC to ensure correct driver value even if IPC fails. Set default DCT value to 30 accordingly to HW architecture specification. Fixes: a19bffb10c46 ("accel/ivpu: Implement DCT handling") Signed-off-by: Karol Wachowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250416102616.384577-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_btrs.h | 2 +- drivers/accel/ivpu/ivpu_pm.c | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index 300f749971d4..d2d82651976d 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -14,7 +14,7 @@ #define PLL_PROFILING_FREQ_DEFAULT 38400000 #define PLL_PROFILING_FREQ_HIGH 400000000 -#define DCT_DEFAULT_ACTIVE_PERCENT 15u +#define DCT_DEFAULT_ACTIVE_PERCENT 30u #define DCT_PERIOD_US 35300u int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index b5891e91f7ab..ac0e22454596 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -428,16 +428,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) active_us = (DCT_PERIOD_US * active_percent) / 100; inactive_us = DCT_PERIOD_US - active_us; + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = active_percent; - - ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", - active_percent, active_us, inactive_us); return 0; } @@ -445,15 +446,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) { int ret; + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT requested to be disabled\n"); + ret = ivpu_jsm_dct_disable(vdev); if (ret) { ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = 0; - - ivpu_dbg(vdev, PM, "DCT disabled\n"); return 0; } @@ -466,7 +468,7 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work) if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) return; - if (vdev->pm->dct_active_percent) + if (enable) ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); else ret = ivpu_pm_dct_disable(vdev); From 759ee400d1d95ac903d81a1a229a117be822d077 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Wed, 16 Apr 2025 12:26:29 +0200 Subject: [PATCH 046/239] accel/ivpu: Fix the D0i2 disable test mode Correct setup of D0i2 disable which was by mistake set up to value 1 and use BIT(1) instead. Fixes: 011529fe8112 ("accel/ivpu: Implement D0i2 disable test mode") Signed-off-by: Andrzej Kacprowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250416102629.384626-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 5e1d709c6a46..ccaaf6c100c0 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -544,7 +544,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->d0i3_entry_vpu_ts); ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", boot_params->system_time_us); - ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = %u\n", + ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n", boot_params->power_profile); } @@ -646,7 +646,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_residency_time_us = 0; boot_params->d0i3_entry_vpu_ts = 0; if (IVPU_WA(disable_d0i2)) - boot_params->power_profile = 1; + boot_params->power_profile |= BIT(1); boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ From 71cfb1f88f772fb92a68a4ab85b16ccd5cc8535d Mon Sep 17 00:00:00 2001 From: Zixian Zeng Date: Fri, 25 Apr 2025 10:28:12 +0800 Subject: [PATCH 047/239] spi: dt-bindings: snps,dw-apb-ssi: Merge duplicate compatible entry Microsemi Ocelot/Jaguar2, Renesas RZ/N1 and T-HEAD TH1520 SoC-specific compatibles, which eventually fallback to the generic DW ssi compatible, it's better to combine them in single entry Suggested-by: Rob Herring Signed-off-by: Zixian Zeng Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250425-sfg-spi-v6-1-2dbe7bb46013@gmail.com Signed-off-by: Mark Brown --- .../bindings/spi/snps,dw-apb-ssi.yaml | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index bccd00a1ddd0..a43d2fb9942d 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -56,19 +56,17 @@ properties: enum: - snps,dw-apb-ssi - snps,dwc-ssi-1.01a - - description: Microsemi Ocelot/Jaguar2 SoC SPI Controller - items: - - enum: - - mscc,ocelot-spi - - mscc,jaguar2-spi - - const: snps,dw-apb-ssi - description: Microchip Sparx5 SoC SPI Controller const: microchip,sparx5-spi - description: Amazon Alpine SPI Controller const: amazon,alpine-dw-apb-ssi - - description: Renesas RZ/N1 SPI Controller + - description: Vendor controllers which use snps,dw-apb-ssi as fallback items: - - const: renesas,rzn1-spi + - enum: + - mscc,ocelot-spi + - mscc,jaguar2-spi + - renesas,rzn1-spi + - thead,th1520-spi - const: snps,dw-apb-ssi - description: Intel Keem Bay SPI Controller const: intel,keembay-ssi @@ -88,10 +86,6 @@ properties: - renesas,r9a06g032-spi # RZ/N1D - renesas,r9a06g033-spi # RZ/N1S - const: renesas,rzn1-spi # RZ/N1 - - description: T-HEAD TH1520 SoC SPI Controller - items: - - const: thead,th1520-spi - - const: snps,dw-apb-ssi reg: minItems: 1 From 0889c4d28ad79b55ee8cf3c818e9d86203ace8f0 Mon Sep 17 00:00:00 2001 From: Zixian Zeng Date: Fri, 25 Apr 2025 10:28:13 +0800 Subject: [PATCH 048/239] spi: dt-bindings: snps,dw-apb-ssi: Add compatible for SOPHGO SG2042 SoC Sophgo SG2042 ships an SPI controller [1] compatible with the Synopsys DW-SPI IP. Add SoC-specific compatible string and use the generic one as fallback. Link: https://github.com/sophgo/sophgo-doc/blob/main/SG2042/TRM/source/SPI.rst [1] Signed-off-by: Zixian Zeng Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250425-sfg-spi-v6-2-2dbe7bb46013@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index a43d2fb9942d..53d00ca643b3 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -66,6 +66,7 @@ properties: - mscc,ocelot-spi - mscc,jaguar2-spi - renesas,rzn1-spi + - sophgo,sg2042-spi - thead,th1520-spi - const: snps,dw-apb-ssi - description: Intel Keem Bay SPI Controller From 8e4d3d8a5e51e07bd0d6cdd81b5e4af79f796927 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 24 Apr 2025 17:43:33 +0530 Subject: [PATCH 049/239] spi: spi-mem: Add fix to avoid divide error For some SPI flash memory operations, dummy bytes are not mandatory. For example, in Winbond SPINAND flash memory devices, the `write_cache` and `update_cache` operation variants have zero dummy bytes. Calculating the duration for SPI memory operations with zero dummy bytes causes a divide error when `ncycles` is calculated in the spi_mem_calc_op_duration(). Add changes to skip the 'ncylcles' calculation for zero dummy bytes. Following divide error is fixed by this change: Oops: divide error: 0000 [#1] PREEMPT SMP NOPTI ... ? do_trap+0xdb/0x100 ? do_error_trap+0x75/0xb0 ? spi_mem_calc_op_duration+0x56/0xb0 ? exc_divide_error+0x3b/0x70 ? spi_mem_calc_op_duration+0x56/0xb0 ? asm_exc_divide_error+0x1b/0x20 ? spi_mem_calc_op_duration+0x56/0xb0 ? spinand_select_op_variant+0xee/0x190 [spinand] spinand_match_and_init+0x13e/0x1a0 [spinand] spinand_manufacturer_match+0x6e/0xa0 [spinand] spinand_probe+0x357/0x7f0 [spinand] ? kernfs_activate+0x87/0xd0 spi_mem_probe+0x7a/0xb0 spi_probe+0x7d/0x130 Fixes: 226d6cb3cb79 ("spi: spi-mem: Estimate the time taken by operations") Suggested-by: Krishnamoorthi M Co-developed-by: Akshata MukundShetty Signed-off-by: Akshata MukundShetty Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20250424121333.417372-1-Raju.Rangoju@amd.com Reviewed-by: Miquel Raynal Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index a31a1db07aa4..5db0639d3b01 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -596,7 +596,11 @@ u64 spi_mem_calc_op_duration(struct spi_mem_op *op) ns_per_cycles = 1000000000 / op->max_freq; ncycles += ((op->cmd.nbytes * 8) / op->cmd.buswidth) / (op->cmd.dtr ? 2 : 1); ncycles += ((op->addr.nbytes * 8) / op->addr.buswidth) / (op->addr.dtr ? 2 : 1); - ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + + /* Dummy bytes are optional for some SPI flash memory operations */ + if (op->dummy.nbytes) + ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + ncycles += ((op->data.nbytes * 8) / op->data.buswidth) / (op->data.dtr ? 2 : 1); return ncycles * ns_per_cycles; From ba85883d160515129b58873f74376a89faf21c7c Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:39 +0530 Subject: [PATCH 050/239] ASoC: amd: acp: Fix NULL pointer deref on acp resume path update chip data using dev_get_drvdata(dev->parent) instead of dev_get_platdata(dev). BUG: kernel NULL pointer dereference, address: 0000000000000010 Call Trace: ? __pfx_platform_pm_resume+0x10/0x10 platform_pm_resume+0x28/0x60 dpm_run_callback+0x51/0x1a0 device_resume+0x1a6/0x2b0 dpm_resume+0x168/0x230 Fixes: e3933683b25e ("ASoC: amd: acp: Remove redundant acp_dev_data structure") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-1-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-rembrandt.c | 2 +- sound/soc/amd/acp/acp-renoir.c | 2 +- sound/soc/amd/acp/acp63.c | 2 +- sound/soc/amd/acp/acp70.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/amd/acp/acp-rembrandt.c b/sound/soc/amd/acp/acp-rembrandt.c index 746b6ed72029..cccdd10c345e 100644 --- a/sound/soc/amd/acp/acp-rembrandt.c +++ b/sound/soc/amd/acp/acp-rembrandt.c @@ -199,7 +199,7 @@ static void rembrandt_audio_remove(struct platform_device *pdev) static int rmb_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp-renoir.c b/sound/soc/amd/acp/acp-renoir.c index ebf0106fc737..04f6d70b6a92 100644 --- a/sound/soc/amd/acp/acp-renoir.c +++ b/sound/soc/amd/acp/acp-renoir.c @@ -146,7 +146,7 @@ static void renoir_audio_remove(struct platform_device *pdev) static int rn_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp63.c b/sound/soc/amd/acp/acp63.c index 52d895e624c7..1f15c96a9b94 100644 --- a/sound/soc/amd/acp/acp63.c +++ b/sound/soc/amd/acp/acp63.c @@ -250,7 +250,7 @@ static void acp63_audio_remove(struct platform_device *pdev) static int acp63_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp70.c b/sound/soc/amd/acp/acp70.c index 6d5f5ade075c..217b717e9beb 100644 --- a/sound/soc/amd/acp/acp70.c +++ b/sound/soc/amd/acp/acp70.c @@ -182,7 +182,7 @@ static void acp_acp70_audio_remove(struct platform_device *pdev) static int acp70_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; From 6d9b64156d849e358cb49b6b899fb0b7d262bda8 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:40 +0530 Subject: [PATCH 051/239] ASoC: amd: acp: Fix NULL pointer deref in acp_i2s_set_tdm_slot Update chip data using dev_get_drvdata(dev->parent) to fix NULL pointer deref in acp_i2s_set_tdm_slot. Fixes: cd60dec8994c ("ASoC: amd: acp: Refactor TDM slots selction based on acp revision id") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-2-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index a38409dd1d34..70fa54d568ef 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -97,7 +97,7 @@ static int acp_i2s_set_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mas struct acp_stream *stream; int slot_len, no_of_slots; - chip = dev_get_platdata(dev); + chip = dev_get_drvdata(dev->parent); switch (slot_width) { case SLOT_WIDTH_8: slot_len = 8; From 138e6da0392ed067d0db7b5b5b4582c3668cfcf9 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:41 +0530 Subject: [PATCH 052/239] ASoC: amd: acp: Fix devm_snd_soc_register_card(acp-pdm-mach) failure Add condition check to fix devm_snd_soc_register_card(acp-pdm-mach) deferred probe failure, when pdm DSD entry is not available. [15.910456] acp_mach acp-pdm-mach: devm_snd_soc_register_card(acp-pdm-mach) failed: -517 [15.910536] platform acp-pdm-mach: deferred probe pending: (reason unknown) Fixes: 6e60db74b69c2 ("ASoC: amd: acp: Refactor acp machine select") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-3-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-legacy-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-legacy-common.c b/sound/soc/amd/acp/acp-legacy-common.c index b4d68484e06d..ba8db0851daa 100644 --- a/sound/soc/amd/acp/acp-legacy-common.c +++ b/sound/soc/amd/acp/acp-legacy-common.c @@ -450,7 +450,7 @@ int acp_machine_select(struct acp_chip_info *chip) struct snd_soc_acpi_mach *mach; int size, platform; - if (chip->flag == FLAG_AMD_LEGACY_ONLY_DMIC) { + if (chip->flag == FLAG_AMD_LEGACY_ONLY_DMIC && chip->is_pdm_dev) { platform = chip->acp_rev; chip->mach_dev = platform_device_register_data(chip->dev, "acp-pdm-mach", PLATFORM_DEVID_NONE, &platform, From a549b927ea3f5e50b1394209b64e6e17e31d4db8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 20 Apr 2025 10:56:59 +0200 Subject: [PATCH 053/239] ASoC: Intel: bytcr_rt5640: Add DMI quirk for Acer Aspire SW3-013 Acer Aspire SW3-013 requires the very same quirk as other Acer Aspire model for making it working. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220011 Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250420085716.12095-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 6446cda0f857..0f3b8f44e701 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -576,6 +576,19 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, + { /* Acer Aspire SW3-013 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW3-013"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), From 75aea4b0656ead0facd13d2aae4cb77326e53d2f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:14 -0700 Subject: [PATCH 054/239] perf/x86/intel: Only check the group flag for X86 leader A warning in intel_pmu_lbr_counters_reorder() may be triggered by below perf command. perf record -e "{cpu-clock,cycles/call-graph="lbr"/}" -- sleep 1 It's because the group is mistakenly treated as a branch counter group. The hw.flags of the leader are used to determine whether a group is a branch counters group. However, the hw.flags is only available for a hardware event. The field to store the flags is a union type. For a software event, it's a hrtimer. The corresponding bit may be set if the leader is a software event. For a branch counter group and other groups that have a group flag (e.g., topdown, PEBS counters snapshotting, and ACR), the leader must be a X86 event. Check the X86 event before checking the flag. The patch only fixes the issue for the branch counter group. The following patch will fix the other groups. There may be an alternative way to fix the issue by moving the hw.flags out of the union type. It should work for now. But it's still possible that the flags will be used by other types of events later. As long as that type of event is used as a leader, a similar issue will be triggered. So the alternative way is dropped. Fixes: 33744916196b ("perf/x86/intel: Support branch counters logging") Closes: https://lore.kernel.org/lkml/20250412091423.1839809-1-luogengkun@huaweicloud.com/ Reported-by: Luo Gengkun Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250424134718.311934-2-kan.liang@linux.intel.com --- arch/x86/events/core.c | 2 +- arch/x86/events/perf_event.h | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3a4f031d2f44..139ad80d1df3 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -754,7 +754,7 @@ void x86_pmu_enable_all(int added) } } -static inline int is_x86_event(struct perf_event *event) +int is_x86_event(struct perf_event *event) { int i; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2c0ce0e9545e..4237c379cdc5 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -110,9 +110,16 @@ static inline bool is_topdown_event(struct perf_event *event) return is_metric_event(event) || is_slots_event(event); } +int is_x86_event(struct perf_event *event); + +static inline bool check_leader_group(struct perf_event *leader, int flags) +{ + return is_x86_event(leader) ? !!(leader->hw.flags & flags) : false; +} + static inline bool is_branch_counters_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS; + return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS); } static inline bool is_pebs_counter_event_group(struct perf_event *event) From e9988ad7b1744991118ac348a804f9395368a284 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:15 -0700 Subject: [PATCH 055/239] perf/x86/intel: Check the X86 leader for pebs_counter_event_group The PEBS counters snapshotting group also requires a group flag in the leader. The leader must be a X86 event. Fixes: e02e9b0374c3 ("perf/x86/intel: Support PEBS counters snapshotting") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250424134718.311934-3-kan.liang@linux.intel.com --- arch/x86/events/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4237c379cdc5..46d120597bab 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -124,7 +124,7 @@ static inline bool is_branch_counters_group(struct perf_event *event) static inline bool is_pebs_counter_event_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_PEBS_CNTR; + return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR); } struct amd_nb { From 7da9960b59fb7e590eb8538c9428db55a4ea2d23 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:18 -0700 Subject: [PATCH 056/239] perf/x86/intel/ds: Fix counter backwards of non-precise events counters-snapshotting The counter backwards may be observed in the PMI handler when counters-snapshotting some non-precise events in the freq mode. For the non-precise events, it's possible the counters-snapshotting records a positive value for an overflowed PEBS event. Then the HW auto-reload mechanism reset the counter to 0 immediately. Because the pebs_event_reset is cleared in the freq mode, which doesn't set the PERF_X86_EVENT_AUTO_RELOAD. In the PMI handler, 0 will be read rather than the positive value recorded in the counters-snapshotting record. The counters-snapshotting case has to be specially handled. Since the event value has been updated when processing the counters-snapshotting record, only needs to set the new period for the counter via x86_pmu_set_period(). Fixes: e02e9b0374c3 ("perf/x86/intel: Support PEBS counters snapshotting") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250424134718.311934-6-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 18c3ab579b8b..9b20acc0e932 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2379,8 +2379,25 @@ __intel_pmu_pebs_last_event(struct perf_event *event, */ intel_pmu_save_and_restart_reload(event, count); } - } else - intel_pmu_save_and_restart(event); + } else { + /* + * For a non-precise event, it's possible the + * counters-snapshotting records a positive value for the + * overflowed event. Then the HW auto-reload mechanism + * reset the counter to 0 immediately, because the + * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD + * is not set. The counter backwards may be observed in a + * PMI handler. + * + * Since the event value has been updated when processing the + * counters-snapshotting record, only needs to set the new + * period for the counter. + */ + if (is_pebs_counter_event_group(event)) + static_call(x86_pmu_set_period)(event); + else + intel_pmu_save_and_restart(event); + } } static __always_inline void From 3d59224947b024c9b2aa6e149a1537d449adb828 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 24 Apr 2025 21:50:14 +0530 Subject: [PATCH 057/239] cpufreq: ACPI: Re-sync CPU boost state on system resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During CPU hotunplug events (such as those occurring during suspend/resume cycles), platform firmware may modify the CPU boost state. If boost was disabled prior to CPU removal, it correctly remains disabled upon re-plug. However, if firmware re-enables boost while the CPU is offline, the CPU may return with boost enabled—even if it was originally disabled—once it is hotplugged back in. This leads to inconsistent behavior and violates user or kernel policy expectations. To maintain consistency, ensure the boost state is re-synchronized with the kernel policy when a CPU is hotplugged back in. Note: This re-synchronization is not necessary during the initial call to ->init() for a CPU, as the cpufreq core handles it via cpufreq_online(). At that point, acpi_cpufreq_driver.boost_enabled is initialized to the value returned by boost_state(0). Fixes: 2b16c631832d ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()") Reported-by: Nicholas Chin Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220013 Tested-by: Nicholas Chin Reviewed-by: Lifeng Zheng Signed-off-by: Viresh Kumar Link: https://patch.msgid.link/9c7de55fb06015c1b77e7dafd564b659838864e0.1745511526.git.viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 924314cdeebc..d26b610e4f24 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -909,8 +909,19 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) - policy->boost_supported = true; + if (acpi_cpufreq_driver.set_boost) { + if (policy->boost_supported) { + /* + * The firmware may have altered boost state while the + * CPU was offline (for example during a suspend-resume + * cycle). + */ + if (policy->boost_enabled != boost_state(cpu)) + set_boost(policy, policy->boost_enabled); + } else { + policy->boost_supported = true; + } + } return result; From e8fa236e28811473db1594c597f974da0e9b753b Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Fri, 25 Apr 2025 18:36:18 +0800 Subject: [PATCH 058/239] ALSA: hda: Apply volume control on speaker+lineout for HP EliteStudio AIO This hardware has ALC274 codec with speaker NID 0x17 and line out NID 0x16 for audio output. The line out is routed correctly but the speaker is not. Thus the volume can't be controlled. This patch removes DAC NID 0x06 (without volume control) from the connection list for speaker NID 0x17. Routing both speaker and line out pins to DAC NID 0x02 which controls the output volume. Signed-off-by: Chris Chiu Link: https://patch.msgid.link/20250425103618.534951-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8ed613932c5b..38ed264c3a49 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6742,6 +6742,25 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, codec->power_save_node = 0; } +/* avoid DAC 0x06 for speaker switch 0x17; it has no volume control */ +static void alc274_fixup_hp_aio_bind_dacs(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */ + /* The speaker is routed to the Node 0x06 by a mistake, thus the + * speaker's volume can't be adjusted since the node doesn't have + * Amp-out capability. Assure the speaker and lineout pin to be + * coupled with DAC NID 0x02. + */ + static const hda_nid_t preferred_pairs[] = { + 0x16, 0x02, 0x17, 0x02, 0x21, 0x03, 0 + }; + struct alc_spec *spec = codec->spec; + + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + spec->gen.preferred_dacs = preferred_pairs; +} + /* avoid DAC 0x06 for bass speaker 0x17; it has no volume control */ static void alc289_fixup_asus_ga401(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -7970,6 +7989,7 @@ enum { ALC294_FIXUP_BASS_SPEAKER_15, ALC283_FIXUP_DELL_HP_RESUME, ALC294_FIXUP_ASUS_CS35L41_SPI_2, + ALC274_FIXUP_HP_AIO_BIND_DACS, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10340,6 +10360,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC, }, + [ALC274_FIXUP_HP_AIO_BIND_DACS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc274_fixup_hp_aio_bind_dacs, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10774,6 +10798,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite X360 14 G12", ALC285_FIXUP_HP_GPIO_LED), @@ -10793,6 +10818,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da1, "HP 16 Clipper OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), From be0c40da888840fe91b45474cb70779e6cbaf7ca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 27 Apr 2025 10:10:34 +0200 Subject: [PATCH 059/239] ALSA: hda/realtek: Add quirk for HP Spectre x360 15-df1xxx HP Spectre x360 15-df1xxx with SSID 13c:863e requires similar workarounds that were applied to another HP Spectre x360 models; it has a mute LED only, no micmute LEDs, and needs the speaker GPIO seup. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220054 Link: https://patch.msgid.link/20250427081035.11567-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 38ed264c3a49..1799203af35a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6982,6 +6982,41 @@ static void alc285_fixup_hp_spectre_x360_eb1(struct hda_codec *codec, } } +/* GPIO1 = amplifier on/off */ +static void alc285_fixup_hp_spectre_x360_df1(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + static const hda_nid_t conn[] = { 0x02 }; + static const struct hda_pintbl pincfgs[] = { + { 0x14, 0x90170110 }, /* front/high speakers */ + { 0x17, 0x90170130 }, /* back/bass speakers */ + { } + }; + + // enable mute led + alc285_fixup_hp_mute_led_coefbit(codec, fix, action); + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /* needed for amp of back speakers */ + spec->gpio_mask |= 0x01; + spec->gpio_dir |= 0x01; + snd_hda_apply_pincfgs(codec, pincfgs); + /* share DAC to have unified volume control */ + snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn), conn); + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + break; + case HDA_FIXUP_ACT_INIT: + /* need to toggle GPIO to enable the amp of back speakers */ + alc_update_gpio_data(codec, 0x01, true); + msleep(100); + alc_update_gpio_data(codec, 0x01, false); + break; + } +} + static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7780,6 +7815,7 @@ enum { ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, ALC285_FIXUP_HP_SPECTRE_X360_EB1, + ALC285_FIXUP_HP_SPECTRE_X360_DF1, ALC285_FIXUP_HP_ENVY_X360, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -9857,6 +9893,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360_eb1 }, + [ALC285_FIXUP_HP_SPECTRE_X360_DF1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_df1 + }, [ALC285_FIXUP_HP_ENVY_X360] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_envy_x360, @@ -10588,6 +10628,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x86c1, "HP Laptop 15-da3001TU", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x863e, "HP Spectre x360 15-df1xxx", ALC285_FIXUP_HP_SPECTRE_X360_DF1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), @@ -11520,6 +11561,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, + {.id = ALC285_FIXUP_HP_SPECTRE_X360_DF1, .name = "alc285-hp-spectre-x360-df1"}, {.id = ALC285_FIXUP_HP_ENVY_X360, .name = "alc285-hp-envy-x360"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, .name = "alc287-yoga9-bass-spk-pin"}, From 5591ce0069ddda97cdbbea596bed53e698f399c2 Mon Sep 17 00:00:00 2001 From: Wojciech Dubowik Date: Thu, 24 Apr 2025 11:59:14 +0200 Subject: [PATCH 060/239] arm64: dts: imx8mm-verdin: Link reg_usdhc2_vqmmc to usdhc2 Define vqmmc regulator-gpio for usdhc2 with vin-supply coming from LDO5. Without this definition LDO5 will be powered down, disabling SD card after bootup. This has been introduced in commit f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5"). Fixes: 6a57f224f734 ("arm64: dts: freescale: add initial support for verdin imx8m mini") Fixes: f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5") Tested-by: Manuel Traut Reviewed-by: Philippe Schenker Tested-by: Francesco Dolcini Reviewed-by: Francesco Dolcini Cc: stable@vger.kernel.org Signed-off-by: Wojciech Dubowik Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mm-verdin.dtsi | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 7251ad3a0017..b46566f3ce20 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -144,6 +144,19 @@ startup-delay-us = <20000>; }; + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc2_vsel>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; + regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; + states = <1800000 0x1>, + <3300000 0x0>; + regulator-name = "PMIC_USDHC_VSELECT"; + vin-supply = <®_nvcc_sd>; + }; + reserved-memory { #address-cells = <2>; #size-cells = <2>; @@ -269,7 +282,7 @@ "SODIMM_19", "", "", - "", + "PMIC_USDHC_VSELECT", "", "", "", @@ -785,6 +798,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>; pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>; vmmc-supply = <®_usdhc2_vmmc>; + vqmmc-supply = <®_usdhc2_vqmmc>; }; &wdog1 { @@ -1206,13 +1220,17 @@ ; /* SODIMM 76 */ }; + pinctrl_usdhc2_vsel: usdhc2vselgrp { + fsl,pins = + ; /* PMIC_USDHC_VSELECT */ + }; + /* * Note: Due to ERR050080 we use discrete external on-module resistors pulling-up to the * on-module +V3.3_1.8_SD (LDO5) rail and explicitly disable the internal pull-ups here. */ pinctrl_usdhc2: usdhc2grp { fsl,pins = - , , /* SODIMM 78 */ , /* SODIMM 74 */ , /* SODIMM 80 */ @@ -1223,7 +1241,6 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = - , , , , @@ -1234,7 +1251,6 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = - , , , , @@ -1246,7 +1262,6 @@ /* Avoid backfeeding with removed card power */ pinctrl_usdhc2_sleep: usdhc2slpgrp { fsl,pins = - , , , , From 1149719442d28c96dc63cad432b5a6db7c300e1a Mon Sep 17 00:00:00 2001 From: Joachim Priesner Date: Mon, 28 Apr 2025 07:36:06 +0200 Subject: [PATCH 061/239] ALSA: usb-audio: Add second USB ID for Jabra Evolve 65 headset There seem to be multiple USB device IDs used for these; the one I have reports as 0b0e:030c when powered on. (When powered off, it reports as 0b0e:0311.) Signed-off-by: Joachim Priesner Cc: Link: https://patch.msgid.link/20250428053606.9237-1-joachim.priesner@web.de Signed-off-by: Takashi Iwai --- sound/usb/format.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 9d32b21a5fbb..0ba4641a0eb1 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -260,7 +260,8 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof } /* Jabra Evolve 65 headset */ - if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) { + if (chip->usb_id == USB_ID(0x0b0e, 0x030b) || + chip->usb_id == USB_ID(0x0b0e, 0x030c)) { /* only 48kHz for playback while keeping 16kHz for capture */ if (fp->nr_rates != 1) return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); From 76047483fe94414edf409dc498498abf346e22f1 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 23 Apr 2025 09:54:42 +0530 Subject: [PATCH 062/239] drm/ttm: fix the warning for hit_low and evict_low MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix the below warning messages: ttm/ttm_bo.c:1098: warning: Function parameter or struct member 'hit_low' not described in 'ttm_bo_swapout_walk' ttm/ttm_bo.c:1098: warning: Function parameter or struct member 'evict_low' not described in 'ttm_bo_swapout_walk' Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Signed-off-by: Sunil Khatri Reviewed-by: Maarten Lankhorst Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250423042442.762108-1-sunil.khatri@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 95b86003c50d..5bf3c969907c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1093,7 +1093,8 @@ struct ttm_bo_swapout_walk { struct ttm_lru_walk walk; /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ gfp_t gfp_flags; - + /** @hit_low: Whether we should attempt to swap BO's with low watermark threshold */ + /** @evict_low: If we cannot swap a bo when @try_low is false (first pass) */ bool hit_low, evict_low; }; From 8d16dd7b651b75ce7c79da3539553a25e60668f5 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 24 Apr 2025 11:06:53 +0800 Subject: [PATCH 063/239] MAINTAINERS: erofs: add myself as reviewer I have a solid background in file systems and since much of my recent work has focused on EROFS, I am familiar with it. Now I have the time and am willing to help review EROFS patches. I hope my participation can be helpful to the EROFS patch review process. Signed-off-by: Hongbo Li Acked-by: Chao Yu Acked-by: Gao Xiang Link: https://lore.kernel.org/r/20250424030653.3308358-1-lihongbo22@huawei.com Signed-off-by: Gao Xiang --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3cbf9ac0d83f..b53a268e4a92 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8726,6 +8726,7 @@ M: Chao Yu R: Yue Hu R: Jeffle Xu R: Sandeep Dhavale +R: Hongbo Li L: linux-erofs@lists.ozlabs.org S: Maintained W: https://erofs.docs.kernel.org From 4fb7b8fceb0beebbe00712c3daf49ade0386076a Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:39 -0700 Subject: [PATCH 064/239] EDAC/altera: Test the correct error reg offset Test correct structure member, ecc_cecnt_offset, before using it. [ bp: Massage commit message. ] Fixes: 73bcc942f427 ("EDAC, altera: Add Arria10 EDAC support") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-2-matthew.gerlach@altera.com --- drivers/edac/altera_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3e971f902363..88d9d2f458ee 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -99,7 +99,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) if (status & priv->ecc_stat_ce_mask) { regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, &err_addr); - if (priv->ecc_uecnt_offset) + if (priv->ecc_cecnt_offset) regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, &err_count); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, From 6dbe3c5418c4368e824bff6ae4889257dd544892 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:40 -0700 Subject: [PATCH 065/239] EDAC/altera: Set DDR and SDMMC interrupt mask before registration Mask DDR and SDMMC in probe function to avoid spurious interrupts before registration. Removed invalid register write to system manager. Fixes: 1166fde93d5b ("EDAC, altera: Add Arria10 ECC memory init functions") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-3-matthew.gerlach@altera.com --- drivers/edac/altera_edac.c | 7 ++++--- drivers/edac/altera_edac.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 88d9d2f458ee..dcd7008fe06b 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1005,9 +1005,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, } } - /* Interrupt mode set to every SBERR */ - regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST, - ALTR_A10_ECC_INTMODE); /* Enable ECC */ ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); @@ -2127,6 +2124,10 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return PTR_ERR(edac->ecc_mgr_map); } + /* Set irq mask for DDR SBE to avoid any pending irq before registration */ + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0)); + edac->irq_chip.name = pdev->dev.of_node->name; edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 3727e72c8c2e..7248d24c4908 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -249,6 +249,8 @@ struct altr_sdram_mc_data { #define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 #define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 #define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) +#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16) +#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17) #define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C #define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 From 2c8a7c66c90832432496616a9a3c07293f1364f3 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Fri, 18 Apr 2025 11:16:42 +0800 Subject: [PATCH 066/239] iommu/vt-d: Apply quirk_iommu_igfx for 8086:0044 (QM57/QS57) On the Lenovo ThinkPad X201, when Intel VT-d is enabled in the BIOS, the kernel boots with errors related to DMAR, the graphical interface appeared quite choppy, and the system resets erratically within a minute after it booted: DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write NO_PASID] Request device [00:02.0] fault addr 0xb97ff000 [fault reason 0x05] PTE Write access is not set Upon comparing boot logs with VT-d on/off, I found that the Intel Calpella quirk (`quirk_calpella_no_shadow_gtt()') correctly applied the igfx IOMMU disable/quirk correctly: pci 0000:00:00.0: DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics Whereas with VT-d on, it went into the "else" branch, which then triggered the DMAR handling fault above: ... else if (!disable_igfx_iommu) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); iommu_set_dma_strict(); } Now, this is not exactly scientific, but moving 0x0044 to quirk_iommu_igfx seems to have fixed the aforementioned issue. Running a few `git blame' runs on the function, I have found that the quirk was originally introduced as a fix specific to ThinkPad X201: commit 9eecabcb9a92 ("intel-iommu: Abort IOMMU setup for igfx if BIOS gave no shadow GTT space") Which was later revised twice to the "else" branch we saw above: - 2011: commit 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") - 2024: commit ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") I'm uncertain whether further testings on this particular laptops were done in 2011 and (honestly I'm not sure) 2024, but I would be happy to do some distro-specific testing if that's what would be required to verify this patch. P.S., I also see IDs 0x0040, 0x0062, and 0x006a listed under the same `quirk_calpella_no_shadow_gtt()' quirk, but I'm not sure how similar these chipsets are (if they share the same issue with VT-d or even, indeed, if this issue is specific to a bug in the Lenovo BIOS). With regards to 0x0062, it seems to be a Centrino wireless card, but not a chipset? I have also listed a couple (distro and kernel) bug reports below as references (some of them are from 7-8 years ago!), as they seem to be similar issue found on different Westmere/Ironlake, Haswell, and Broadwell hardware setups. Cc: stable@vger.kernel.org Fixes: 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") Fixes: ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") Link: https://groups.google.com/g/qubes-users/c/4NP4goUds2c?pli=1 Link: https://bugs.archlinux.org/task/65362 Link: https://bbs.archlinux.org/viewtopic.php?id=230323 Reported-by: Wenhao Sun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=197029 Signed-off-by: Mingcong Bai Link: https://lore.kernel.org/r/20250415133330.12528-1-jeffbai@aosc.io Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e60b699e7b8c..cb0b993bebb4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4439,6 +4439,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4516,7 +4519,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); From b79028039f440e7d2c4df6ab243060c4e3803e84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Apr 2025 13:36:21 +0200 Subject: [PATCH 067/239] cpufreq: Fix setting policy limits when frequency tables are used Commit 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") overlooked the fact that policy->min and policy->max were accessed directly in cpufreq_frequency_table_target() and in the functions called by it. Consequently, the changes made by that commit led to problems with setting policy limits. Address this by passing the target frequency limits to __resolve_freq() and cpufreq_frequency_table_target() and propagating them to the functions called by the latter. Fixes: 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") Cc: 5.16+ # 5.16+ Closes: https://lore.kernel.org/linux-pm/aAplED3IA_J0eZN0@linaro.org/ Reported-by: Stephan Gerhold Signed-off-by: Rafael J. Wysocki Tested-by: Stephan Gerhold Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/5896780.DvuYhMxLoT@rjwysocki.net --- drivers/cpufreq/cpufreq.c | 22 +++++--- drivers/cpufreq/cpufreq_ondemand.c | 3 +- drivers/cpufreq/freq_table.c | 6 +-- include/linux/cpufreq.h | 83 +++++++++++++++++++----------- 4 files changed, 73 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index acf19b0042bb..f45ded62b0e0 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -536,14 +536,18 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) + unsigned int target_freq, + unsigned int min, unsigned int max, + unsigned int relation) { unsigned int idx; + target_freq = clamp_val(target_freq, min, max); + if (!policy->freq_table) return target_freq; - idx = cpufreq_frequency_table_target(policy, target_freq, relation); + idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation); policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; return policy->freq_table[idx].frequency; @@ -577,8 +581,7 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, if (unlikely(min > max)) min = max; - return __resolve_freq(policy, clamp_val(target_freq, min, max), - CPUFREQ_RELATION_LE); + return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2397,8 +2400,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = clamp_val(target_freq, policy->min, policy->max); - target_freq = __resolve_freq(policy, target_freq, relation); + target_freq = __resolve_freq(policy, target_freq, policy->min, + policy->max, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2727,8 +2730,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * compiler optimizations around them because they may be accessed * concurrently by cpufreq_driver_resolve_freq() during the update. */ - WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); - new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, + new_data.min, new_data.max, + CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, new_data.min, + new_data.max, CPUFREQ_RELATION_L); WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); trace_cpu_frequency_limits(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a7c38b8b3e78..0e65d37c9231 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -76,7 +76,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - index = cpufreq_frequency_table_target(policy, freq_next, relation); + index = cpufreq_frequency_table_target(policy, freq_next, policy->min, + policy->max, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index c03a91502f84..35de513af6c9 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -115,8 +115,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -147,7 +147,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; - if ((freq < policy->min) || (freq > policy->max)) + if (freq < min || freq > max) continue; if (freq == target_freq) { optimal.driver_data = i; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 400fee6427a5..7a5b391dcc01 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -776,8 +776,8 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -840,12 +840,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -855,6 +855,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -908,12 +916,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -923,6 +931,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -993,12 +1009,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1008,7 +1024,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1017,11 +1043,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1032,29 +1060,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } From 5b1834d6202f86180e451ad1a2a8a193a1da18fc Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Apr 2025 17:25:12 +0100 Subject: [PATCH 068/239] drm/fdinfo: Protect against driver unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we unbind a driver from the PCI device with an active DRM client, subsequent read of the fdinfo data associated with the file descriptor in question will not end well. Protect the path with a drm_dev_enter/exit() pair. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Umesh Nerlige Ramappa Reviewed-by: Christian König Fixes: 3f09a0cd4ea3 ("drm: Add common fdinfo helper") Cc: # v6.5+ Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250418162512.72324-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/drm_file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index c299cd94d3f7..cf2463090d3a 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -964,6 +964,10 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); @@ -983,6 +987,8 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); + + drm_dev_exit(idx); } EXPORT_SYMBOL(drm_show_fdinfo); From bc43f7114a0e8173968085b21535d57b8030d571 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:13 +0200 Subject: [PATCH 069/239] drm: adp: Use spin_lock_irqsave for drm device event_lock The lock is used in the interrupt handler so use spin_lock_irqsave to disable interrupts and avoid deadlocks with the irq handler. Fixes: 332122eba628 ("drm: adp: Add Apple Display Pipe driver") Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-1-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index c98c647f981d..157298a8ff42 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -310,6 +310,7 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { u32 frame_num = 1; + unsigned long flags; struct adp_drv_private *adp = crtc_to_adp(crtc); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); u64 new_size = ALIGN(new_state->mode.hdisplay * @@ -330,13 +331,13 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irq(&crtc->dev->event_lock); + spin_lock_irqsave(&crtc->dev->event_lock, flags); if (crtc->state->event) { drm_crtc_vblank_get(crtc); adp->event = crtc->state->event; } crtc->state->event = NULL; - spin_unlock_irq(&crtc->dev->event_lock); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } static const struct drm_crtc_funcs adp_crtc_funcs = { From 7a7d6681d5adde7dc7e648dcc6b9e9be6ca93d5d Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:14 +0200 Subject: [PATCH 070/239] drm: adp: Handle drm_crtc_vblank_get() errors drm_crtc_vblank_get() may fail when it's called before drm_crtc_vblank_on() on a resetted CRTC. This occurs in drm_crtc_helper_funcs' atomic_flush() calls after drm_atomic_helper_crtc_reset() for example directly after probe. Send the vblank event directly in such cases. Avoids following warning in the subsequent drm_crtc_vblank_put() call from the vblank irq handler as below: adp 228200000.display-pipe: [drm] drm_WARN_ON(atomic_read(&vblank->refcount) == 0) WARNING: CPU: 5 PID: 1206 at drivers/gpu/drm/drm_vblank.c:1247 drm_vblank_put+0x158/0x170 ... Call trace: drm_vblank_put+0x158/0x170 (P) drm_crtc_vblank_put+0x24/0x38 adp_fe_irq+0xd8/0xe8 [adpdrm] __handle_irq_event_percpu+0x94/0x318 handle_irq_event+0x54/0xd0 handle_fasteoi_irq+0xa8/0x240 handle_irq_desc+0x3c/0x68 generic_handle_domain_irq+0x24/0x40 Modifying `crtc->state->event` here is fine as crtc->mutex is locked by the non-async atomic commit. In retrospect this looks so obvious that it doesn't warrant a comment in the file. Signed-off-by: Janne Grunau Reviewed-by: Alyssa Rosenzweig Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-2-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index 157298a8ff42..bdf27ee742ea 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -331,13 +331,19 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irqsave(&crtc->dev->event_lock, flags); if (crtc->state->event) { - drm_crtc_vblank_get(crtc); - adp->event = crtc->state->event; + struct drm_pending_vblank_event *event = crtc->state->event; + + crtc->state->event = NULL; + spin_lock_irqsave(&crtc->dev->event_lock, flags); + + if (drm_crtc_vblank_get(crtc) != 0) + drm_crtc_send_vblank_event(crtc, event); + else + adp->event = event; + + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } - crtc->state->event = NULL; - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } static const struct drm_crtc_funcs adp_crtc_funcs = { From c082a52125d9007b488d590c412fd126aa78c345 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:15 +0200 Subject: [PATCH 071/239] drm: adp: Enable vblank interrupts in crtc's .atomic_enable Calling drm_crtc_vblank_on() drm_crtc_helper_funcs' atomic_enable is expected to enable vblank interrupts. It may have been avoided here to due to drm_crtc_vblank_get()'s error behavior after drm_crtc_vblank_reset(). With that fixed in the preceding change the driver can call drm_crtc_vblank_on() from adp_crtc_atomic_enable(). Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-3-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index bdf27ee742ea..50d26c73301c 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -288,6 +288,7 @@ static void adp_crtc_atomic_enable(struct drm_crtc *crtc, writel(BIT(0), adp->be + ADBE_BLEND_EN3); writel(BIT(0), adp->be + ADBE_BLEND_BYPASS); writel(BIT(0), adp->be + ADBE_BLEND_EN4); + drm_crtc_vblank_on(crtc); } static void adp_crtc_atomic_disable(struct drm_crtc *crtc, @@ -519,8 +520,7 @@ static int adp_drm_bind(struct device *dev) struct adp_drv_private *adp = to_adp(drm); int err; - adp_disable_vblank(adp); - writel(ADP_CTRL_FIFO_ON | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL); + writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL); adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0); if (IS_ERR(adp->next_bridge)) { From 8f6dfc4d7037e88cc0a4be4f290829946999341f Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:16 +0200 Subject: [PATCH 072/239] drm: adp: Remove pointless irq_lock spin lock Interrupt handlers run with interrupts disabled so it is not necessary to protect them against reentrancy. Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-4-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index 50d26c73301c..54cde090c3f4 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -121,7 +121,6 @@ struct adp_drv_private { dma_addr_t mask_iova; int be_irq; int fe_irq; - spinlock_t irq_lock; struct drm_pending_vblank_event *event; }; @@ -490,8 +489,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) u32 int_status; u32 int_ctl; - spin_lock(&adp->irq_lock); - int_status = readl(adp->fe + ADP_INT_STATUS); if (int_status & ADP_INT_STATUS_VBLANK) { drm_crtc_handle_vblank(&adp->crtc); @@ -509,7 +506,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) writel(int_status, adp->fe + ADP_INT_STATUS); - spin_unlock(&adp->irq_lock); return IRQ_HANDLED; } @@ -574,8 +570,6 @@ static int adp_probe(struct platform_device *pdev) if (IS_ERR(adp)) return PTR_ERR(adp); - spin_lock_init(&adp->irq_lock); - dev_set_drvdata(&pdev->dev, &adp->drm); err = adp_parse_of(pdev, adp); From 32dce6b1949a696dc7abddc04de8cbe35c260217 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 4 Mar 2025 20:12:14 +0100 Subject: [PATCH 073/239] drm: Select DRM_KMS_HELPER from DRM_DEBUG_DP_MST_TOPOLOGY_REFS Using "depends on" and "select" for the same Kconfig symbol is known to cause circular dependencies (cmp. "Kconfig recursive dependency limitations" in Documentation/kbuild/kconfig-language.rst. DRM drivers are selecting drm helpers so do the same for DRM_DEBUG_DP_MST_TOPOLOGY_REFS. Fixes following circular dependency reported on x86 for the downstream Asahi Linux tree: error: recursive dependency detected! symbol DRM_KMS_HELPER is selected by DRM_GEM_SHMEM_HELPER symbol DRM_GEM_SHMEM_HELPER is selected by RUST_DRM_GEM_SHMEM_HELPER symbol RUST_DRM_GEM_SHMEM_HELPER is selected by DRM_ASAHI symbol DRM_ASAHI depends on RUST symbol RUST depends on CALL_PADDING symbol CALL_PADDING depends on OBJTOOL symbol OBJTOOL is selected by STACK_VALIDATION symbol STACK_VALIDATION depends on UNWINDER_FRAME_POINTER symbol UNWINDER_FRAME_POINTER is part of choice block at arch/x86/Kconfig.debug:224 symbol unknown is visible depending on UNWINDER_GUESS symbol UNWINDER_GUESS prompt is visible depending on STACKDEPOT symbol STACKDEPOT is selected by DRM_DEBUG_DP_MST_TOPOLOGY_REFS symbol DRM_DEBUG_DP_MST_TOPOLOGY_REFS depends on DRM_KMS_HELPER Fixes: 12a280c72868 ("drm/dp_mst: Add topology ref history tracking for debugging") Cc: stable@vger.kernel.org Signed-off-by: Janne Grunau Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250304-drm_debug_dp_mst_topo_kconfig-v1-1-e16fd152f258@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2cba2b6ebe1c..f01925ed8176 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -188,7 +188,7 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" depends on STACKTRACE_SUPPORT select STACKDEPOT - depends on DRM_KMS_HELPER + select DRM_KMS_HELPER depends on DEBUG_KERNEL depends on EXPERT help From a2f546330ef9f3471ab9dd5f59e9685733b6c0dc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 20:07:24 -0400 Subject: [PATCH 074/239] bcachefs: Fix losing return code in next_fiemap_extent() Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0f1d61aab90b..72b722d80813 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1464,8 +1464,8 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, unsigned sectors = cur->kbuf.k->k.size; s64 offset_into_extent = 0; enum btree_id data_btree = BTREE_ID_extents; - int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, - &cur->kbuf); + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); if (ret) goto err; From c83311c5b90d12ea22e847fd9390e2fdb6a34f68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:38:58 -0400 Subject: [PATCH 075/239] bcachefs: Use generic_set_sb_d_ops for standard casefolding d_ops Suggested-by: Al Viro Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 11 ++++++++--- fs/bcachefs/namei.c | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 72b722d80813..113db85b6ef9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -66,6 +66,8 @@ static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_in if (bch2_inode_casefold(c, &inode->ei_inode)) inode->v.i_flags |= S_CASEFOLD; + else + inode->v.i_flags &= ~S_CASEFOLD; } void bch2_inode_update_after_write(struct btree_trans *trans, @@ -848,10 +850,8 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, set_nlink(&inode->v, 0); } - if (IS_CASEFOLDED(vdir)) { + if (IS_CASEFOLDED(vdir)) d_invalidate(dentry); - d_prune_aliases(&inode->v); - } err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -2571,6 +2571,11 @@ got_sb: if (ret) goto err_put_super; +#ifdef CONFIG_UNICODE + sb->s_encoding = c->cf_encoding; +#endif + generic_set_sb_d_ops(sb); + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); bch_err_msg(c, ret, "mounting: error getting root inode"); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 46f3c8b100a9..52c58c6d53d2 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -343,6 +343,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, bool ret = false; for (id = 0; id < Inode_opt_nr; id++) { + if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold) + continue; + /* Skip attributes that were explicitly set on this inode */ if (dst_u->bi_fields_set & (1 << id)) continue; From 70c3d89f49523933365d91010f88206855bc1990 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:09:33 -0400 Subject: [PATCH 076/239] bcachefs: Emit unicode version message on startup fstests expects this Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e4ab0595c0ae..32fccca350ed 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -823,25 +823,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; -#ifdef CONFIG_UNICODE - /* Default encoding until we can potentially have more as an option. */ - c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); - if (IS_ERR(c->cf_encoding)) { - printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - ret = -EINVAL; - goto err; - } -#else - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif - pr_uuid(&name, c->sb.user_uuid.b); ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0; if (ret) @@ -941,6 +922,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; +#ifdef CONFIG_UNICODE + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif + for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; From bdc32a10a29c3993b3c6c38b21951b66bea525d7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:19:47 -0400 Subject: [PATCH 077/239] bcachefs: Add missing utf8_unload() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 32fccca350ed..27943082c093 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -531,6 +531,10 @@ static void __bch2_fs_free(struct bch_fs *c) for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); +#ifdef CONFIG_UNICODE + utf8_unload(c->cf_encoding); +#endif + bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_free_fsck_errs(c); From 3c24020119a5f55ec902b5fdc24d8666d76340b3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:05:32 -0400 Subject: [PATCH 078/239] bcachefs: Run BCH_RECOVERY_PASS_reconstruct_snapshots on missing subvol -> snapshot Fix this repair path. Signed-off-by: Kent Overstreet --- fs/bcachefs/subvolume.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 5537283d0bea..d0209f7658bb 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -6,6 +6,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "recovery_passes.h" #include "snapshot.h" #include "subvolume.h" @@ -44,8 +45,8 @@ static int check_subvol(struct btree_trans *trans, ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "subvolume %llu points to nonexistent snapshot %u", - k.k->p.offset, snapid); + return bch2_run_explicit_recovery_pass(c, + BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; if (ret) return ret; From 9e9c28acfdc78292100fdd0e46587bf43a174451 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:05:32 -0400 Subject: [PATCH 079/239] bcachefs: Add upgrade table entry from 0.14 There are a few errors that needed to be marked as autofix. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 4 ++++ fs/bcachefs/sb-errors_format.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index acb5d845841e..badd0e17ada5 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -20,6 +20,10 @@ * x(version, recovery_passes, errors...) */ #define UPGRADE_TABLE() \ + x(snapshot_2, \ + RECOVERY_PASS_ALL_FSCK, \ + BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \ + BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \ x(backpointers, \ RECOVERY_PASS_ALL_FSCK) \ x(inode_v3, \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index dc53d25c7cbb..3711aa5009ac 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -205,9 +205,9 @@ enum bch_fsck_flags { x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ - x(subvol_not_master_and_not_snapshot, 187, 0) \ + x(subvol_not_master_and_not_snapshot, 187, FSCK_AUTOFIX) \ x(subvol_to_missing_root, 188, 0) \ - x(subvol_root_wrong_bi_subvol, 189, 0) \ + x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ From 002466446abae31a15e8b89adb54ee08653eccd1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:09:53 -0400 Subject: [PATCH 080/239] bcachefs: fix bch2_dev_buckets_resize() The resize memcpy path was totally busted. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 4ef261e8db4f..e1efae43982a 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1307,13 +1307,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); if (resize) { - bucket_gens->nbuckets = min(bucket_gens->nbuckets, - old_bucket_gens->nbuckets); - bucket_gens->nbuckets_minus_first = - bucket_gens->nbuckets - bucket_gens->first_bucket; + u64 copy = min(bucket_gens->nbuckets, + old_bucket_gens->nbuckets); memcpy(bucket_gens->b, old_bucket_gens->b, - bucket_gens->nbuckets); + sizeof(bucket_gens->b[0]) * copy); } rcu_assign_pointer(ca->bucket_gens, bucket_gens); From e7f1a52849a01c63c796a9dfe6697d05fff23324 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:01:51 -0400 Subject: [PATCH 081/239] bcachefs: Improve bch2_dev_bucket_missing() More useful error message. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 6 ++++-- fs/bcachefs/sb-members.h | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 116131f95815..72779912939b 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -15,9 +15,11 @@ void bch2_dev_missing(struct bch_fs *c, unsigned dev) bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); } -void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) +void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) { - bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); + bch2_fs_inconsistent(ca->fs, + "pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)", + bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets); } #define x(t, n, ...) [n] = #t, diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 06bb41a3f360..42786657522c 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -249,20 +249,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket) { struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode); - if (ca && !bucket_valid(ca, bucket.offset)) { + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { bch2_dev_put(ca); ca = NULL; } return ca; } -void bch2_dev_bucket_missing(struct bch_fs *, struct bpos); +void bch2_dev_bucket_missing(struct bch_dev *, u64); static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket) { - struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket); - if (!ca) - bch2_dev_bucket_missing(c, bucket); + struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { + bch2_dev_bucket_missing(ca, bucket.offset); + bch2_dev_put(ca); + ca = NULL; + } return ca; } From eca5b56ccfdf583a8781503646fb39554f8624bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:11:31 -0400 Subject: [PATCH 082/239] bcachefs: Don't generate alloc updates to invalid buckets Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e1efae43982a..31fbc2716d8b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -604,6 +604,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans, } struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); + if (!bucket_valid(ca, bucket.offset)) { + if (insert) { + bch2_dev_bucket_missing(ca, bucket.offset); + ret = -BCH_ERR_trigger_pointer; + } + goto err; + } if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); From c366b1672d74cb008974f6e36e34dc191621f3bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 09:31:23 -0400 Subject: [PATCH 083/239] bcachefs: btree_node_data_missing is now autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 3711aa5009ac..a4ad5924107b 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -46,7 +46,7 @@ enum bch_fsck_flags { x(btree_node_unsupported_version, 34, 0) \ x(btree_node_bset_older_than_sb_min, 35, 0) \ x(btree_node_bset_newer_than_sb, 36, 0) \ - x(btree_node_data_missing, 37, 0) \ + x(btree_node_data_missing, 37, FSCK_AUTOFIX) \ x(btree_node_bset_after_end, 38, 0) \ x(btree_node_replicas_sectors_written_mismatch, 39, 0) \ x(btree_node_replicas_data_mismatch, 40, 0) \ From 9c51f24c1ac7cbde9cc94a54137775dc52aae491 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 22 Apr 2025 18:03:47 +0100 Subject: [PATCH 084/239] scsi: myrb: Fix spelling mistake "statux" -> "status" There is a spelling mistake in a dev_err() message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20250422170347.66792-1-colin.i.king@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/myrb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index dc4bd422b601..486db5b2f05d 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -891,7 +891,7 @@ static bool myrb_enable_mmio(struct myrb_hba *cb, mbox_mmio_init_t mmio_init_fn) status = mmio_init_fn(pdev, base, &mbox); if (status != MYRB_STATUS_SUCCESS) { dev_err(&pdev->dev, - "Failed to enable mailbox, statux %02X\n", + "Failed to enable mailbox, status %02X\n", status); return false; } From 0e9693b97a0eee1df7bae33aec207c975fbcbdb8 Mon Sep 17 00:00:00 2001 From: Keoseong Park Date: Fri, 25 Apr 2025 10:06:05 +0900 Subject: [PATCH 085/239] scsi: ufs: core: Remove redundant query_complete trace The query_complete trace was not removed after ufshcd_issue_dev_cmd() was called from the bsg path, resulting in duplicate output. Below is an example of the trace: ufs-utils-773 [000] ..... 218.176933: ufshcd_upiu: query_send: 0000:00:04.0: HDR:16 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ufs-utils-773 [000] ..... 218.177145: ufshcd_upiu: query_complete: 0000:00:04.0: HDR:36 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 08 00 00 00 00 ufs-utils-773 [000] ..... 218.177146: ufshcd_upiu: query_complete: 0000:00:04.0: HDR:36 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 08 00 00 00 00 Remove the redundant trace call in the bsg path, preventing duplication. Signed-off-by: Keoseong Park Link: https://lore.kernel.org/r/20250425010605epcms2p67e89b351398832fe0fd547404d3afc65@epcms2p6 Fixes: 71aabb747d5f ("scsi: ufs: core: Reuse exec_dev_cmd") Reviewed-by: Avri Altman Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 5cb6132b8147..7735421e3991 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7265,8 +7265,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, err = -EINVAL; } } - ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP, - (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); return err; } From 652dd6558b8b5a1a04fef129e0231ee493e24951 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:12:01 -0400 Subject: [PATCH 086/239] bcachefs: btree_root_unreadable_and_scan_found_nothing autofix for non data btrees If loosing a btree won't cause data loss - i.e. it's an alloc btree, or we can easily reconstruct it - we shouldn't require user action to continue repair. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7b98ba2dec64..37b69d89341f 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -47,6 +47,27 @@ #define DROP_PREV_NODE 11 #define DID_FILL_FROM_SCAN 12 +/* + * Returns true if it's a btree we can easily reconstruct, or otherwise won't + * cause data loss if it's missing: + */ +static bool btree_id_important(enum btree_id btree) +{ + if (btree_id_is_alloc(btree)) + return false; + + switch (btree) { + case BTREE_ID_quotas: + case BTREE_ID_snapshot_trees: + case BTREE_ID_logged_ops: + case BTREE_ID_rebalance_work: + case BTREE_ID_subvolume_children: + return false; + default: + return true; + } +} + static const char * const bch2_gc_phase_strs[] = { #define x(n) #n, GC_PHASES() @@ -534,8 +555,10 @@ reconstruct_root: r->error = 0; if (!bch2_btree_has_scanned_nodes(c, i)) { - mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); } else { bch2_btree_root_alloc_fake_trans(trans, i, 1); From e5a3b8cf3330a774e5f5f06a2b7cf20116447297 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:25:15 -0400 Subject: [PATCH 087/239] bcachefs: More informative error message when shutting down due to error Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 925b0b54ea2f..6b8695b1349c 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -478,7 +478,9 @@ int __bch2_fsck_err(struct bch_fs *c, } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { - prt_str(out, ", shutting down"); + prt_str_indented(out, ", shutting down\n" + "error not marked as autofix and not in fsck\n" + "run fsck, and forward to devs so error can be marked for self-healing"); inconsistent = true; print = true; ret = -BCH_ERR_fsck_errors_not_fixed; From 9a4a858c9b365d817231f6f3592dc26e9d4191bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:28:58 -0400 Subject: [PATCH 088/239] bcachefs: Use bch2_kvmalloc() for journal keys array We can hit this limit fairly easy when we have to reconstuct large amounts of alloc info on large filesystems. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_journal_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 7d6c971db23c..ade3b5addd75 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -288,7 +288,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, .size = max_t(size_t, keys->size, 8) * 2, }; - new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL); + new_keys.data = bch2_kvmalloc(new_keys.size * sizeof(new_keys.data[0]), GFP_KERNEL); if (!new_keys.data) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); From dbe4674802ec8e43835900490b3492299464ad27 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:38:04 -0400 Subject: [PATCH 089/239] bcachefs: Topology error after insert is now an ERO A user hit this, and this will naturally be easier to debug if we don't panic. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 49 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 44b5fe430370..00307356d7c8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1389,7 +1389,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, printbuf_exit(&buf); } -static void +static int bch2_btree_insert_keys_interior(struct btree_update *as, struct btree_trans *trans, struct btree_path *path, @@ -1411,7 +1411,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, insert = bkey_next(insert)) bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); - if (bch2_btree_node_check_topology(trans, b)) { + int ret = bch2_btree_node_check_topology(trans, b); + if (ret) { struct printbuf buf = PRINTBUF; for (struct bkey_i *k = keys->keys; @@ -1421,11 +1422,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as, prt_newline(&buf); } - panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); + bch2_fs_fatal_error(as->c, "%ps -> %s(): check_topology error %s: inserted keys\n%s", + (void *) _RET_IP_, __func__, bch2_err_str(ret), buf.buf); + dump_stack(); + return ret; } memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); keys->top_p -= insert->_data - keys->keys_p; + return 0; } static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) @@ -1559,11 +1564,11 @@ static void __btree_split_node(struct btree_update *as, * nodes that were coalesced, and thus in the middle of a child node post * coalescing: */ -static void btree_split_insert_keys(struct btree_update *as, - struct btree_trans *trans, - btree_path_idx_t path_idx, - struct btree *b, - struct keylist *keys) +static int btree_split_insert_keys(struct btree_update *as, + struct btree_trans *trans, + btree_path_idx_t path_idx, + struct btree *b, + struct keylist *keys) { struct btree_path *path = trans->paths + path_idx; @@ -1573,8 +1578,12 @@ static void btree_split_insert_keys(struct btree_update *as, bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + if (ret) + return ret; } + + return 0; } static int btree_split(struct btree_update *as, struct btree_trans *trans, @@ -1607,8 +1616,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, __btree_split_node(as, trans, b, n, keys); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); - btree_split_insert_keys(as, trans, path, n2, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys) ?: + btree_split_insert_keys(as, trans, path, n2, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1654,7 +1665,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + ret = btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + if (ret) + goto err; } } else { trace_and_count(c, btree_node_compact, trans, b); @@ -1662,7 +1675,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n1 = bch2_btree_node_alloc_replacement(as, trans, b); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1809,15 +1824,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t goto split; } - ret = bch2_btree_node_check_topology(trans, b); + + ret = bch2_btree_node_check_topology(trans, b) ?: + bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); if (ret) { bch2_btree_node_unlock_write(trans, path, b); return ret; } - bch2_btree_insert_keys_interior(as, trans, path, b, - path->l[b->c.level].iter, keys); - trans_for_each_path_with_node(trans, b, linked, i) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); From bbfe756dc3062c1e934f06e5ba39c239aa953b92 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 29 Apr 2025 01:09:33 +0200 Subject: [PATCH 090/239] fs/erofs/fileio: call erofs_onlinefolio_split() after bio_add_folio() If bio_add_folio() fails (because it is full), erofs_fileio_scan_folio() needs to submit the I/O request via erofs_fileio_rq_submit() and allocate a new I/O request with an empty `struct bio`. Then it retries the bio_add_folio() call. However, at this point, erofs_onlinefolio_split() has already been called which increments `folio->private`; the retry will call erofs_onlinefolio_split() again, but there will never be a matching erofs_onlinefolio_end() call. This leaves the folio locked forever and all waiters will be stuck in folio_wait_bit_common(). This bug has been added by commit ce63cb62d794 ("erofs: support unencoded inodes for fileio"), but was practically unreachable because there was room for 256 folios in the `struct bio` - until commit 9f74ae8c9ac9 ("erofs: shorten bvecs[] for file-backed mounts") which reduced the array capacity to 16 folios. It was now trivial to trigger the bug by manually invoking readahead from userspace, e.g.: posix_fadvise(fd, 0, st.st_size, POSIX_FADV_WILLNEED); This should be fixed by invoking erofs_onlinefolio_split() only after bio_add_folio() has succeeded. This is safe: asynchronous completions invoking erofs_onlinefolio_end() will not unlock the folio because erofs_fileio_scan_folio() is still holding a reference to be released by erofs_onlinefolio_end() at the end. Fixes: ce63cb62d794 ("erofs: support unencoded inodes for fileio") Fixes: 9f74ae8c9ac9 ("erofs: shorten bvecs[] for file-backed mounts") Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann Reviewed-by: Gao Xiang Tested-by: Hongbo Li Link: https://lore.kernel.org/r/20250428230933.3422273-1-max.kellermann@ionos.com Signed-off-by: Gao Xiang --- fs/erofs/fileio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 4fa0a0121288..60c7cc4c105c 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -150,10 +150,10 @@ io_retry: io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9; attached = 0; } - if (!attached++) - erofs_onlinefolio_split(folio); if (!bio_add_folio(&io->rq->bio, folio, len, cur)) goto io_retry; + if (!attached++) + erofs_onlinefolio_split(folio); io->dev.m_pa += len; } cur += len; From c1c9cad50c5c35cd4de1b54af59a28bf07451593 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Sun, 23 Mar 2025 05:49:06 -0700 Subject: [PATCH 091/239] drm/xe/svm: fix dereferencing error pointer in drm_gpusvm_range_alloc() xe_svm_range_alloc() returns ERR_PTR(-ENOMEM) on failure and there is a dereference of "range" after that: --> range->gpusvm = gpusvm; In xe_svm_range_alloc(), when memory allocation fails return NULL instead to handle this situation. Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/adaef4dd-5866-48ca-bc22-4a1ddef20381@stanley.mountain/ Signed-off-by: Harshit Mogalapalli Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250323124907.3946370-1-harshit.m.mogalapalli@oracle.com (cherry picked from commit 7a0322122cfdd9a6f10fc7701023d75c98eb3d22) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index f8c128524d9f..0b6547c06961 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -79,7 +79,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm) range = kzalloc(sizeof(*range), GFP_KERNEL); if (!range) - return ERR_PTR(-ENOMEM); + return NULL; INIT_LIST_HEAD(&range->garbage_collector_link); xe_vm_get(gpusvm_to_vm(gpusvm)); From 5e639707ddb8f080fbde805a1bfa6668a1b45298 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 17 Apr 2025 12:52:12 -0700 Subject: [PATCH 092/239] drm/xe/guc: Fix capture of steering registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The list of registers to capture on a GPU hang includes some that require steering. Unfortunately, the flag to say this was being wiped to due a missing OR on the assignment of the next flag field. Fix that. Fixes: b170d696c1e2 ("drm/xe/guc: Add XE_LP steered register lists") Cc: Zhanjun Dong Cc: Alan Previn Cc: Matt Roper Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: intel-xe@lists.freedesktop.org Signed-off-by: John Harrison Reviewed-by: Matt Roper Reviewed-by: Zhanjun Dong Link: https://lore.kernel.org/r/20250417195215.3002210-2-John.C.Harrison@Intel.com (cherry picked from commit 532da44b54a10d50ebad14a8a02bd0b78ec23e8b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5fe..9095618648bc 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; } From b1852c5de2f2a37dd4462f7837c9e3e678f9e546 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 21 Apr 2025 14:23:41 +0800 Subject: [PATCH 093/239] i2c: imx-lpi2c: Fix clock count when probe defers Deferred probe with pm_runtime_put() may delay clock disable, causing incorrect clock usage count. Use pm_runtime_put_sync() to ensure the clock is disabled immediately. Fixes: 13d6eb20fc79 ("i2c: imx-lpi2c: add runtime pm support") Signed-off-by: Clark Wang Signed-off-by: Carlos Song Cc: # v4.16+ Link: https://lore.kernel.org/r/20250421062341.2471922-1-carlos.song@nxp.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 0d4b3935e687..342d47e67586 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -1380,9 +1380,9 @@ static int lpi2c_imx_probe(struct platform_device *pdev) return 0; rpm_disable: - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; } From 12b8a672d2aa053064151659f49e7310674d42d3 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Tue, 29 Apr 2025 09:32:29 +0530 Subject: [PATCH 094/239] pinctrl: qcom: Fix PINGROUP definition for sm8750 On newer SoCs intr_target_bit position is at 8 instead of 5. Fix it. Also add missing intr_wakeup_present_bit and intr_wakeup_enable_bit which enables forwarding of GPIO interrupts to parent PDC interrupt controller. Fixes: afe9803e3b82 ("pinctrl: qcom: Add sm8750 pinctrl driver") Signed-off-by: Maulik Shah Reviewed-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Reviewed-by: Melody Olvera Link: https://lore.kernel.org/20250429-pinctrl_sm8750-v2-1-87d45dd3bd82@oss.qualcomm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sm8750.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c index 1af11cd95fb0..b94fb4ee0ec3 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8750.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c @@ -46,7 +46,9 @@ .out_bit = 1, \ .intr_enable_bit = 0, \ .intr_status_bit = 0, \ - .intr_target_bit = 5, \ + .intr_wakeup_present_bit = 6, \ + .intr_wakeup_enable_bit = 7, \ + .intr_target_bit = 8, \ .intr_target_kpss_val = 3, \ .intr_raw_status_bit = 4, \ .intr_polarity_bit = 1, \ From 730d837979bac203c786f2c5b0707f5426275c0d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:36 +0800 Subject: [PATCH 095/239] selftests: ublk: fix UBLK_F_NEED_GET_DATA Commit 57e13a2e8cd2 ("selftests: ublk: support user recovery") starts to support UBLK_F_NEED_GET_DATA for covering recovery feature, however the ublk utility implementation isn't done correctly. Fix it by supporting UBLK_F_NEED_GET_DATA correctly. Also add test generic_07 for covering UBLK_F_NEED_GET_DATA. Reviewed-by: Caleb Sander Mateos Fixes: 57e13a2e8cd2 ("selftests: ublk: support user recovery") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/kublk.c | 22 +++++++++------ tools/testing/selftests/ublk/kublk.h | 1 + .../testing/selftests/ublk/test_generic_07.sh | 28 +++++++++++++++++++ .../testing/selftests/ublk/test_stress_05.sh | 8 +++--- 5 files changed, 48 insertions(+), 12 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_generic_07.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index ec4624a283bc..f34ac0bac696 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -9,6 +9,7 @@ TEST_PROGS += test_generic_03.sh TEST_PROGS += test_generic_04.sh TEST_PROGS += test_generic_05.sh TEST_PROGS += test_generic_06.sh +TEST_PROGS += test_generic_07.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index e57a1486bb48..842b40736a9b 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -536,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) if (!(io->flags & UBLKSRV_IO_FREE)) return 0; - /* we issue because we need either fetching or committing */ + /* + * we issue because we need either fetching or committing or + * getting data + */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + if (io->flags & UBLKSRV_NEED_GET_DATA) + cmd_op = UBLK_U_IO_NEED_GET_DATA; + else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; else if (io->flags & UBLKSRV_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; @@ -658,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r, assert(tag < q->q_depth); if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; + ublk_queue_io_cmd(q, io, tag); } else { /* * COMMIT_REQ will be completed immediately since no fetching @@ -1237,7 +1245,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1313,7 +1321,7 @@ int main(int argc, char *argv[]) opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1351,9 +1359,7 @@ int main(int argc, char *argv[]) ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; break; case 'g': - value = strtol(optarg, NULL, 10); - if (value) - ctx.flags |= UBLK_F_NEED_GET_DATA; + ctx.flags |= UBLK_F_NEED_GET_DATA; break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 918db5cd633f..44ee1e4ac55b 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -115,6 +115,7 @@ struct ublk_io { #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) +#define UBLKSRV_NEED_GET_DATA (1UL << 3) unsigned short flags; unsigned short refs; /* used by target code only */ diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh new file mode 100755 index 000000000000..cba86451fa5e --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_07.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_07" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NEED_GET_DATA" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? +if [ "$ERR_CODE" -eq 0 ]; then + _mkfs_mount_test /dev/ublkb"${dev_id}" + ERR_CODE=$? +fi + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index a7071b10224d..88601b48f1cd 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -47,15 +47,15 @@ _create_backfile 0 256M _create_backfile 1 256M for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & wait done if _have_feature "ZERO_COPY"; then for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & wait done fi From 69edf98be844375807f299397c516fb1e962b3cc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:37 +0800 Subject: [PATCH 096/239] ublk: decouple zero copy from user copy UBLK_F_USER_COPY and UBLK_F_SUPPORT_ZERO_COPY are two different features, and shouldn't be coupled together. Commit 1f6540e2aabb ("ublk: zc register/unregister bvec") enables user copy automatically in case of UBLK_F_SUPPORT_ZERO_COPY, this way isn't correct. So decouple zero copy from user copy, and use independent helper to check each one. Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 40f971a66d3e..0a3a3c64316d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -205,11 +205,6 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag); -static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub) -{ - return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); -} - static inline bool ublk_dev_is_zoned(const struct ublk_device *ub) { return ub->dev_info.flags & UBLK_F_ZONED; @@ -609,14 +604,19 @@ static void ublk_apply_params(struct ublk_device *ub) ublk_dev_param_zoned_apply(ub); } +static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) +{ + return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; +} + static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) { - return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); + return ubq->flags & UBLK_F_USER_COPY; } static inline bool ublk_need_map_io(const struct ublk_queue *ubq) { - return !ublk_support_user_copy(ubq); + return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq); } static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) @@ -624,8 +624,11 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) /* * read()/write() is involved in user copy, so request reference * has to be grabbed + * + * for zero copy, request buffer need to be registered to io_uring + * buffer table, so reference is needed */ - return ublk_support_user_copy(ubq); + return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq); } static inline void ublk_init_req_ref(const struct ublk_queue *ubq, @@ -2245,6 +2248,9 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, if (!ubq) return ERR_PTR(-EINVAL); + if (!ublk_support_user_copy(ubq)) + return ERR_PTR(-EACCES); + if (tag >= ubq->q_depth) return ERR_PTR(-EINVAL); @@ -2783,13 +2789,18 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE | UBLK_F_URING_CMD_COMP_IN_TASK; - /* GET_DATA isn't needed any more with USER_COPY */ - if (ublk_dev_is_user_copy(ub)) + /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */ + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; - /* Zoned storage support requires user copy feature */ + /* + * Zoned storage support requires reuse `ublksrv_io_cmd->addr` for + * returning write_append_lba, which is only allowed in case of + * user copy or zero copy + */ if (ublk_dev_is_zoned(ub) && - (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !ublk_dev_is_user_copy(ub))) { + (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags & + (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) { ret = -EINVAL; goto out_free_dev_number; } From 6240f43b29f285a40eebeb789756673af7a7d67c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:38 +0800 Subject: [PATCH 097/239] ublk: enhance check for register/unregister io buffer command The simple check of UBLK_IO_FLAG_OWNED_BY_SRV can avoid incorrect register/unregister io buffer easily, so check it before calling starting to register/un-register io buffer. Also only allow io buffer register/unregister uring_cmd in case of UBLK_F_SUPPORT_ZERO_COPY. Also mark argument 'ublk_queue *' of ublk_register_io_buf as const. Reviewed-by: Caleb Sander Mateos Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 0a3a3c64316d..c624d8f653ae 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -201,7 +201,7 @@ struct ublk_params_header { static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset); + const struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag); @@ -1949,13 +1949,20 @@ static void ublk_io_release(void *priv) } static int ublk_register_io_buf(struct io_uring_cmd *cmd, - struct ublk_queue *ubq, unsigned int tag, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { struct ublk_device *ub = cmd->file->private_data; + const struct ublk_io *io = &ubq->ios[tag]; struct request *req; int ret; + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + req = __ublk_check_and_get_req(ub, ubq, tag, 0); if (!req) return -EINVAL; @@ -1971,8 +1978,17 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, } static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { + const struct ublk_io *io = &ubq->ios[tag]; + + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + return io_buffer_unregister_bvec(cmd, index, issue_flags); } @@ -2076,7 +2092,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, case UBLK_IO_REGISTER_IO_BUF: return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_UNREGISTER_IO_BUF: - return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags); + return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_FETCH_REQ: ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); if (ret) @@ -2128,7 +2144,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset) + const struct ublk_queue *ubq, int tag, size_t offset) { struct request *req; From a584b2630b0d31f8a20e4ccb4de370b160177b8a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:39 +0800 Subject: [PATCH 098/239] ublk: remove the check of ublk_need_req_ref() from __ublk_check_and_get_req __ublk_check_and_get_req() is only called from ublk_check_and_get_req() and ublk_register_io_buf(), the same check has been covered in the two calling sites. So remove the check from __ublk_check_and_get_req(). Suggested-by: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c624d8f653ae..f9032076bc06 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2148,9 +2148,6 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, { struct request *req; - if (!ublk_need_req_ref(ubq)) - return NULL; - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); if (!req) return NULL; From 56f1f30e6795b890463d9b20b11e576adf5a2f77 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Apr 2025 14:48:41 +0200 Subject: [PATCH 099/239] ALSA: ump: Fix buffer overflow at UMP SysEx message conversion The conversion function from MIDI 1.0 to UMP packet contains an internal buffer to keep the incoming MIDI bytes, and its size is 4, as it was supposed to be the max size for a MIDI1 UMP packet data. However, the implementation overlooked that SysEx is handled in a different format, and it can be up to 6 bytes, as found in do_convert_to_ump(). It leads eventually to a buffer overflow, and may corrupt the memory when a longer SysEx message is received. The fix is simply to extend the buffer size to 6 to fit with the SysEx UMP message. Fixes: 0b5288f5fe63 ("ALSA: ump: Add legacy raw MIDI support") Reported-by: Argusee Link: https://patch.msgid.link/20250429124845.25128-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump_convert.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index d099ae27f849..682499b871ea 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -19,7 +19,7 @@ struct ump_cvt_to_ump_bank { /* context for converting from MIDI1 byte stream to UMP packet */ struct ump_cvt_to_ump { /* MIDI1 intermediate buffer */ - unsigned char buf[4]; + unsigned char buf[6]; /* up to 6 bytes for SysEx */ int len; int cmd_bytes; From a75401227eeb827b1a162df1aa9d5b33da921c43 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 Apr 2025 10:18:01 -0700 Subject: [PATCH 100/239] nvme-pci: fix queue unquiesce check on slot_reset A zero return means the reset was successfully scheduled. We don't want to unquiesce the queues while the reset_work is pending, as that will just flush out requeued requests to a failed completion. Fixes: 71a5bb153be104 ("nvme: ensure disabling pairs with unquiesce") Reported-by: Dhankaran Singh Ajravat Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b178d52eac1b..c9e2a5450bc0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3575,7 +3575,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - if (!nvme_try_sched_reset(&dev->ctrl)) + if (nvme_try_sched_reset(&dev->ctrl)) nvme_unquiesce_io_queues(&dev->ctrl); return PCI_ERS_RESULT_RECOVERED; } From 5b960f92ac3e5b4d7f60a506a6b6735eead1da01 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Tue, 22 Apr 2025 20:17:25 +0800 Subject: [PATCH 101/239] nvme-pci: add quirks for device 126f:1001 This commit adds NVME_QUIRK_NO_DEEPEST_PS and NVME_QUIRK_BOGUS_NID for device [126f:1001]. It is similar to commit e89086c43f05 ("drivers/nvme: Add quirks for device 126f:2262") Diff is according the dmesg, use NVME_QUIRK_IGNORE_DEV_SUBNQN. dmesg | grep -i nvme0: nvme nvme0: pci function 0000:01:00.0 nvme nvme0: missing or invalid SUBNQN field. nvme nvme0: 12/0/0 default/read/poll queues Link:https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e89086c43f0500bc7c4ce225495b73b8ce234c1f Signed-off-by: Wentao Guan Signed-off-by: WangYuli Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c9e2a5450bc0..a8d4443edc49 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3623,6 +3623,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1217, 0x8760), /* O2 Micro 64GB Steam Deck */ .driver_data = NVME_QUIRK_DMAPOOL_ALIGN_512, }, + { PCI_DEVICE(0x126f, 0x1001), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_BOGUS_NID, }, From ab35ad950d439ec3409509835d229b3d93d3c7f9 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Thu, 24 Apr 2025 10:40:10 +0800 Subject: [PATCH 102/239] nvme-pci: add quirks for WDC Blue SN550 15b7:5009 Add two quirks for the WDC Blue SN550 (PCI ID 15b7:5009) based on user reports and hardware analysis: - NVME_QUIRK_NO_DEEPEST_PS: liaozw talked to me the problem and solved with nvme_core.default_ps_max_latency_us=0, so add the quirk. I also found some reports in the following link. - NVME_QUIRK_BROKEN_MSI: after get the lspci from Jack Rio. I think that the disk also have NVME_QUIRK_BROKEN_MSI. described in commit d5887dc6b6c0 ("nvme-pci: Add quirk for broken MSIs") as sean said in link which match the MSI 1/32 and MSI-X 17. Log: lspci -nn | grep -i memory 03:00.0 Non-Volatile memory controller [0108]: Sandisk Corp SanDisk Ultra 3D / WD PC SN530, IX SN530, Blue SN550 NVMe SSD (DRAM-less) [15b7:5009] (rev 01) lspci -v -d 15b7:5009 03:00.0 Non-Volatile memory controller: Sandisk Corp SanDisk Ultra 3D / WD PC SN530, IX SN530, Blue SN550 NVMe SSD (DRAM-less) (rev 01) (prog-if 02 [NVM Express]) Subsystem: Sandisk Corp WD Blue SN550 NVMe SSD Flags: bus master, fast devsel, latency 0, IRQ 35, IOMMU group 10 Memory at fe800000 (64-bit, non-prefetchable) [size=16K] Memory at fe804000 (64-bit, non-prefetchable) [size=256] Capabilities: [80] Power Management version 3 Capabilities: [90] MSI: Enable- Count=1/32 Maskable- 64bit+ Capabilities: [b0] MSI-X: Enable+ Count=17 Masked- Capabilities: [c0] Express Endpoint, MSI 00 Capabilities: [100] Advanced Error Reporting Capabilities: [150] Device Serial Number 00-00-00-00-00-00-00-00 Capabilities: [1b8] Latency Tolerance Reporting Capabilities: [300] Secondary PCI Express Capabilities: [900] L1 PM Substates Kernel driver in use: nvme dmesg | grep nvme [ 0.000000] Command line: BOOT_IMAGE=/vmlinuz-6.12.20-amd64-desktop-rolling root=UUID= ro splash quiet nvme_core.default_ps_max_latency_us=0 DEEPIN_GFXMODE= [ 0.059301] Kernel command line: BOOT_IMAGE=/vmlinuz-6.12.20-amd64-desktop-rolling root=UUID= ro splash quiet nvme_core.default_ps_max_latency_us=0 DEEPIN_GFXMODE= [ 0.542430] nvme nvme0: pci function 0000:03:00.0 [ 0.560426] nvme nvme0: allocated 32 MiB host memory buffer. [ 0.562491] nvme nvme0: 16/0/0 default/read/poll queues [ 0.567764] nvme0n1: p1 p2 p3 p4 p5 p6 p7 p8 p9 [ 6.388726] EXT4-fs (nvme0n1p7): mounted filesystem ro with ordered data mode. Quota mode: none. [ 6.893421] EXT4-fs (nvme0n1p7): re-mounted r/w. Quota mode: none. [ 7.125419] Adding 16777212k swap on /dev/nvme0n1p8. Priority:-2 extents:1 across:16777212k SS [ 7.157588] EXT4-fs (nvme0n1p6): mounted filesystem r/w with ordered data mode. Quota mode: none. [ 7.165021] EXT4-fs (nvme0n1p9): mounted filesystem r/w with ordered data mode. Quota mode: none. [ 8.036932] nvme nvme0: using unchecked data buffer [ 8.096023] block nvme0n1: No UUID available providing old NGUID Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d5887dc6b6c054d0da3cd053afc15b7be1f45ff6 Link: https://lore.kernel.org/all/20240422162822.3539156-1-sean.anderson@linux.dev/ Reported-by: liaozw Closes: https://bbs.deepin.org.cn/post/286300 Reported-by: rugk Closes: https://bugzilla.kernel.org/show_bug.cgi?id=208123 Signed-off-by: Wentao Guan Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a8d4443edc49..2e30e9be7408 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3649,6 +3649,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x15b7, 0x5008), /* Sandisk SN530 */ .driver_data = NVME_QUIRK_BROKEN_MSI }, + { PCI_DEVICE(0x15b7, 0x5009), /* Sandisk SN550 */ + .driver_data = NVME_QUIRK_BROKEN_MSI | + NVME_QUIRK_NO_DEEPEST_PS }, { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ From 48ccf21fa8dc595c8aa4f1d347b593dcae0727d0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 16:07:58 +0200 Subject: [PATCH 103/239] drm/tests: shmem: Fix memleak The drm_gem_shmem_test_get_pages_sgt() gets a scatter-gather table using the drm_gem_shmem_get_sg_table() function and rightfully calls sg_free_table() on it. However, it's also supposed to kfree() the returned sg_table, but doesn't. This leads to a memory leak, reported by kmemleak. Fix it by adding a kunit action to kfree the sgt when the test ends. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: 93032ae634d4 ("drm/test: add a test suite for GEM objects backed by shmem") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250408140758.1831333-1-mripard@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_gem_shmem_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index fd4215e2f982..925fbc2cda70 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -216,6 +216,9 @@ static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); KUNIT_EXPECT_NULL(test, shmem->sgt); + ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); KUNIT_ASSERT_EQ(test, ret, 0); From 1a8bc0fe8039e1e57f68c4a588f0403d98bfeb1f Mon Sep 17 00:00:00 2001 From: Russell Cloran Date: Mon, 14 Apr 2025 22:32:59 -0700 Subject: [PATCH 104/239] drm/mipi-dbi: Fix blanking for non-16 bit formats On r6x2b6x2g6x2 displays not enough blank data is sent to blank the entire screen. When support for these displays was added, the dirty function was updated to handle the different amount of data, but blanking was not, and remained hardcoded as 2 bytes per pixel. This change applies almost the same algorithm used in the dirty function to the blank function, but there is no fb available at that point, and no concern about having to transform any data, so the dbidev pixel format is always used for calculating the length. Fixes: 4aebb79021f3 ("drm/mipi-dbi: Add support for DRM_FORMAT_RGB888") Signed-off-by: Russell Cloran Link: https://lore.kernel.org/r/20250415053259.79572-1-rcloran@gmail.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_mipi_dbi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 89e05a5bed1d..a4cd476f9b30 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -404,12 +404,16 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev) u16 height = drm->mode_config.min_height; u16 width = drm->mode_config.min_width; struct mipi_dbi *dbi = &dbidev->dbi; - size_t len = width * height * 2; + const struct drm_format_info *dst_format; + size_t len; int idx; if (!drm_dev_enter(drm, &idx)) return; + dst_format = drm_format_info(dbidev->pixel_format); + len = drm_format_info_min_pitch(dst_format, 0, width) * height; + memset(dbidev->tx_buf, 0, len); mipi_dbi_set_window_address(dbidev, 0, width - 1, 0, height - 1); From de2b2107d5a41a91ab603e135fb6e408abbee28e Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:06:58 +0200 Subject: [PATCH 105/239] arm64: dts: st: Adjust interrupt-controller for stm32mp25 SoCs Use gic-400 compatible and remove address-cells = <1> on aarch64 Fixes: 5d30d03aaf785 ("arm64: dts: st: introduce stm32mp25 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-2-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp251.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index f3c6cdfd7008..379e290313dc 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -115,9 +115,8 @@ }; intc: interrupt-controller@4ac00000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, <0x0 0x4ac20000 0x0 0x2000>, From 06c231fe953a26f4bc9d7a37ba1b9b288a59c7c2 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:06:59 +0200 Subject: [PATCH 106/239] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp25 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: 5d30d03aaf785 ("arm64: dts: st: introduce stm32mp25 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20250415111654.2103767-3-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp251.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index 379e290313dc..87110f91e489 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -119,9 +119,9 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, - <0x0 0x4ac20000 0x0 0x2000>, - <0x0 0x4ac40000 0x0 0x2000>, - <0x0 0x4ac60000 0x0 0x2000>; + <0x0 0x4ac20000 0x0 0x20000>, + <0x0 0x4ac40000 0x0 0x20000>, + <0x0 0x4ac60000 0x0 0x20000>; }; psci { From 02dc83f09c7262533123e7e4dae9c4f9fc40ed17 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:00 +0200 Subject: [PATCH 107/239] arm64: dts: st: Adjust interrupt-controller for stm32mp21 SoCs Use gic-400 compatible for aarch64 Fixes: 7a57b1bb1afbf ("arm64: dts: st: introduce stm32mp21 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-4-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp211.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/st/stm32mp211.dtsi b/arch/arm64/boot/dts/st/stm32mp211.dtsi index 6dd1377f3e1d..52a8209471b8 100644 --- a/arch/arm64/boot/dts/st/stm32mp211.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp211.dtsi @@ -116,7 +116,7 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x0 0x1000>, <0x4ac20000 0x0 0x2000>, <0x4ac40000 0x0 0x2000>, From 1bc229e9bb9cd502caeec639cb3cff50aea078f0 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:01 +0200 Subject: [PATCH 108/239] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp21 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: 7a57b1bb1afbf ("arm64: dts: st: introduce stm32mp21 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-5-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp211.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp211.dtsi b/arch/arm64/boot/dts/st/stm32mp211.dtsi index 52a8209471b8..bf888d60cd4f 100644 --- a/arch/arm64/boot/dts/st/stm32mp211.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp211.dtsi @@ -118,9 +118,9 @@ intc: interrupt-controller@4ac10000 { compatible = "arm,gic-400"; reg = <0x4ac10000 0x0 0x1000>, - <0x4ac20000 0x0 0x2000>, - <0x4ac40000 0x0 0x2000>, - <0x4ac60000 0x0 0x2000>; + <0x4ac20000 0x0 0x20000>, + <0x4ac40000 0x0 0x20000>, + <0x4ac60000 0x0 0x20000>; #interrupt-cells = <3>; interrupt-controller; }; From 3a1e1082097b8fa2e5d420de105f4cce804c38b2 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:02 +0200 Subject: [PATCH 109/239] arm64: dts: st: Adjust interrupt-controller for stm32mp23 SoCs Use gic-400 compatible and remove address-cells = <1> for aarch64 Fixes: e9b03ef21386e ("arm64: dts: st: introduce stm32mp23 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-6-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp231.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp231.dtsi b/arch/arm64/boot/dts/st/stm32mp231.dtsi index 8820d219a33e..3f73cf1d443d 100644 --- a/arch/arm64/boot/dts/st/stm32mp231.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp231.dtsi @@ -1201,13 +1201,12 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x1000>, <0x4ac20000 0x2000>, <0x4ac40000 0x2000>, <0x4ac60000 0x2000>; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; }; }; From 2ef5c66cba6171feab05e62e1b22df970b238544 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:03 +0200 Subject: [PATCH 110/239] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp23 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: e9b03ef21386e ("arm64: dts: st: introduce stm32mp23 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-7-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp231.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp231.dtsi b/arch/arm64/boot/dts/st/stm32mp231.dtsi index 3f73cf1d443d..75697acd1345 100644 --- a/arch/arm64/boot/dts/st/stm32mp231.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp231.dtsi @@ -1203,9 +1203,9 @@ intc: interrupt-controller@4ac10000 { compatible = "arm,gic-400"; reg = <0x4ac10000 0x1000>, - <0x4ac20000 0x2000>, - <0x4ac40000 0x2000>, - <0x4ac60000 0x2000>; + <0x4ac20000 0x20000>, + <0x4ac40000 0x20000>, + <0x4ac60000 0x20000>; #interrupt-cells = <3>; interrupt-controller; }; From 0759e77a6d9bd34a874da73721ce4a7dc6665023 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Apr 2025 20:36:15 +0200 Subject: [PATCH 111/239] ALSA: usb-audio: Fix duplicated name in MIDI substream names The MIDI substream name string is constructed from the combination of the card shortname (which is taken from USB iProduct) and the USB iJack. The problem is that some devices put the product name to the iJack field, too. For example, aplaymidi -l output on the Lanchkey MK 49 are like: % aplaymidi -l Port Client name Port name 44:0 Launchkey MK4 49 Launchkey MK4 49 Launchkey MK4 44:1 Launchkey MK4 49 Launchkey MK4 49 Launchkey MK4 where the actual iJack name can't be seen because it's truncated due to the doubly words. For resolving those situations, this patch compares the iJack string with the card shortname, and drops if both start with the same words. Then the result becomes like: % aplaymidi -l Port Client name Port name 40:0 Launchkey MK4 49 Launchkey MK4 49 MIDI In 40:1 Launchkey MK4 49 Launchkey MK4 49 DAW In A caveat is that there are some pre-defined names for certain devices in the driver code, and this workaround shouldn't be applied to them. Similarly, when the iJack isn't specified, we should skip this check, too. The patch added those checks in addition to the string comparison. Suggested-by: Paul Davis Tested-by: Paul Davis Link: https://lore.kernel.org/CAFa_cKmEDQWcJatbYWi6A58Zg4Ma9_6Nr3k5LhqwyxC-P_kXtw@mail.gmail.com Link: https://patch.msgid.link/20250429183626.20773-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index dcdd7e9e1ae9..cfed000f243a 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1885,10 +1885,18 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, } port_info = find_port_info(umidi, number); - name_format = port_info ? port_info->name : - (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); - snprintf(substream->name, sizeof(substream->name), - name_format, umidi->card->shortname, jack_name, number + 1); + if (port_info || jack_name == default_jack_name || + strncmp(umidi->card->shortname, jack_name, strlen(umidi->card->shortname)) != 0) { + name_format = port_info ? port_info->name : + (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); + snprintf(substream->name, sizeof(substream->name), + name_format, umidi->card->shortname, jack_name, number + 1); + } else { + /* The manufacturer included the iProduct name in the jack + * name, do not use both + */ + strscpy(substream->name, jack_name); + } *rsubstream = substream; } From 8988c4b91945173a6b5505764915d470f0238fdc Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 29 Apr 2025 15:22:18 +0100 Subject: [PATCH 112/239] perf tools: Fix in-source libperf build When libperf is built alone in-source, $(OUTPUT) isn't set. This causes the generated uapi path to resolve to '/../arch' which results in a permissions error: mkdir: cannot create directory '/../arch': Permission denied Fix it by removing the preceding '/..' which means that it gets generated either in the tools/lib/perf part of the tree or the OUTPUT folder. Some other rules that rely on OUTPUT further refine this conditionally depending on whether it's an in-source or out-of-source build, but I don't think we need the extra complexity here. And this rule is slightly different to others because the header is needed by both libperf and Perf. This is further complicated by the fact that Perf always passes O=... to libperf even for in source builds, meaning that OUTPUT isn't set consistently between projects. Because we're no longer going one level up to try to generate the file in the tools/ folder, Perf's include rule needs to descend into libperf. Also fix the clean rule while we're here. Reported-by: Thorsten Leemhuis Closes: https://lore.kernel.org/linux-perf-users/7703f88e-ccb7-4c98-9da4-8aad224e780f@leemhuis.info/ Fixes: bfb713ea53c7 ("perf tools: Fix arm64 build by generating unistd_64.h") Signed-off-by: James Clark Tested-by: Thorsten Leemhuis Link: https://lore.kernel.org/r/20250429-james-perf-fix-libperf-in-source-build-v1-1-a1a827ac15e5@linaro.org Signed-off-by: Namhyung Kim --- tools/lib/perf/Makefile | 6 +++--- tools/perf/Makefile.config | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 1a19b5013f45..7fbb50b74c00 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,7 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ --I$(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -100,7 +100,7 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -uapi-asm := $(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi/asm +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm ifeq ($(SRCARCH),arm64) syscall-y := $(uapi-asm)/unistd_64.h endif @@ -130,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a52482654d4b..b7769a22fe1a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,7 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated -CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi +CFLAGS += -I$(OUTPUT)libperf/arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) From 95b2536137eeb66f20947e0fb0d0c100c8d6a140 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 29 Apr 2025 09:35:19 +0200 Subject: [PATCH 113/239] ASoC: Intel: catpt: avoid type mismatch in dev_dbg() format Depending on the architecture __ffs() returns either an 'unsigned long' or 'unsigned int' result. Compile-testing this driver on targets that use the latter produces a warning: sound/soc/intel/catpt/dsp.c: In function 'catpt_dsp_set_srampge': sound/soc/intel/catpt/dsp.c:181:44: error: format '%ld' expects argument of type 'long int', but argument 4 has type 'u32' {aka 'unsigned int'} [-Werror=format=] 181 | dev_dbg(cdev->dev, "sanitize block %ld: off 0x%08x\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Change the type of the local variable to match the format string and avoid the warning on any architecture. Signed-off-by: Arnd Bergmann Acked-by: Cezary Rojewski Link: https://patch.msgid.link/20250429073545.3558494-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/intel/catpt/dsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c index 5993819cc58a..008a20a2acbd 100644 --- a/sound/soc/intel/catpt/dsp.c +++ b/sound/soc/intel/catpt/dsp.c @@ -156,7 +156,7 @@ static void catpt_dsp_set_srampge(struct catpt_dev *cdev, struct resource *sram, { unsigned long old; u32 off = sram->start; - u32 b = __ffs(mask); + unsigned long b = __ffs(mask); old = catpt_readl_pci(cdev, VDRTCTL0) & mask; dev_dbg(cdev->dev, "SRAMPGE [0x%08lx] 0x%08lx -> 0x%08lx", From 36fd6275818e93d5bc44140d546bf2a45e88feee Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Mon, 28 Apr 2025 09:30:55 +0200 Subject: [PATCH 114/239] spi: spi-qpic-snand: fix NAND_READ_LOCATION_2 register handling The precomputed value for the NAND_READ_LOCATION_2 register should be stored in 'snandc->regs->read_location2'. Fix the qcom_spi_set_read_loc_first() function accordingly. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Reviewed-by: Md Sadre Alam Link: https://patch.msgid.link/20250428-qpic-snand-readloc2-fix-v1-1-50ce0877ff72@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index ae32c452d0bc..94948c8781e8 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -142,7 +142,7 @@ static void qcom_spi_set_read_loc_first(struct qcom_nand_controller *snandc, else if (reg == NAND_READ_LOCATION_1) snandc->regs->read_location1 = locreg_val; else if (reg == NAND_READ_LOCATION_2) - snandc->regs->read_location1 = locreg_val; + snandc->regs->read_location2 = locreg_val; else if (reg == NAND_READ_LOCATION_3) snandc->regs->read_location3 = locreg_val; } From 9fcd53c3206618166facf03ee3d465f66a107e01 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 29 Apr 2025 07:50:56 +0000 Subject: [PATCH 115/239] erofs: remove unused enum type Opt_err is not used in EROFS, we can remove it. Signed-off-by: Hongbo Li Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250429075056.689570-1-lihongbo22@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index cadec6b1b554..da6ee7c39290 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -357,7 +357,6 @@ static void erofs_default_options(struct erofs_sb_info *sbi) enum { Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum, Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, - Opt_err }; static const struct constant_table erofs_param_cache_strategy[] = { From 4d5b71b487291da9f92e352c0a7e39f256d60db8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Apr 2025 07:31:41 +0200 Subject: [PATCH 116/239] ALSA: hda/realtek: Fix built-mic regression on other ASUS models A few ASUS models use the ALC256_FIXUP_ASUS_HEADSET_MODE although they have no built-in mic pin on NID 0x13, as found in the commit c1732ede5e80 ("ALSA: hda/realtek - Fix headset and mic on several Asus laptops with ALC256"). This was relatively harmless in the past as NID 0x13 was assigned as the secondary mic. But since the fix for the pin sort order, this pin became the primary one, hence user started noticing the broken input, and we've fixed already for a few ASUS models to switch to ALC256_FIXUP_ASUS_MIC_NO_PRESENCE. This patch corrects the other ASUS models to use the right quirk entry for fixing the built-in mic regression. Here we cover X541SA (1043:12e0), X541UV (1043:12f0), Z550SA (1043:13bf0) and X555UB (1043:1ccd). Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort") Link: https://bugzilla.kernel.org/show_bug.cgi?id=220058 Link: https://patch.msgid.link/20250430053210.31776-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1799203af35a..7810d5f9b5aa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10910,10 +10910,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), - SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1460, "Asus VivoBook 15", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -10967,7 +10967,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), - SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1ccf, "ASUS G814JU/JV/JI", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1cdf, "ASUS G814JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1cef, "ASUS G834JY/JZ/JI/JG", ALC285_FIXUP_ASUS_HEADSET_MIC), From 58f6217e5d0132a9f14e401e62796916aa055c1b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Apr 2025 17:13:55 -0700 Subject: [PATCH 117/239] perf/x86/intel: KVM: Mask PEBS_ENABLE loaded for guest with vCPU's value. When generating the MSR_IA32_PEBS_ENABLE value that will be loaded on VM-Entry to a KVM guest, mask the value with the vCPU's desired PEBS_ENABLE value. Consulting only the host kernel's host vs. guest masks results in running the guest with PEBS enabled even when the guest doesn't want to use PEBS. Because KVM uses perf events to proxy the guest virtual PMU, simply looking at exclude_host can't differentiate between events created by host userspace, and events created by KVM on behalf of the guest. Running the guest with PEBS unexpectedly enabled typically manifests as crashes due to a near-infinite stream of #PFs. E.g. if the guest hasn't written MSR_IA32_DS_AREA, the CPU will hit page faults on address '0' when trying to record PEBS events. The issue is most easily reproduced by running `perf kvm top` from before commit 7b100989b4f6 ("perf evlist: Remove __evlist__add_default") (after which, `perf kvm top` effectively stopped using PEBS). The userspace side of perf creates a guest-only PEBS event, which intel_guest_get_msrs() misconstrues a guest-*owned* PEBS event. Arguably, this is a userspace bug, as enabling PEBS on guest-only events simply cannot work, and userspace can kill VMs in many other ways (there is no danger to the host). However, even if this is considered to be bad userspace behavior, there's zero downside to perf/KVM restricting PEBS to guest-owned events. Note, commit 854250329c02 ("KVM: x86/pmu: Disable guest PEBS temporarily in two rare situations") fixed the case where host userspace is profiling KVM *and* userspace, but missed the case where userspace is profiling only KVM. Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS") Closes: https://lore.kernel.org/all/Z_VUswFkWiTYI0eD@do-x1carbon Reported-by: Seth Forshee Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Tested-by: "Seth Forshee (DigitalOcean)" Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250426001355.1026530-1-seanjc@google.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 00dfe487bd00..c5f385413392 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4395,7 +4395,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) arr[pebs_enable] = (struct perf_guest_switch_msr){ .msr = MSR_IA32_PEBS_ENABLE, .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, - .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable, }; if (arr[pebs_enable].host) { From 77e40bbce93059658aee02786a32c5c98a240a8a Mon Sep 17 00:00:00 2001 From: Michael Liang Date: Tue, 29 Apr 2025 10:42:01 -0600 Subject: [PATCH 118/239] nvme-tcp: fix premature queue removal and I/O failover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch addresses a data corruption issue observed in nvme-tcp during testing. In an NVMe native multipath setup, when an I/O timeout occurs, all inflight I/Os are canceled almost immediately after the kernel socket is shut down. These canceled I/Os are reported as host path errors, triggering a failover that succeeds on a different path. However, at this point, the original I/O may still be outstanding in the host's network transmission path (e.g., the NIC’s TX queue). From the user-space app's perspective, the buffer associated with the I/O is considered completed since they're acked on the different path and may be reused for new I/O requests. Because nvme-tcp enables zero-copy by default in the transmission path, this can lead to corrupted data being sent to the original target, ultimately causing data corruption. We can reproduce this data corruption by injecting delay on one path and triggering i/o timeout. To prevent this issue, this change ensures that all inflight transmissions are fully completed from host's perspective before returning from queue stop. To handle concurrent I/O timeout from multiple namespaces under the same controller, always wait in queue stop regardless of queue's state. This aligns with the behavior of queue stopping in other NVMe fabric transports. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Michael Liang Reviewed-by: Mohamed Khalfella Reviewed-by: Randy Jennings Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 72d260201d8c..aba365f97cf6 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1946,7 +1946,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1965,6 +1965,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -2032,7 +2057,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, From 521987940ad4fd37fe3d0340ec6f39c4e8e91e36 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 30 Apr 2025 08:40:25 +1000 Subject: [PATCH 119/239] nvme-tcp: select CONFIG_TLS from CONFIG_NVME_TCP_TLS Ensure that TLS support is enabled in the kernel when CONFIG_NVME_TCP_TLS is enabled. Without this the code compiles, but does not actually work unless something else enables CONFIG_TLS. Fixes: be8e82caa68 ("nvme-tcp: enable TLS handshake upcall") Signed-off-by: Alistair Francis Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index d47dfa80fb95..4d64b6935bb9 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -102,6 +102,7 @@ config NVME_TCP_TLS depends on NVME_TCP select NET_HANDSHAKE select KEYS + select TLS help Enables TLS encryption for NVMe TCP using the netlink handshake API. From ac38b7ef704c0659568fd4b2c7e6c1255fc51798 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 30 Apr 2025 08:23:47 +1000 Subject: [PATCH 120/239] nvmet-tcp: select CONFIG_TLS from CONFIG_NVME_TARGET_TCP_TLS Ensure that TLS support is enabled in the kernel when CONFIG_NVME_TARGET_TCP_TLS is enabled. Without this the code compiles, but does not actually work unless something else enables CONFIG_TLS. Fixes: 675b453e0241 ("nvmet-tcp: enable TLS handshake upcall") Signed-off-by: Alistair Francis Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fb7446d6d682..4c253b433bf7 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -98,6 +98,7 @@ config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP select NET_HANDSHAKE + select TLS help Enables TLS encryption for the NVMe TCP target using the netlink handshake API. From 46d22b47df2741996af277a2838b95f130436c13 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 23 Apr 2025 16:06:21 +1000 Subject: [PATCH 121/239] nvmet-tcp: don't restore null sk_state_change queue->state_change is set as part of nvmet_tcp_set_queue_sock(), but if the TCP connection isn't established when nvmet_tcp_set_queue_sock() is called then queue->state_change isn't set and sock->sk->sk_state_change isn't replaced. As such we don't need to restore sock->sk->sk_state_change if queue->state_change is NULL. This avoids NULL pointer dereferences such as this: [ 286.462026][ C0] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 286.462814][ C0] #PF: supervisor instruction fetch in kernel mode [ 286.463796][ C0] #PF: error_code(0x0010) - not-present page [ 286.464392][ C0] PGD 8000000140620067 P4D 8000000140620067 PUD 114201067 PMD 0 [ 286.465086][ C0] Oops: Oops: 0010 [#1] SMP KASAN PTI [ 286.465559][ C0] CPU: 0 UID: 0 PID: 1628 Comm: nvme Not tainted 6.15.0-rc2+ #11 PREEMPT(voluntary) [ 286.466393][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 [ 286.467147][ C0] RIP: 0010:0x0 [ 286.467420][ C0] Code: Unable to access opcode bytes at 0xffffffffffffffd6. [ 286.467977][ C0] RSP: 0018:ffff8883ae008580 EFLAGS: 00010246 [ 286.468425][ C0] RAX: 0000000000000000 RBX: ffff88813fd34100 RCX: ffffffffa386cc43 [ 286.469019][ C0] RDX: 1ffff11027fa68b6 RSI: 0000000000000008 RDI: ffff88813fd34100 [ 286.469545][ C0] RBP: ffff88813fd34160 R08: 0000000000000000 R09: ffffed1027fa682c [ 286.470072][ C0] R10: ffff88813fd34167 R11: 0000000000000000 R12: ffff88813fd344c3 [ 286.470585][ C0] R13: ffff88813fd34112 R14: ffff88813fd34aec R15: ffff888132cdd268 [ 286.471070][ C0] FS: 00007fe3c04c7d80(0000) GS:ffff88840743f000(0000) knlGS:0000000000000000 [ 286.471644][ C0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 286.472543][ C0] CR2: ffffffffffffffd6 CR3: 000000012daca000 CR4: 00000000000006f0 [ 286.473500][ C0] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 286.474467][ C0] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400 [ 286.475453][ C0] Call Trace: [ 286.476102][ C0] [ 286.476719][ C0] tcp_fin+0x2bb/0x440 [ 286.477429][ C0] tcp_data_queue+0x190f/0x4e60 [ 286.478174][ C0] ? __build_skb_around+0x234/0x330 [ 286.478940][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.479659][ C0] ? __pfx_tcp_data_queue+0x10/0x10 [ 286.480431][ C0] ? tcp_try_undo_loss+0x640/0x6c0 [ 286.481196][ C0] ? seqcount_lockdep_reader_access.constprop.0+0x82/0x90 [ 286.482046][ C0] ? kvm_clock_get_cycles+0x14/0x30 [ 286.482769][ C0] ? ktime_get+0x66/0x150 [ 286.483433][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.484146][ C0] tcp_rcv_established+0x6e4/0x2050 [ 286.484857][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.485523][ C0] ? ipv4_dst_check+0x160/0x2b0 [ 286.486203][ C0] ? __pfx_tcp_rcv_established+0x10/0x10 [ 286.486917][ C0] ? lock_release+0x217/0x2c0 [ 286.487595][ C0] tcp_v4_do_rcv+0x4d6/0x9b0 [ 286.488279][ C0] tcp_v4_rcv+0x2af8/0x3e30 [ 286.488904][ C0] ? raw_local_deliver+0x51b/0xad0 [ 286.489551][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.490198][ C0] ? __pfx_tcp_v4_rcv+0x10/0x10 [ 286.490813][ C0] ? __pfx_raw_local_deliver+0x10/0x10 [ 286.491487][ C0] ? __pfx_nf_confirm+0x10/0x10 [nf_conntrack] [ 286.492275][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.492900][ C0] ip_protocol_deliver_rcu+0x8f/0x370 [ 286.493579][ C0] ip_local_deliver_finish+0x297/0x420 [ 286.494268][ C0] ip_local_deliver+0x168/0x430 [ 286.494867][ C0] ? __pfx_ip_local_deliver+0x10/0x10 [ 286.495498][ C0] ? __pfx_ip_local_deliver_finish+0x10/0x10 [ 286.496204][ C0] ? ip_rcv_finish_core+0x19a/0x1f20 [ 286.496806][ C0] ? lock_release+0x217/0x2c0 [ 286.497414][ C0] ip_rcv+0x455/0x6e0 [ 286.497945][ C0] ? __pfx_ip_rcv+0x10/0x10 [ 286.498550][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.499137][ C0] ? __pfx_ip_rcv_finish+0x10/0x10 [ 286.499763][ C0] ? lock_release+0x217/0x2c0 [ 286.500327][ C0] ? dl_scaled_delta_exec+0xd1/0x2c0 [ 286.500922][ C0] ? __pfx_ip_rcv+0x10/0x10 [ 286.501480][ C0] __netif_receive_skb_one_core+0x166/0x1b0 [ 286.502173][ C0] ? __pfx___netif_receive_skb_one_core+0x10/0x10 [ 286.502903][ C0] ? lock_acquire+0x2b2/0x310 [ 286.503487][ C0] ? process_backlog+0x372/0x1350 [ 286.504087][ C0] ? lock_release+0x217/0x2c0 [ 286.504642][ C0] process_backlog+0x3b9/0x1350 [ 286.505214][ C0] ? process_backlog+0x372/0x1350 [ 286.505779][ C0] __napi_poll.constprop.0+0xa6/0x490 [ 286.506363][ C0] net_rx_action+0x92e/0xe10 [ 286.506889][ C0] ? __pfx_net_rx_action+0x10/0x10 [ 286.507437][ C0] ? timerqueue_add+0x1f0/0x320 [ 286.507977][ C0] ? sched_clock_cpu+0x68/0x540 [ 286.508492][ C0] ? lock_acquire+0x2b2/0x310 [ 286.509043][ C0] ? kvm_sched_clock_read+0xd/0x20 [ 286.509607][ C0] ? handle_softirqs+0x1aa/0x7d0 [ 286.510187][ C0] handle_softirqs+0x1f2/0x7d0 [ 286.510754][ C0] ? __pfx_handle_softirqs+0x10/0x10 [ 286.511348][ C0] ? irqtime_account_irq+0x181/0x290 [ 286.511937][ C0] ? __dev_queue_xmit+0x85d/0x3450 [ 286.512510][ C0] do_softirq.part.0+0x89/0xc0 [ 286.513100][ C0] [ 286.513548][ C0] [ 286.513953][ C0] __local_bh_enable_ip+0x112/0x140 [ 286.514522][ C0] ? __dev_queue_xmit+0x85d/0x3450 [ 286.515072][ C0] __dev_queue_xmit+0x872/0x3450 [ 286.515619][ C0] ? nft_do_chain+0xe16/0x15b0 [nf_tables] [ 286.516252][ C0] ? __pfx___dev_queue_xmit+0x10/0x10 [ 286.516817][ C0] ? selinux_ip_postroute+0x43c/0xc50 [ 286.517433][ C0] ? __pfx_selinux_ip_postroute+0x10/0x10 [ 286.518061][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.518606][ C0] ? ip_output+0x164/0x4a0 [ 286.519149][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.519671][ C0] ? ip_finish_output2+0x17d5/0x1fb0 [ 286.520258][ C0] ip_finish_output2+0xb4b/0x1fb0 [ 286.520787][ C0] ? __pfx_ip_finish_output2+0x10/0x10 [ 286.521355][ C0] ? __ip_finish_output+0x15d/0x750 [ 286.521890][ C0] ip_output+0x164/0x4a0 [ 286.522372][ C0] ? __pfx_ip_output+0x10/0x10 [ 286.522872][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.523402][ C0] ? _raw_spin_unlock_irqrestore+0x4c/0x60 [ 286.524031][ C0] ? __pfx_ip_finish_output+0x10/0x10 [ 286.524605][ C0] ? __ip_queue_xmit+0x999/0x2260 [ 286.525200][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.525744][ C0] ? ipv4_dst_check+0x16a/0x2b0 [ 286.526279][ C0] ? lock_release+0x217/0x2c0 [ 286.526793][ C0] __ip_queue_xmit+0x1883/0x2260 [ 286.527324][ C0] ? __skb_clone+0x54c/0x730 [ 286.527827][ C0] __tcp_transmit_skb+0x209b/0x37a0 [ 286.528374][ C0] ? __pfx___tcp_transmit_skb+0x10/0x10 [ 286.528952][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.529472][ C0] ? seqcount_lockdep_reader_access.constprop.0+0x82/0x90 [ 286.530152][ C0] ? trace_hardirqs_on+0x12/0x120 [ 286.530691][ C0] tcp_write_xmit+0xb81/0x88b0 [ 286.531224][ C0] ? mod_memcg_state+0x4d/0x60 [ 286.531736][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.532253][ C0] __tcp_push_pending_frames+0x90/0x320 [ 286.532826][ C0] tcp_send_fin+0x141/0xb50 [ 286.533352][ C0] ? __pfx_tcp_send_fin+0x10/0x10 [ 286.533908][ C0] ? __local_bh_enable_ip+0xab/0x140 [ 286.534495][ C0] inet_shutdown+0x243/0x320 [ 286.535077][ C0] nvme_tcp_alloc_queue+0xb3b/0x2590 [nvme_tcp] [ 286.535709][ C0] ? do_raw_spin_lock+0x129/0x260 [ 286.536314][ C0] ? __pfx_nvme_tcp_alloc_queue+0x10/0x10 [nvme_tcp] [ 286.536996][ C0] ? do_raw_spin_unlock+0x54/0x1e0 [ 286.537550][ C0] ? _raw_spin_unlock+0x29/0x50 [ 286.538127][ C0] ? do_raw_spin_lock+0x129/0x260 [ 286.538664][ C0] ? __pfx_do_raw_spin_lock+0x10/0x10 [ 286.539249][ C0] ? nvme_tcp_alloc_admin_queue+0xd5/0x340 [nvme_tcp] [ 286.539892][ C0] ? __wake_up+0x40/0x60 [ 286.540392][ C0] nvme_tcp_alloc_admin_queue+0xd5/0x340 [nvme_tcp] [ 286.541047][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.541589][ C0] nvme_tcp_setup_ctrl+0x8b/0x7a0 [nvme_tcp] [ 286.542254][ C0] ? _raw_spin_unlock_irqrestore+0x4c/0x60 [ 286.542887][ C0] ? __pfx_nvme_tcp_setup_ctrl+0x10/0x10 [nvme_tcp] [ 286.543568][ C0] ? trace_hardirqs_on+0x12/0x120 [ 286.544166][ C0] ? _raw_spin_unlock_irqrestore+0x35/0x60 [ 286.544792][ C0] ? nvme_change_ctrl_state+0x196/0x2e0 [nvme_core] [ 286.545477][ C0] nvme_tcp_create_ctrl+0x839/0xb90 [nvme_tcp] [ 286.546126][ C0] nvmf_dev_write+0x3db/0x7e0 [nvme_fabrics] [ 286.546775][ C0] ? rw_verify_area+0x69/0x520 [ 286.547334][ C0] vfs_write+0x218/0xe90 [ 286.547854][ C0] ? do_syscall_64+0x9f/0x190 [ 286.548408][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.549037][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.549659][ C0] ? __pfx_vfs_write+0x10/0x10 [ 286.550259][ C0] ? do_syscall_64+0x9f/0x190 [ 286.550840][ C0] ? syscall_exit_to_user_mode+0x8e/0x280 [ 286.551516][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.552180][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.552834][ C0] ? ksys_read+0xf5/0x1c0 [ 286.553386][ C0] ? __pfx_ksys_read+0x10/0x10 [ 286.553964][ C0] ksys_write+0xf5/0x1c0 [ 286.554499][ C0] ? __pfx_ksys_write+0x10/0x10 [ 286.555072][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.555698][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.556319][ C0] ? do_syscall_64+0x54/0x190 [ 286.556866][ C0] do_syscall_64+0x93/0x190 [ 286.557420][ C0] ? rcu_read_unlock+0x17/0x60 [ 286.557986][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.558526][ C0] ? lock_release+0x217/0x2c0 [ 286.559087][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.559659][ C0] ? count_memcg_events.constprop.0+0x4a/0x60 [ 286.560476][ C0] ? exc_page_fault+0x7a/0x110 [ 286.561064][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.561647][ C0] ? lock_release+0x217/0x2c0 [ 286.562257][ C0] ? do_user_addr_fault+0x171/0xa00 [ 286.562839][ C0] ? do_user_addr_fault+0x4a2/0xa00 [ 286.563453][ C0] ? irqentry_exit_to_user_mode+0x84/0x270 [ 286.564112][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.564677][ C0] ? irqentry_exit_to_user_mode+0x84/0x270 [ 286.565317][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.565922][ C0] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 286.566542][ C0] RIP: 0033:0x7fe3c05e6504 [ 286.567102][ C0] Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d c5 8b 10 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 [ 286.568931][ C0] RSP: 002b:00007fff76444f58 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [ 286.569807][ C0] RAX: ffffffffffffffda RBX: 000000003b40d930 RCX: 00007fe3c05e6504 [ 286.570621][ C0] RDX: 00000000000000cf RSI: 000000003b40d930 RDI: 0000000000000003 [ 286.571443][ C0] RBP: 0000000000000003 R08: 00000000000000cf R09: 000000003b40d930 [ 286.572246][ C0] R10: 0000000000000000 R11: 0000000000000202 R12: 000000003b40cd60 [ 286.573069][ C0] R13: 00000000000000cf R14: 00007fe3c07417f8 R15: 00007fe3c073502e [ 286.573886][ C0] Closes: https://lore.kernel.org/linux-nvme/5hdonndzoqa265oq3bj6iarwtfk5dewxxjtbjvn5uqnwclpwt6@a2n6w3taxxex/ Signed-off-by: Alistair Francis Reviewed-by: Sagi Grimberg Tested-by: Shin'ichiro Kawasaki Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f2d0c920269b..12a5cb8641ca 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1560,6 +1560,9 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; + if (!queue->state_change) + return; + write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_data_ready = queue->data_ready; sock->sk->sk_state_change = queue->state_change; From 8edb86b2ed1d63cc400aecae8eb8c8114837171a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 25 Apr 2025 11:34:34 +0200 Subject: [PATCH 122/239] nvmet-auth: always free derived key data After calling nvme_auth_derive_tls_psk() we need to free the resulting psk data, as either TLS is disable (and we don't need the data anyway) or the psk data is copied into the resulting key (and can be free, too). Fixes: fa2e0f8bbc68 ("nvmet-tcp: support secure channel concatenation") Reported-by: Yi Zhang Suggested-by: Maurizio Lombardi Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Tested-by: Yi Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/target/auth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index cef8d77f477b..9429b8218408 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -600,13 +600,12 @@ void nvmet_auth_insert_psk(struct nvmet_sq *sq) pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n", __func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key)); tls_key = NULL; - kfree_sensitive(tls_psk); } if (sq->ctrl->tls_key) key_put(sq->ctrl->tls_key); sq->ctrl->tls_key = tls_key; #endif - + kfree_sensitive(tls_psk); out_free_digest: kfree_sensitive(digest); out_free_psk: From f024d3a8ded0d8d2129ae123d7a5305c29ca44ce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Apr 2025 07:17:17 -0600 Subject: [PATCH 123/239] io_uring/fdinfo: annotate racy sq/cq head/tail reads syzbot complains about the cached sq head read, and it's totally right. But we don't need to care, it's just reading fdinfo, and reading the CQ or SQ tail/head entries are known racy in that they are just a view into that very instant and may of course be outdated by the time they are reported. Annotate both the SQ head and CQ tail read with data_race() to avoid this syzbot complaint. Link: https://lore.kernel.org/io-uring/6811f6dc.050a0220.39e3a1.0d0e.GAE@google.com/ Reported-by: syzbot+3e77fd302e99f5af9394@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index f60d0a9d505e..9414ca6d101c 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -123,11 +123,11 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "SqMask:\t0x%x\n", sq_mask); seq_printf(m, "SqHead:\t%u\n", sq_head); seq_printf(m, "SqTail:\t%u\n", sq_tail); - seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); + seq_printf(m, "CachedSqHead:\t%u\n", data_race(ctx->cached_sq_head)); seq_printf(m, "CqMask:\t0x%x\n", cq_mask); seq_printf(m, "CqHead:\t%u\n", cq_head); seq_printf(m, "CqTail:\t%u\n", cq_tail); - seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); + seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail)); seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head); sq_entries = min(sq_tail - sq_head, ctx->sq_entries); for (i = 0; i < sq_entries; i++) { From 533a8a67cc3b072a24122d1ea10a8db66a5c9393 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 28 Apr 2025 21:50:07 +0200 Subject: [PATCH 124/239] soundwire: intel_auxdevice: Fix system suspend/resume handling Before commit bca84a7b93fd ("PM: sleep: Use DPM_FLAG_SMART_SUSPEND conditionally") the runtime PM status of the device in intel_resume() had always been RPM_ACTIVE because setting DPM_FLAG_SMART_SUSPEND had caused the core to call pm_runtime_set_active() for that device during the "noirq" resume phase. For this reason, the pm_runtime_suspended() check in intel_resume() had never triggered and the code depending on it had never run. That had not caused any observable functional issues to appear, so effectively the code in question had never been needed. After commit bca84a7b93fd the core does not call pm_runtime_set_active() for all devices with DPM_FLAG_SMART_SUSPEND set any more and the code depending on the pm_runtime_suspended() check in intel_resume() runs if the device is runtime-suspended prior to a system-wide suspend transition. Unfortunately, when it runs, it breaks things due to the attempt to runtime-resume bus->dev which most likely is not ready for a runtime resume at that point. It also does other more-or-less questionable things. Namely, it calls pm_runtime_idle() for a device with a nonzero runtime PM usage counter which has no effect (all devices have nonzero runtime PM usage counters during system-wide suspend and resume). It also calls pm_runtime_mark_last_busy() for the device even though devices cannot runtime-suspend during system-wide suspend and resume (because their runtime PM usage counters are nonzero) and an analogous call is made in the same function later. Moreover, it sets the runtime PM status of the device to RPM_ACTIVE before activating it. For the reasons listed above, remove that code altogether. On top of that, add a pm_runtime_disable() call to intel_suspend() to prevent the device from being runtime-resumed at any point after intel_suspend() has started to manipulate it because the changes made by that function would be undone by a runtime-suspend of the device. Next, once runtime PM has been disabled, the runtime PM status of the device cannot change, so pm_runtime_status_suspended() can be used instead of pm_runtime_suspended() in intel_suspend(). Finally, make intel_resume() call pm_runtime_set_active() at the end to set the runtime PM status of the device to "active" because it has just been activated and re-enable runtime PM for it after that. Additionally, drop the setting of DPM_FLAG_SMART_SUSPEND from the driver because it has no effect on devices handled by it. Fixes: bca84a7b93fd ("PM: sleep: Use DPM_FLAG_SMART_SUSPEND conditionally") Reported-by: Bard Liao Tested-by: Bard Liao Signed-off-by: Rafael J. Wysocki Acked-by: Bard Liao Link: https://patch.msgid.link/12680420.O9o76ZdvQC@rjwysocki.net --- drivers/soundwire/intel_auxdevice.c | 36 +++++++++++------------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 5ea6399e6c9b..10a602d4843a 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -353,9 +353,6 @@ static int intel_link_probe(struct auxiliary_device *auxdev, /* use generic bandwidth allocation algorithm */ sdw->cdns.bus.compute_params = sdw_compute_params; - /* avoid resuming from pm_runtime suspend if it's not required */ - dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - ret = sdw_bus_master_add(bus, dev, dev->fwnode); if (ret) { dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); @@ -640,7 +637,10 @@ static int __maybe_unused intel_suspend(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { + /* Prevent runtime PM from racing with the code below. */ + pm_runtime_disable(dev); + + if (pm_runtime_status_suspended(dev)) { dev_dbg(dev, "pm_runtime status: suspended\n"); clock_stop_quirks = sdw->link_res->clock_stop_quirks; @@ -648,7 +648,7 @@ static int __maybe_unused intel_suspend(struct device *dev) if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || !clock_stop_quirks) { - if (pm_runtime_suspended(dev->parent)) { + if (pm_runtime_status_suspended(dev->parent)) { /* * paranoia check: this should not happen with the .prepare * resume to full power @@ -715,7 +715,6 @@ static int __maybe_unused intel_resume(struct device *dev) struct sdw_cdns *cdns = dev_get_drvdata(dev); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_bus *bus = &cdns->bus; - int link_flags; int ret; if (bus->prop.hw_disabled || !sdw->startup_done) { @@ -724,23 +723,6 @@ static int __maybe_unused intel_resume(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); - - /* follow required sequence from runtime_pm.rst */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_enable(dev); - - pm_runtime_resume(bus->dev); - - link_flags = md_flags >> (bus->link_id * 8); - - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - } - ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s failed: %d\n", __func__, ret); @@ -760,6 +742,14 @@ static int __maybe_unused intel_resume(struct device *dev) return ret; } + /* + * Runtime PM has been disabled in intel_suspend(), so set the status + * to active because the device has just been resumed and re-enable + * runtime PM. + */ + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + /* * after system resume, the pm_runtime suspend() may kick in * during the enumeration, before any children device force the From ac4e04d9e378f5aa826c2406ad7871ae1b6a6fb9 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 29 Apr 2025 14:07:11 -0700 Subject: [PATCH 125/239] cpufreq: intel_pstate: Unchecked MSR aceess in legacy mode When turbo mode is unavailable on a Skylake-X system, executing the command: # echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo results in an unchecked MSR access error: WRMSR to 0x199 (attempted to write 0x0000000100001300). This issue was reproduced on an OEM (Original Equipment Manufacturer) system and is not a common problem across all Skylake-X systems. This error occurs because the MSR 0x199 Turbo Engage Bit (bit 32) is set when turbo mode is disabled. The issue arises when intel_pstate fails to detect that turbo mode is disabled. Here intel_pstate relies on MSR_IA32_MISC_ENABLE bit 38 to determine the status of turbo mode. However, on this system, bit 38 is not set even when turbo mode is disabled. According to the Intel Software Developer's Manual (SDM), the BIOS sets this bit during platform initialization to enable or disable opportunistic processor performance operations. Logically, this bit should be set in such cases. However, the SDM also specifies that "OS and applications must use CPUID leaf 06H to detect processors with opportunistic processor performance operations enabled." Therefore, in addition to checking MSR_IA32_MISC_ENABLE bit 38, verify that CPUID.06H:EAX[1] is 0 to accurately determine if turbo mode is disabled. Fixes: 4521e1a0ce17 ("cpufreq: intel_pstate: Reflect current no_turbo state correctly") Signed-off-by: Srinivas Pandruvada Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f41ed0b9e610..ba9bf06f1c77 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -598,6 +598,9 @@ static bool turbo_is_disabled(void) { u64 misc_en; + if (!cpu_feature_enabled(X86_FEATURE_IDA)) + return true; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); From 74c72419ec8da5cbc9c49410d3c44bb954538bdd Mon Sep 17 00:00:00 2001 From: Jethro Donaldson Date: Wed, 30 Apr 2025 00:59:15 +1200 Subject: [PATCH 126/239] smb: client: fix zero length for mkdir POSIX create context SMB create requests issued via smb311_posix_mkdir() have an incorrect length of zero bytes for the POSIX create context data. ksmbd server rejects such requests and logs "cli req too short" causing mkdir to fail with "invalid argument" on the client side. It also causes subsequent rmmod to crash in cifs_destroy_request_bufs() Inspection of packets sent by cifs.ko using wireshark show valid data for the SMB2_POSIX_CREATE_CONTEXT is appended with the correct offset, but with an incorrect length of zero bytes. Fails with ksmbd+cifs.ko only as Windows server/client does not use POSIX extensions. Fix smb311_posix_mkdir() to set req->CreateContextsLength as part of appending the POSIX creation context to the request. Signed-off-by: Jethro Donaldson Acked-by: Paulo Alcantara (Red Hat) Reviewed-by: Namjae Jeon Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c4d52bebd37d..a9a49555d43b 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2921,6 +2921,7 @@ replay_again: req->CreateContextsOffset = cpu_to_le32( sizeof(struct smb2_create_req) + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, iov[n_iov-1].iov_len); pc_buf = iov[n_iov-1].iov_base; } From 1041c117a2c33cdffc4f695ac4b469e9124d24d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 30 Dec 2024 20:34:18 +0100 Subject: [PATCH 127/239] cifs: Fix and improve cifs_query_path_info() and cifs_query_file_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CAP_NT_SMBS was not negotiated then do not issue CIFSSMBQPathInfo() and CIFSSMBQFileInfo() commands. CIFSSMBQPathInfo() is not supported by non-NT Win9x SMB server and CIFSSMBQFileInfo() returns from Win9x SMB server bogus data in Attributes field (for example lot of files are marked as reparse points, even Win9x does not support them and read-only bit is not marked for read-only files). Correct information is returned by CIFSFindFirst() or SMBQueryInformation() command. So as a fallback in cifs_query_path_info() function use CIFSFindFirst() with SMB_FIND_FILE_FULL_DIRECTORY_INFO level which is supported by both NT and non-NT servers and as a last option use SMBQueryInformation() as it was before. And in function cifs_query_file_info() immediately returns -EOPNOTSUPP when not communicating with NT server. Client then revalidate inode entry by the cifs_query_path_info() call, which is working fine. So fstat() syscall on already opened file will receive correct information. Note that both fallback functions in non-UNICODE mode expands wildcards. Therefore those fallback functions cannot be used on paths which contain SMB wildcard characters (* ? " > <). CIFSFindFirst() returns all 4 time attributes as opposite of SMBQueryInformation() which returns only one. With this change it is possible to query all 4 times attributes from Win9x server and at the same time, client minimize sending of unsupported commands to server. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb1ops.c | 103 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 0adeec652dc1..5c7a80bce5bd 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -543,24 +543,104 @@ static int cifs_query_path_info(const unsigned int xid, const char *full_path, struct cifs_open_info_data *data) { - int rc; + int rc = -EOPNOTSUPP; FILE_ALL_INFO fi = {}; + struct cifs_search_info search_info = {}; + bool non_unicode_wildcard = false; data->reparse_point = false; data->adjust_tz = false; - /* could do find first instead but this returns more info */ - rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, cifs_sb->local_nls, - cifs_remap(cifs_sb)); /* - * BB optimize code so we do not make the above call when server claims - * no NT SMB support and the above call failed at least once - set flag - * in tcon or mount. + * First try CIFSSMBQPathInfo() function which returns more info + * (NumberOfLinks) than CIFSFindFirst() fallback function. + * Some servers like Win9x do not support SMB_QUERY_FILE_ALL_INFO over + * TRANS2_QUERY_PATH_INFORMATION, but supports it with filehandle over + * TRANS2_QUERY_FILE_INFORMATION (function CIFSSMBQFileInfo(). But SMB + * Open command on non-NT servers works only for files, does not work + * for directories. And moreover Win9x SMB server returns bogus data in + * SMB_QUERY_FILE_ALL_INFO Attributes field. So for non-NT servers, + * do not even use CIFSSMBQPathInfo() or CIFSSMBQFileInfo() function. */ - if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { + if (tcon->ses->capabilities & CAP_NT_SMBS) + rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, + cifs_sb->local_nls, cifs_remap(cifs_sb)); + + /* + * Non-UNICODE variant of fallback functions below expands wildcards, + * so they cannot be used for querying paths with wildcard characters. + */ + if (rc && !(tcon->ses->capabilities & CAP_UNICODE) && strpbrk(full_path, "*?\"><")) + non_unicode_wildcard = true; + + /* + * Then fallback to CIFSFindFirst() which works also with non-NT servers + * but does not does not provide NumberOfLinks. + */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && + !non_unicode_wildcard) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) + search_info.info_level = SMB_FIND_FILE_INFO_STANDARD; + else + search_info.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; + rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb, NULL, + CIFS_SEARCH_CLOSE_ALWAYS | CIFS_SEARCH_CLOSE_AT_END, + &search_info, false); + if (rc == 0) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) { + FIND_FILE_STANDARD_INFO *di; + int offset = tcon->ses->server->timeAdj; + + di = (FIND_FILE_STANDARD_INFO *)search_info.srch_entries_start; + fi.CreationTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->CreationDate, di->CreationTime, offset))); + fi.LastAccessTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastAccessDate, di->LastAccessTime, offset))); + fi.LastWriteTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastWriteDate, di->LastWriteTime, offset))); + fi.ChangeTime = fi.LastWriteTime; + fi.Attributes = cpu_to_le32(le16_to_cpu(di->Attributes)); + fi.AllocationSize = cpu_to_le64(le32_to_cpu(di->AllocationSize)); + fi.EndOfFile = cpu_to_le64(le32_to_cpu(di->DataSize)); + } else { + FILE_FULL_DIRECTORY_INFO *di; + + di = (FILE_FULL_DIRECTORY_INFO *)search_info.srch_entries_start; + fi.CreationTime = di->CreationTime; + fi.LastAccessTime = di->LastAccessTime; + fi.LastWriteTime = di->LastWriteTime; + fi.ChangeTime = di->ChangeTime; + fi.Attributes = di->ExtFileAttributes; + fi.AllocationSize = di->AllocationSize; + fi.EndOfFile = di->EndOfFile; + fi.EASize = di->EaSize; + } + fi.NumberOfLinks = cpu_to_le32(1); + fi.DeletePending = 0; + fi.Directory = !!(le32_to_cpu(fi.Attributes) & ATTR_DIRECTORY); + cifs_buf_release(search_info.ntwrk_buf_start); + } else if (!full_path[0]) { + /* + * CIFSFindFirst() does not work on root path if the + * root path was exported on the server from the top + * level path (drive letter). + */ + rc = -EOPNOTSUPP; + } + } + + /* + * If everything failed then fallback to the legacy SMB command + * SMB_COM_QUERY_INFORMATION which works with all servers, but + * provide just few information. + */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && !non_unicode_wildcard) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); data->adjust_tz = true; + } else if ((rc == -EOPNOTSUPP || rc == -EINVAL) && non_unicode_wildcard) { + /* Path with non-UNICODE wildcard character cannot exist. */ + rc = -ENOENT; } if (!rc) { @@ -639,6 +719,13 @@ static int cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; FILE_ALL_INFO fi = {}; + /* + * CIFSSMBQFileInfo() for non-NT servers returns bogus data in + * Attributes fields. So do not use this command for non-NT servers. + */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS)) + return -EOPNOTSUPP; + if (cfile->symlink_target) { data->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!data->symlink_target) From f122121796f91168d0894c2710b8dd71330a34f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 30 Dec 2024 21:32:39 +0100 Subject: [PATCH 128/239] cifs: Fix changing times and read-only attr over SMB1 smb_set_file_info() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function CIFSSMBSetPathInfo() is not supported by non-NT servers and returns error. Fallback code via open filehandle and CIFSSMBSetFileInfo() does not work neither because CIFS_open() works also only on NT server. Therefore currently the whole smb_set_file_info() function as a SMB1 callback for the ->set_file_info() does not work with older non-NT SMB servers, like Win9x and others. This change implements fallback code in smb_set_file_info() which will works with any server and allows to change time values and also to set or clear read-only attributes. To make existing fallback code via CIFSSMBSetFileInfo() working with also non-NT servers, it is needed to change open function from CIFS_open() (which is NT specific) to cifs_open_file() which works with any server (this is just a open wrapper function which choose the correct open function supported by the server). CIFSSMBSetFileInfo() is working also on non-NT servers, but zero time values are not treated specially. So first it is needed to fill all time values if some of them are missing, via cifs_query_path_info() call. There is another issue, opening file in write-mode (needed for changing attributes) is not possible when the file has read-only attribute set. The only option how to clear read-only attribute is via SMB_COM_SETATTR command. And opening directory is not possible neither and here the SMB_COM_SETATTR command is the only option how to change attributes. And CIFSSMBSetFileInfo() does not honor setting read-only attribute, so for setting is also needed to use SMB_COM_SETATTR command. Existing code in cifs_query_path_info() is already using SMB_COM_GETATTR as a fallback code path (function SMBQueryInformation()), so introduce a new function SMBSetInformation which will implement SMB_COM_SETATTR command. My testing showed that Windows XP SMB1 client is also using SMB_COM_SETATTR command for setting or clearing read-only attribute against non-NT server. So this can prove that this is the correct way how to do it. With this change it is possible set all 4 time values and all attributes, including clearing and setting read-only bit on non-NT SMB servers. Tested against Win98 SMB1 server. This change fixes "touch" command which was failing when called on existing file. And fixes also "chmod +w" and "chmod -w" commands which were also failing (as they are changing read-only attribute). Note that this change depends on following change "cifs: Improve cifs_query_path_info() and cifs_query_file_info()" as it require to query all 4 time attribute values. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/cifspdu.h | 5 +- fs/smb/client/cifsproto.h | 4 ++ fs/smb/client/cifssmb.c | 57 +++++++++++++++++++ fs/smb/client/smb1ops.c | 112 +++++++++++++++++++++++++++++++++++--- 4 files changed, 166 insertions(+), 12 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 18d67ab113f0..1b79fe07476f 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1266,10 +1266,9 @@ typedef struct smb_com_query_information_rsp { typedef struct smb_com_setattr_req { struct smb_hdr hdr; /* wct = 8 */ __le16 attr; - __le16 time_low; - __le16 time_high; + __le32 last_write_time; __le16 reserved[5]; /* must be zero */ - __u16 ByteCount; + __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ unsigned char fileName[]; } __attribute__((packed)) SETATTR_REQ; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 59f6fdfe560e..ecf774a8f1ca 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -395,6 +395,10 @@ extern int CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon); extern int CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); +extern int SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb); extern int CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 60cb264a01e5..f55457b4b82e 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -5171,6 +5171,63 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +int +SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb) +{ + SETATTR_REQ *pSMB; + SETATTR_RSP *pSMBr; + struct timespec64 ts; + int bytes_returned; + int name_len; + int rc; + + cifs_dbg(FYI, "In %s path %s\n", __func__, fileName); + +retry: + rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, + (void **) &pSMBr); + if (rc) + return rc; + + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { + name_len = + cifsConvertToUTF16((__le16 *) pSMB->fileName, + fileName, PATH_MAX, nls_codepage, + cifs_remap(cifs_sb)); + name_len++; /* trailing null */ + name_len *= 2; + } else { + name_len = copy_path_name(pSMB->fileName, fileName); + } + /* Only few attributes can be set by this command, others are not accepted by Win9x. */ + pSMB->attr = cpu_to_le16(le32_to_cpu(attributes) & + (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_ARCHIVE)); + /* Zero write time value (in both NT and SETATTR formats) means to not change it. */ + if (le64_to_cpu(write_time) != 0) { + ts = cifs_NTtimeToUnix(write_time); + pSMB->last_write_time = cpu_to_le32(ts.tv_sec); + } + pSMB->BufferFormat = 0x04; + name_len++; /* account for buffer type byte */ + inc_rfc1001_len(pSMB, (__u16)name_len); + pSMB->ByteCount = cpu_to_le16(name_len); + + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned, 0); + if (rc) + cifs_dbg(FYI, "Send error in %s = %d\n", __func__, rc); + + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto retry; + + return rc; +} + /* Some legacy servers such as NT4 require that the file times be set on an open handle, rather than by pathname - this is awkward due to potential access conflicts on the open, but it is unavoidable for these diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 5c7a80bce5bd..ab26b9e9b0e7 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -896,6 +896,9 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifsFileInfo *open_file; + FILE_BASIC_INFO new_buf; + struct cifs_open_info_data query_data; + __le64 write_time = buf->LastWriteTime; struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; @@ -903,20 +906,58 @@ smb_set_file_info(struct inode *inode, const char *full_path, /* if the file is already open for write, just use that fileid */ open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + if (open_file) { fid.netfid = open_file->fid.netfid; netpid = open_file->pid; tcon = tlink_tcon(open_file->tlink); - goto set_via_filehandle; + } else { + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + tlink = NULL; + goto out; + } + tcon = tlink_tcon(tlink); } - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - tlink = NULL; - goto out; + /* + * Non-NT servers interprets zero time value in SMB_SET_FILE_BASIC_INFO + * over TRANS2_SET_FILE_INFORMATION as a valid time value. NT servers + * interprets zero time value as do not change existing value on server. + * API of ->set_file_info() callback expects that zero time value has + * the NT meaning - do not change. Therefore if server is non-NT and + * some time values in "buf" are zero, then fetch missing time values. + */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS) && + (!buf->CreationTime || !buf->LastAccessTime || + !buf->LastWriteTime || !buf->ChangeTime)) { + rc = cifs_query_path_info(xid, tcon, cifs_sb, full_path, &query_data); + if (rc) { + if (open_file) { + cifsFileInfo_put(open_file); + open_file = NULL; + } + goto out; + } + /* + * Original write_time from buf->LastWriteTime is preserved + * as SMBSetInformation() interprets zero as do not change. + */ + new_buf = *buf; + buf = &new_buf; + if (!buf->CreationTime) + buf->CreationTime = query_data.fi.CreationTime; + if (!buf->LastAccessTime) + buf->LastAccessTime = query_data.fi.LastAccessTime; + if (!buf->LastWriteTime) + buf->LastWriteTime = query_data.fi.LastWriteTime; + if (!buf->ChangeTime) + buf->ChangeTime = query_data.fi.ChangeTime; } - tcon = tlink_tcon(tlink); + + if (open_file) + goto set_via_filehandle; rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, cifs_sb); @@ -937,8 +978,45 @@ smb_set_file_info(struct inode *inode, const char *full_path, .fid = &fid, }; - cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); - rc = CIFS_open(xid, &oparms, &oplock, NULL); + if (S_ISDIR(inode->i_mode) && !(tcon->ses->capabilities & CAP_NT_SMBS)) { + /* Opening directory path is not possible on non-NT servers. */ + rc = -EOPNOTSUPP; + } else { + /* + * Use cifs_open_file() instead of CIFS_open() as the + * cifs_open_file() selects the correct function which + * works also on non-NT servers. + */ + rc = cifs_open_file(xid, &oparms, &oplock, NULL); + /* + * Opening path for writing on non-NT servers is not + * possible when the read-only attribute is already set. + * Non-NT server in this case returns -EACCES. For those + * servers the only possible way how to clear the read-only + * bit is via SMB_COM_SETATTR command. + */ + if (rc == -EACCES && + (cinode->cifsAttrs & ATTR_READONLY) && + le32_to_cpu(buf->Attributes) != 0 && /* 0 = do not change attrs */ + !(le32_to_cpu(buf->Attributes) & ATTR_READONLY) && + !(tcon->ses->capabilities & CAP_NT_SMBS)) + rc = -EOPNOTSUPP; + } + + /* Fallback to SMB_COM_SETATTR command when absolutelty needed. */ + if (rc == -EOPNOTSUPP) { + cifs_dbg(FYI, "calling SetInformation since SetPathInfo for attrs/times not supported by this server\n"); + rc = SMBSetInformation(xid, tcon, full_path, + buf->Attributes != 0 ? buf->Attributes : cpu_to_le32(cinode->cifsAttrs), + write_time, + cifs_sb->local_nls, cifs_sb); + if (rc == 0) + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + else + rc = -EACCES; + goto out; + } + if (rc != 0) { if (rc == -EIO) rc = -EINVAL; @@ -946,6 +1024,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, } netpid = current->tgid; + cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for attrs/times not supported by this server\n"); set_via_filehandle: rc = CIFSSMBSetFileInfo(xid, tcon, buf, fid.netfid, netpid); @@ -956,6 +1035,21 @@ set_via_filehandle: CIFSSMBClose(xid, tcon, fid.netfid); else cifsFileInfo_put(open_file); + + /* + * Setting the read-only bit is not honered on non-NT servers when done + * via open-semantics. So for setting it, use SMB_COM_SETATTR command. + * This command works only after the file is closed, so use it only when + * operation was called without the filehandle. + */ + if (open_file == NULL && + !(tcon->ses->capabilities & CAP_NT_SMBS) && + le32_to_cpu(buf->Attributes) & ATTR_READONLY) { + SMBSetInformation(xid, tcon, full_path, + buf->Attributes, + 0 /* do not change write time */, + cifs_sb->local_nls, cifs_sb); + } out: if (tlink != NULL) cifs_put_tlink(tlink); From 2feaa92c7c0123013a2a3e3d02ff8a5f5a794e96 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 23:33:06 -0400 Subject: [PATCH 129/239] bcachefs: improve missing journal write device error message Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2a54ac79189b..63cdf885c9e2 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1782,7 +1782,7 @@ static CLOSURE_CALLBACK(journal_write_submit) struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE); if (!ca) { /* XXX: fix this */ - bch_err(c, "missing device for journal write\n"); + bch_err(c, "missing device %u for journal write", ptr->dev); continue; } From 5e63d579e752549dc256a952bcb35ade398ee921 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 29 Apr 2025 14:30:01 -0400 Subject: [PATCH 130/239] bcachefs: readdir fixes - Don't call bch2_trans_relock() after dir_emit(); taking a transaction restart here will cause us to emit the same dirent to userspace twice - Fix incorrect checking of the return value on dir_emit(): "true" means success, keep going, but bch2_dir_emit() needs to return true when we're finished iterating. https://github.com/koverstreet/bcachefs/issues/867 Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 92ee59d9e00e..8a680e52c1ed 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -685,7 +685,7 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv vfs_d_type(d.v->d_type)); if (ret) ctx->pos = d.k->p.offset + 1; - return ret; + return !ret; } int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) @@ -710,7 +710,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) if (ret2 > 0) continue; - ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target)); + ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target)); }))); bch2_bkey_buf_exit(&sk, c); From 63f5235e0291152a2ac2c4ef3c1196cb6dfb3ef7 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Wed, 30 Apr 2025 18:18:43 +0800 Subject: [PATCH 131/239] ALSA: hda/realtek - Add more HP laptops which need mute led fixup More HP EliteBook with Realtek HDA codec ALC3247 and combined CS35L56 Amplifiers need quirk ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED. Signed-off-by: Chris Chiu Cc: Link: https://patch.msgid.link/20250430101843.150833-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7810d5f9b5aa..8a2b09e4a7d5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10863,8 +10863,11 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ded, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dee, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfe, "HP EliteBook 665 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), From 650266ac4c7230c89bcd1307acf5c9c92cfa85e2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Apr 2025 11:05:54 +0300 Subject: [PATCH 132/239] dm: add missing unlock on in dm_keyslot_evict() We need to call dm_put_live_table() even if dm_get_live_table() returns NULL. Fixes: 9355a9eb21a5 ("dm: support key eviction from keyslot managers of underlying devices") Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Dan Carpenter Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9e175c5e0634..31d67a1a91dd 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1173,7 +1173,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, t = dm_get_live_table(md, &srcu_idx); if (!t) - return 0; + goto put_live_table; for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1184,6 +1184,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, (void *)key); } +put_live_table: dm_put_live_table(md, srcu_idx); return 0; } From e6a3fc4f10b872d02e25f83227e725c79b25d893 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 30 Apr 2025 14:48:37 +0200 Subject: [PATCH 133/239] genirq/msi: Prevent NULL pointer dereference in msi_domain_debug_show() irq_domain_debug_show_one() calls msi_domain_debug_show() with a non-NULL domain pointer and a NULL irq_data pointer. irq_debug_show_data() calls it with a NULL domain pointer. The domain pointer is not used, but the irq_data pointer is required to be non-NULL and lacks a NULL pointer check. Add the missing NULL pointer check to ensure there is a non-NULL irq_data pointer in msi_domain_debug_show() before dereferencing it. [ tglx: Massaged change log ] Fixes: 01499ae673dc ("genirq/msi: Expose MSI message data in debugfs") Signed-off-by: Andrew Jones Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250430124836.49964-2-ajones@ventanamicro.com --- kernel/irq/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 5c8d43cdb0a3..c05ba7ca00fa 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -761,7 +761,7 @@ static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fw static void msi_domain_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - struct msi_desc *desc = irq_data_get_msi_desc(irqd); + struct msi_desc *desc = irqd ? irq_data_get_msi_desc(irqd) : NULL; if (!desc) return; From edea92770a3b6454dc796fc5436a3315bb402181 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 30 Apr 2025 18:52:08 +0200 Subject: [PATCH 134/239] ASoC: stm32: sai: skip useless iterations on kernel rate loop the frequency of the kernel clock must be greater than or equal to the bitclock rate. When searching for a convenient kernel clock rate in stm32_sai_set_parent_rate() function, it is useless to continue the loop below bitclock rate, as it will result in a invalid kernel clock rate. Change the loop output condition. Fixes: 2cfe1ff22555 ("ASoC: stm32: sai: add stm32mp25 support") Signed-off-by: Olivier Moysan Link: https://patch.msgid.link/20250430165210.321273-2-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index e8c1abf1ae0a..4d018b4bc3f0 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -409,11 +409,11 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, unsigned int rate) { struct platform_device *pdev = sai->pdev; - unsigned int sai_ck_rate, sai_ck_max_rate, sai_curr_rate, sai_new_rate; + unsigned int sai_ck_rate, sai_ck_max_rate, sai_ck_min_rate, sai_curr_rate, sai_new_rate; int div, ret; /* - * Set maximum expected kernel clock frequency + * Set minimum and maximum expected kernel clock frequency * - mclk on or spdif: * f_sai_ck = MCKDIV * mclk-fs * fs * Here typical 256 ratio is assumed for mclk-fs @@ -423,13 +423,16 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, * Set constraint MCKDIV * FRL <= 256, to ensure MCKDIV is in available range * f_sai_ck = sai_ck_max_rate * pow_of_two(FRL) / 256 */ + sai_ck_min_rate = rate * 256; if (!(rate % SAI_RATE_11K)) sai_ck_max_rate = SAI_MAX_SAMPLE_RATE_11K * 256; else sai_ck_max_rate = SAI_MAX_SAMPLE_RATE_8K * 256; - if (!sai->sai_mclk && !STM_SAI_PROTOCOL_IS_SPDIF(sai)) + if (!sai->sai_mclk && !STM_SAI_PROTOCOL_IS_SPDIF(sai)) { + sai_ck_min_rate = rate * sai->fs_length; sai_ck_max_rate /= DIV_ROUND_CLOSEST(256, roundup_pow_of_two(sai->fs_length)); + } /* * Request exclusivity, as the clock is shared by SAI sub-blocks and by @@ -472,7 +475,7 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, /* Try a lower frequency */ div++; sai_ck_rate = sai_ck_max_rate / div; - } while (sai_ck_rate > rate); + } while (sai_ck_rate >= sai_ck_min_rate); /* No accurate rate found */ dev_err(&pdev->dev, "Failed to find an accurate rate"); From cce34d113e2a592806abcdc02c7f8513775d8b20 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 30 Apr 2025 18:52:09 +0200 Subject: [PATCH 135/239] ASoC: stm32: sai: add a check on minimal kernel frequency On MP2 SoCs SAI kernel clock rate is managed through stm32_sai_set_parent_rate() function. If the kernel clock rate was set previously to a low frequency, this frequency may be too low to support the newly requested audio stream rate. However the stm32_sai_rate_accurate() will only check accuracy against the maximum kernel clock rate. The function will return leaving the kernel clock rate unchanged. Add a check on minimal frequency requirement, to avoid this. Fixes: 2cfe1ff22555 ("ASoC: stm32: sai: add stm32mp25 support") Signed-off-by: Olivier Moysan Link: https://patch.msgid.link/20250430165210.321273-3-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 4d018b4bc3f0..bf5299ba11c3 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -447,7 +447,10 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, * return immediately. */ sai_curr_rate = clk_get_rate(sai->sai_ck); - if (stm32_sai_rate_accurate(sai_ck_max_rate, sai_curr_rate)) + dev_dbg(&pdev->dev, "kernel clock rate: min [%u], max [%u], current [%u]", + sai_ck_min_rate, sai_ck_max_rate, sai_curr_rate); + if (stm32_sai_rate_accurate(sai_ck_max_rate, sai_curr_rate) && + sai_curr_rate >= sai_ck_min_rate) return 0; /* From 02b44a2b2bdcee03cbb92484d31e9ca1b91b2a38 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 30 Apr 2025 11:31:19 +0100 Subject: [PATCH 136/239] ASoC: intel/sdw_utils: Add volume limit to cs42l43 speakers The volume control for cs42l43 speakers has a maximum gain of +31.5 dB. However, for many use cases, this can cause distorted audio, depending various factors, such as other signal-processing elements in the chain, for example if the audio passes through a gain control before reaching the codec or the signal path has been tuned for a particular maximum gain in the codec. In the case of systems which use the soc_sdw_cs42l43 driver, audio will likely be distorted in all cases above 0 dB, therefore add a volume limit of 128, which is 0 dB maximum volume inside this driver. Signed-off-by: Stefan Binding Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250430103134.24579-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_cs42l43.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_cs42l43.c b/sound/soc/sdw_utils/soc_sdw_cs42l43.c index 668c9d28a1c1..b415d45d520d 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs42l43.c +++ b/sound/soc/sdw_utils/soc_sdw_cs42l43.c @@ -20,6 +20,8 @@ #include #include +#define CS42L43_SPK_VOLUME_0DB 128 /* 0dB Max */ + static const struct snd_soc_dapm_route cs42l43_hs_map[] = { { "Headphone", NULL, "cs42l43 AMP3_OUT" }, { "Headphone", NULL, "cs42l43 AMP4_OUT" }, @@ -117,6 +119,14 @@ int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_so return -ENOMEM; } + ret = snd_soc_limit_volume(card, "cs42l43 Speaker Digital Volume", + CS42L43_SPK_VOLUME_0DB); + if (ret) + dev_err(card->dev, "cs42l43 speaker volume limit failed: %d\n", ret); + else + dev_info(card->dev, "Setting CS42L43 Speaker volume limit to %d\n", + CS42L43_SPK_VOLUME_0DB); + ret = snd_soc_dapm_add_routes(&card->dapm, cs42l43_spk_map, ARRAY_SIZE(cs42l43_spk_map)); if (ret) From d5463e531c128ff1b141fdba2e13345cd50028a4 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 30 Apr 2025 11:31:20 +0100 Subject: [PATCH 137/239] ASoC: intel/sdw_utils: Add volume limit to cs35l56 speakers The volume control for cs35l56 speakers has a maximum gain of +12 dB. However, for many use cases, this can cause distorted audio, depending various factors, such as other signal-processing elements in the chain, for example if the audio passes through a gain control before reaching the amp or the signal path has been tuned for a particular maximum gain in the amp. In the case of systems which use the soc_sdw_* driver, audio will likely be distorted in all cases above 0 dB, therefore add a volume limit of 400, which is 0 dB maximum volume inside this driver. The volume limit should be applied to both soundwire and soundwire bridge configurations. Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250430103134.24579-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c | 4 ++++ sound/soc/sdw_utils/soc_sdw_cs_amp.c | 24 ++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 36a4a1e1d8ca..d8bd5d37131a 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -226,6 +226,7 @@ int asoc_sdw_cs_amp_init(struct snd_soc_card *card, bool playback); int asoc_sdw_cs_spk_feedback_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix); /* MAXIM codec support */ int asoc_sdw_maxim_init(struct snd_soc_card *card, diff --git a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c index 246e5c2e0af5..c7e55f443351 100644 --- a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c +++ b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c @@ -60,6 +60,10 @@ static int asoc_sdw_bridge_cs35l56_asp_init(struct snd_soc_pcm_runtime *rtd) /* 4 x 16-bit sample slots and FSYNC=48000, BCLK=3.072 MHz */ for_each_rtd_codec_dais(rtd, i, codec_dai) { + ret = asoc_sdw_cs35l56_volume_limit(card, codec_dai->component->name_prefix); + if (ret) + return ret; + ret = snd_soc_dai_set_tdm_slot(codec_dai, tx_mask, rx_mask, 4, 16); if (ret < 0) return ret; diff --git a/sound/soc/sdw_utils/soc_sdw_cs_amp.c b/sound/soc/sdw_utils/soc_sdw_cs_amp.c index 4b6181cf2971..35b550bcd4de 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs_amp.c +++ b/sound/soc/sdw_utils/soc_sdw_cs_amp.c @@ -16,6 +16,25 @@ #define CODEC_NAME_SIZE 8 #define CS_AMP_CHANNELS_PER_AMP 4 +#define CS35L56_SPK_VOLUME_0DB 400 /* 0dB Max */ + +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix) +{ + char *volume_ctl_name; + int ret; + + volume_ctl_name = kasprintf(GFP_KERNEL, "%s Speaker Volume", name_prefix); + if (!volume_ctl_name) + return -ENOMEM; + + ret = snd_soc_limit_volume(card, volume_ctl_name, CS35L56_SPK_VOLUME_0DB); + if (ret) + dev_err(card->dev, "%s limit set failed: %d\n", volume_ctl_name, ret); + + kfree(volume_ctl_name); + return ret; +} +EXPORT_SYMBOL_NS(asoc_sdw_cs35l56_volume_limit, "SND_SOC_SDW_UTILS"); int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) { @@ -40,6 +59,11 @@ int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai snprintf(widget_name, sizeof(widget_name), "%s SPK", codec_dai->component->name_prefix); + + ret = asoc_sdw_cs35l56_volume_limit(card, codec_dai->component->name_prefix); + if (ret) + return ret; + ret = snd_soc_dapm_add_routes(&card->dapm, &route, 1); if (ret) return ret; From 3cc393d2232ec770b5f79bf0673d67702a3536c3 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 29 Apr 2025 11:49:10 +0200 Subject: [PATCH 138/239] ASoC: simple-card-utils: Fix pointer check in graph_util_parse_link_direction Actually check if the passed pointers are valid, before writing to them. This also fixes a USBAN warning: UBSAN: invalid-load in ../sound/soc/fsl/imx-card.c:687:25 load of value 255 is not a valid value for type '_Bool' This is because playback_only is uninitialized and is not written to, as the playback-only property is absent. Fixes: 844de7eebe97 ("ASoC: audio-graph-card2: expand dai_link property part") Signed-off-by: Alexander Stein Link: https://patch.msgid.link/20250429094910.1150970-1-alexander.stein@ew.tq-group.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index a1ccc300e68c..3ae2a212a2e3 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1174,9 +1174,9 @@ void graph_util_parse_link_direction(struct device_node *np, bool is_playback_only = of_property_read_bool(np, "playback-only"); bool is_capture_only = of_property_read_bool(np, "capture-only"); - if (is_playback_only) + if (playback_only) *playback_only = is_playback_only; - if (is_capture_only) + if (capture_only) *capture_only = is_capture_only; } EXPORT_SYMBOL_GPL(graph_util_parse_link_direction); From 7f91f012c1df07af6b915d1f8cece202774bb50e Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 May 2025 01:24:43 +0530 Subject: [PATCH 139/239] ASoC: amd: ps: fix for irq handler return status If any Soundwire manager interrupt is reported, and wake interrupt is not reported, in this scenario irq_flag will be set to zero, which results in interrupt handler return status as IRQ_NONE. Add new irq flag 'wake_irq_flag' check for SoundWire wake interrupt handling to fix incorrect irq handling return status. Fixes: 3898b189079c8 ("ASoC: amd: ps: add soundwire wake interrupt handling") Signed-off-by: Vijendar Mukunda Link: https://patch.msgid.link/20250430195517.3065308-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/ps/pci-ps.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index 8e57f31ef7f7..7936b3173632 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -193,6 +193,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) struct amd_sdw_manager *amd_manager; u32 ext_intr_stat, ext_intr_stat1; u16 irq_flag = 0; + u16 wake_irq_flag = 0; u16 sdw_dma_irq_flag = 0; adata = dev_id; @@ -231,7 +232,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) } if (adata->acp_rev >= ACP70_PCI_REV) - irq_flag = check_and_handle_acp70_sdw_wake_irq(adata); + wake_irq_flag = check_and_handle_acp70_sdw_wake_irq(adata); if (ext_intr_stat & BIT(PDM_DMA_STAT)) { ps_pdm_data = dev_get_drvdata(&adata->pdm_dev->dev); @@ -245,7 +246,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) if (sdw_dma_irq_flag) return IRQ_WAKE_THREAD; - if (irq_flag) + if (irq_flag | wake_irq_flag) return IRQ_HANDLED; else return IRQ_NONE; From 05450c48a35810c5025cffb41dbf566cfb122415 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 23:18:49 -0400 Subject: [PATCH 140/239] bcachefs: Kill ERO in __bch2_i_sectors_acct() We won't be root causing this in the immediate future, and it's fairly innocuous - so just log it in the superblock. https://github.com/koverstreet/bcachefs/issues/869 Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 23 +++++++++++++++++++---- fs/bcachefs/sb-errors_format.h | 5 ++++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 65c2c33d253d..28619b6bd327 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -144,10 +144,25 @@ int __must_check bch2_write_inode_size(struct bch_fs *c, void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, struct quota_res *quota_res, s64 sectors) { - bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, - "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, sectors, - inode->ei_inode.bi_sectors); + if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, sectors, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, buf.buf, &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (sectors < 0) + sectors = -inode->v.i_blocks; + else + sectors = 0; + } + inode->v.i_blocks += sectors; #ifdef CONFIG_BCACHEFS_QUOTA diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index a4ad5924107b..582e77cbaf8f 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -236,6 +236,7 @@ enum bch_fsck_flags { x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -317,7 +318,9 @@ enum bch_fsck_flags { x(directory_size_mismatch, 303, FSCK_AUTOFIX) \ x(dirent_cf_name_too_big, 304, 0) \ x(dirent_stray_data_after_cf_name, 305, 0) \ - x(MAX, 308, 0) + x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ + x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ + x(MAX, 312, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 3a72e369412d6bf7f6a8af410ac4bdd7d48deb62 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 23:56:00 -0400 Subject: [PATCH 141/239] bcachefs: check for inode.bi_sectors underflow Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 21 +++++++++++++++++++++ fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index a418fa62f09d..c1237da079ed 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -255,6 +255,27 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, } if (i_sectors_delta) { + s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); + if (unlikely(bi_sectors + i_sectors_delta < 0)) { + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", + extent_iter->pos.inode, bi_sectors, i_sectors_delta); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, inode_i_sectors_underflow, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (i_sectors_delta < 0) + i_sectors_delta = -bi_sectors; + else + i_sectors_delta = 0; + } + le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta); inode_update_flags = 0; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 582e77cbaf8f..822a0b42432d 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -236,6 +236,7 @@ enum bch_fsck_flags { x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ @@ -320,7 +321,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 312, 0) + x(MAX, 313, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From e660d7ca7488214ae19d964d196f89b4237ec829 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 May 2025 00:01:29 -0400 Subject: [PATCH 142/239] bcachefs: Kill ERO for i_blocks check in truncate Replace with logging the error in the superblock. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 21 ++++++++++++++++----- fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 28619b6bd327..9657144666b8 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -517,11 +517,22 @@ int bchfs_truncate(struct mnt_idmap *idmap, goto err; } - bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && - !bch2_journal_error(&c->journal), c, - "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, - inode->ei_inode.bi_sectors); + if (unlikely(!inode->v.i_size && inode->v.i_blocks && + !bch2_journal_error(&c->journal))) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + } ret = bch2_setattr_nonsize(idmap, inode, iattr); err: diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 822a0b42432d..3b69a924086f 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -238,6 +238,7 @@ enum bch_fsck_flags { x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -321,7 +322,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 313, 0) + x(MAX, 314, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From c59f7c9661b9d3ee33a21d7b4f1dd4b77079e3e7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 30 Apr 2025 20:15:48 -0300 Subject: [PATCH 143/239] smb: client: ensure aligned IO sizes Make all IO sizes multiple of PAGE_SIZE, either negotiated by the server or passed through rsize, wsize and bsize mount options, to prevent from breaking DIO reads and writes against servers that enforce alignment as specified in MS-FSA 2.1.5.3 and 2.1.5.4. Cc: linux-cifs@vger.kernel.org Reviewed-by: David Howells Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/connect.c | 23 +------------------ fs/smb/client/file.c | 6 ++--- fs/smb/client/fs_context.c | 25 +++++--------------- fs/smb/client/fs_context.h | 47 ++++++++++++++++++++++++++++++++++++++ fs/smb/client/smb1ops.c | 8 +++---- fs/smb/client/smb2pdu.c | 8 ++----- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index df976ce6aed9..6bf04d9a5491 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3753,28 +3753,7 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) } } - /* - * Clamp the rsize/wsize mount arguments if they are too big for the server - * and set the rsize/wsize to the negotiated values if not passed in by - * the user on mount - */ - if ((cifs_sb->ctx->wsize == 0) || - (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) { - cifs_sb->ctx->wsize = - round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); - /* - * in the very unlikely event that the server sent a max write size under PAGE_SIZE, - * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096 - */ - if (cifs_sb->ctx->wsize == 0) { - cifs_sb->ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n"); - } - } - if ((cifs_sb->ctx->rsize == 0) || - (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) - cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); - + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); /* * The cookie is initialized from volume info returned above. * Inside cifs_fscache_get_super_cookie it checks diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9e8f404b9e56..851b74f557c1 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -160,10 +160,8 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), - cifs_sb->ctx); + cifs_negotiate_rsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 2980941b9667..a634a34d4086 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1021,6 +1021,7 @@ static int smb3_reconfigure(struct fs_context *fc) struct dentry *root = fc->root; struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); struct cifs_ses *ses = cifs_sb_master_tcon(cifs_sb)->ses; + unsigned int rsize = ctx->rsize, wsize = ctx->wsize; char *new_password = NULL, *new_password2 = NULL; bool need_recon = false; int rc; @@ -1103,11 +1104,8 @@ static int smb3_reconfigure(struct fs_context *fc) STEAL_STRING(cifs_sb, ctx, iocharset); /* if rsize or wsize not passed in on remount, use previous values */ - if (ctx->rsize == 0) - ctx->rsize = cifs_sb->ctx->rsize; - if (ctx->wsize == 0) - ctx->wsize = cifs_sb->ctx->wsize; - + ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize; + ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize; smb3_cleanup_fs_context_contents(cifs_sb->ctx); rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); @@ -1312,7 +1310,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, __func__); goto cifs_parse_mount_err; } - ctx->bsize = result.uint_32; + ctx->bsize = CIFS_ALIGN_BSIZE(fc, result.uint_32); ctx->got_bsize = true; break; case Opt_rasize: @@ -1336,24 +1334,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->rasize = result.uint_32; break; case Opt_rsize: - ctx->rsize = result.uint_32; + ctx->rsize = CIFS_ALIGN_RSIZE(fc, result.uint_32); ctx->got_rsize = true; ctx->vol_rsize = ctx->rsize; break; case Opt_wsize: - ctx->wsize = result.uint_32; + ctx->wsize = CIFS_ALIGN_WSIZE(fc, result.uint_32); ctx->got_wsize = true; - if (ctx->wsize % PAGE_SIZE != 0) { - ctx->wsize = round_down(ctx->wsize, PAGE_SIZE); - if (ctx->wsize == 0) { - ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE); - } else { - cifs_dbg(VFS, - "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n", - ctx->wsize, PAGE_SIZE); - } - } ctx->vol_wsize = ctx->wsize; break; case Opt_acregmax: diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index d1d29249bcdb..9e83302ce4b8 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -20,6 +20,21 @@ cifs_dbg(VFS, fmt, ## __VA_ARGS__); \ } while (0) +static inline size_t cifs_io_align(struct fs_context *fc, + const char *name, size_t size) +{ + if (!size || !IS_ALIGNED(size, PAGE_SIZE)) { + cifs_errorf(fc, "unaligned %s, making it a multiple of %lu bytes\n", + name, PAGE_SIZE); + size = umax(round_down(size, PAGE_SIZE), PAGE_SIZE); + } + return size; +} + +#define CIFS_ALIGN_WSIZE(_fc, _size) cifs_io_align(_fc, "wsize", _size) +#define CIFS_ALIGN_RSIZE(_fc, _size) cifs_io_align(_fc, "rsize", _size) +#define CIFS_ALIGN_BSIZE(_fc, _size) cifs_io_align(_fc, "bsize", _size) + enum smb_version { Smb_1 = 1, Smb_20, @@ -361,4 +376,36 @@ static inline void cifs_mount_unlock(void) mutex_unlock(&cifs_mount_mutex); } +static inline void cifs_negotiate_rsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_rsize(tcon, ctx), PAGE_SIZE); + if (ctx->rsize) + size = umax(umin(ctx->rsize, size), PAGE_SIZE); + ctx->rsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_wsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); + if (ctx->wsize) + size = umax(umin(ctx->wsize, size), PAGE_SIZE); + ctx->wsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_iosize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + cifs_negotiate_rsize(server, ctx, tcon); + cifs_negotiate_wsize(server, ctx, tcon); +} + #endif diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index ab26b9e9b0e7..b27a182629ec 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -432,7 +432,7 @@ cifs_negotiate(const unsigned int xid, } static unsigned int -cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -467,7 +467,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) } static unsigned int -cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -1342,8 +1342,8 @@ struct smb_version_operations smb1_operations = { .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, - .negotiate_wsize = cifs_negotiate_wsize, - .negotiate_rsize = cifs_negotiate_rsize, + .negotiate_wsize = smb1_negotiate_wsize, + .negotiate_rsize = smb1_negotiate_rsize, .sess_setup = CIFS_SessSetup, .logoff = CIFSSMBLogoff, .tree_connect = CIFSTCon, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a9a49555d43b..0b35816d551f 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4093,12 +4093,8 @@ static void cifs_renegotiate_iosize(struct TCP_Server_Info *server, return; spin_lock(&tcon->sb_list_lock); - list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) { - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tcon, cifs_sb->ctx); - cifs_sb->ctx->wsize = - server->ops->negotiate_wsize(tcon, cifs_sb->ctx); - } + list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); spin_unlock(&tcon->sb_list_lock); } From be593d9d91c5a3a363d456b9aceb71029aeb3f1d Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Thu, 17 Apr 2025 16:50:05 -0500 Subject: [PATCH 144/239] drm/amd/display: Fix slab-use-after-free in hdcp The HDCP code in amdgpu_dm_hdcp.c copies pointers to amdgpu_dm_connector objects without incrementing the kref reference counts. When using a USB-C dock, and the dock is unplugged, the corresponding amdgpu_dm_connector objects are freed, creating dangling pointers in the HDCP code. When the dock is plugged back, the dangling pointers are dereferenced, resulting in a slab-use-after-free: [ 66.775837] BUG: KASAN: slab-use-after-free in event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776171] Read of size 4 at addr ffff888127804120 by task kworker/0:1/10 [ 66.776179] CPU: 0 UID: 0 PID: 10 Comm: kworker/0:1 Not tainted 6.14.0-rc7-00180-g54505f727a38-dirty #233 [ 66.776183] Hardware name: HP HP Pavilion Aero Laptop 13-be0xxx/8916, BIOS F.17 12/18/2024 [ 66.776186] Workqueue: events event_property_validate [amdgpu] [ 66.776494] Call Trace: [ 66.776496] [ 66.776497] dump_stack_lvl+0x70/0xa0 [ 66.776504] print_report+0x175/0x555 [ 66.776507] ? __virt_addr_valid+0x243/0x450 [ 66.776510] ? kasan_complete_mode_report_info+0x66/0x1c0 [ 66.776515] kasan_report+0xeb/0x1c0 [ 66.776518] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776819] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777121] __asan_report_load4_noabort+0x14/0x20 [ 66.777124] event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777342] ? __lock_acquire+0x6b40/0x6b40 [ 66.777347] ? enable_assr+0x250/0x250 [amdgpu] [ 66.777571] process_one_work+0x86b/0x1510 [ 66.777575] ? pwq_dec_nr_in_flight+0xcf0/0xcf0 [ 66.777578] ? assign_work+0x16b/0x280 [ 66.777580] ? lock_is_held_type+0xa3/0x130 [ 66.777583] worker_thread+0x5c0/0xfa0 [ 66.777587] ? process_one_work+0x1510/0x1510 [ 66.777588] kthread+0x3a2/0x840 [ 66.777591] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777594] ? trace_hardirqs_on+0x4f/0x60 [ 66.777597] ? _raw_spin_unlock_irq+0x27/0x60 [ 66.777599] ? calculate_sigpending+0x77/0xa0 [ 66.777602] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777605] ret_from_fork+0x40/0x90 [ 66.777607] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777609] ret_from_fork_asm+0x11/0x20 [ 66.777614] [ 66.777643] Allocated by task 10: [ 66.777646] kasan_save_stack+0x39/0x60 [ 66.777649] kasan_save_track+0x14/0x40 [ 66.777652] kasan_save_alloc_info+0x37/0x50 [ 66.777655] __kasan_kmalloc+0xbb/0xc0 [ 66.777658] __kmalloc_cache_noprof+0x1c8/0x4b0 [ 66.777661] dm_dp_add_mst_connector+0xdd/0x5c0 [amdgpu] [ 66.777880] drm_dp_mst_port_add_connector+0x47e/0x770 [drm_display_helper] [ 66.777892] drm_dp_send_link_address+0x1554/0x2bf0 [drm_display_helper] [ 66.777901] drm_dp_check_and_send_link_address+0x187/0x1f0 [drm_display_helper] [ 66.777909] drm_dp_mst_link_probe_work+0x2b8/0x410 [drm_display_helper] [ 66.777917] process_one_work+0x86b/0x1510 [ 66.777919] worker_thread+0x5c0/0xfa0 [ 66.777922] kthread+0x3a2/0x840 [ 66.777925] ret_from_fork+0x40/0x90 [ 66.777927] ret_from_fork_asm+0x11/0x20 [ 66.777932] Freed by task 1713: [ 66.777935] kasan_save_stack+0x39/0x60 [ 66.777938] kasan_save_track+0x14/0x40 [ 66.777940] kasan_save_free_info+0x3b/0x60 [ 66.777944] __kasan_slab_free+0x52/0x70 [ 66.777946] kfree+0x13f/0x4b0 [ 66.777949] dm_dp_mst_connector_destroy+0xfa/0x150 [amdgpu] [ 66.778179] drm_connector_free+0x7d/0xb0 [ 66.778184] drm_mode_object_put.part.0+0xee/0x160 [ 66.778188] drm_mode_object_put+0x37/0x50 [ 66.778191] drm_atomic_state_default_clear+0x220/0xd60 [ 66.778194] __drm_atomic_state_free+0x16e/0x2a0 [ 66.778197] drm_mode_atomic_ioctl+0x15ed/0x2ba0 [ 66.778200] drm_ioctl_kernel+0x17a/0x310 [ 66.778203] drm_ioctl+0x584/0xd10 [ 66.778206] amdgpu_drm_ioctl+0xd2/0x1c0 [amdgpu] [ 66.778375] __x64_sys_ioctl+0x139/0x1a0 [ 66.778378] x64_sys_call+0xee7/0xfb0 [ 66.778381] do_syscall_64+0x87/0x140 [ 66.778385] entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fix this by properly incrementing and decrementing the reference counts when making and deleting copies of the amdgpu_dm_connector pointers. (Mario: rebase on current code and update fixes tag) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4006 Signed-off-by: Chris Bainbridge Fixes: da3fd7ac0bcf3 ("drm/amd/display: Update CP property based on HW query") Reviewed-by: Alex Hung Link: https://lore.kernel.org/r/20250417215005.37964-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit d4673f3c3b3dcb74e36e53cdfc880baa7a87b330) Cc: stable@vger.kernel.org --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 5198a079b463..8f22ad966543 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -173,6 +173,9 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -220,7 +223,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -236,7 +238,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); } @@ -254,6 +259,10 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); @@ -488,6 +497,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -544,7 +554,10 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); } From 9397204ffae887bd557e7053609174b3eb9d6f5c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 17 Apr 2025 12:02:09 -0400 Subject: [PATCH 145/239] drm/amdgpu: Fail DMABUF map of XGMI-accessible memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If peer memory is XGMI-accessible, we should never access it through PCIe P2P DMA mappings. PCIe P2P is slower, has different coherence behaviour, limited or no support for atomics, or may not work at all. Fail with a warning if DMABUF mappings of such memory are attempted. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit dbe4c63689bc6b5fd3ab72650ea4b6a667e96a68) --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e6913fcf2c7b..44e120f9f764 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -199,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: + /* XGMI-accessible memory should never be DMA-mapped */ + if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible( + dma_buf_attach_adev(attach), bo))) + return ERR_PTR(-EINVAL); + r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0, bo->tbo.base.size, attach->dev, dir, &sgt); From 79af0604eb80ca1f86a1f265a0b1f9d4fccbc18f Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 21 Apr 2025 13:25:51 +0530 Subject: [PATCH 146/239] drm/amdgpu: Fix offset for HDP remap in nbio v7.11 APUs in passthrough mode use HDP flush. 0x7F000 offset used for remapping HDP flush is mapped to VPE space which could get power gated. Use another unused offset in BIF space. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit d8116a32cdbe456c7f511183eb9ab187e3d590fb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 2ece3ae75ec1..bed5ef4d8788 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -360,7 +360,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) +#define MMIO_REG_HOLE_OFFSET 0x44000 static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) { From 6718b10a5b98ad6629cd6b2004b0628fe68beac0 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Wed, 23 Apr 2025 12:32:01 -0400 Subject: [PATCH 147/239] drm/amdgpu: Add DPG pause for VCN v5.0.1 For vcn5.0.1 only, enable DPG PAUSE to avoid DPG resets. Signed-off-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit 3e5f86c14c3440171f2a3e7a68ceb739297726e9) --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 54 +++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 581d8629b9d9..e0e84ef7f568 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -502,6 +502,52 @@ static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { } +/** + * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + + /** * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode * @@ -518,6 +564,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; int vcn_inst; uint32_t tmp; @@ -582,6 +629,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* Pause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); @@ -775,9 +825,13 @@ static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) int inst_idx = vinst->inst; uint32_t tmp; int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; vcn_inst = GET_INST(VCN, inst_idx); + /* Unpause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); From e2699274d5a43b95af1b806aa6e3b1157107665d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 22:37:13 -0400 Subject: [PATCH 148/239] bcachefs: Fix __bch2_dev_group_set() bch2_sb_disk_groups_to_cpu() goes off of the superblock member info, so we need to set that first. Reported-by: Stijn Tintel Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_groups.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 1186280b29e9..2ca3cbf12b71 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -470,23 +470,22 @@ inval: int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { - struct bch_member *mi; - int ret, v = -1; + lockdep_assert_held(&c->sb_lock); - if (!strlen(name) || !strcmp(name, "none")) - return 0; - v = bch2_disk_path_find_or_create(&c->disk_sb, name); - if (v < 0) - return v; + if (!strlen(name) || !strcmp(name, "none")) { + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, 0); + } else { + int v = bch2_disk_path_find_or_create(&c->disk_sb, name); + if (v < 0) + return v; - ret = bch2_sb_disk_groups_to_cpu(c); - if (ret) - return ret; + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, v + 1); + } - mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_GROUP(mi, v + 1); - return 0; + return bch2_sb_disk_groups_to_cpu(c); } int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) From 5a295bad38b1057dd13811242ac981bb674ab190 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Thu, 17 Apr 2025 17:07:17 -0700 Subject: [PATCH 149/239] drm/xe/eustall: Resolve a possible circular locking dependency Use a separate lock in the polling function eu_stall_data_buf_poll() instead of eu_stall->stream_lock. This would prevent a possible circular locking dependency leading to a deadlock as described below. This would also require additional locking with the new lock in the read function. <4> [787.192986] ====================================================== <4> [787.192988] WARNING: possible circular locking dependency detected <4> [787.192991] 6.14.0-rc7-xe+ #1 Tainted: G U <4> [787.192993] ------------------------------------------------------ <4> [787.192994] xe_eu_stall/20093 is trying to acquire lock: <4> [787.192996] ffff88819847e2c0 ((work_completion) (&(&stream->buf_poll_work)->work)), at: __flush_work+0x1f8/0x5e0 <4> [787.193005] but task is already holding lock: <4> [787.193007] ffff88814ce83ba8 (>->eu_stall->stream_lock){3:3}, at: xe_eu_stall_stream_ioctl+0x41/0x6a0 [xe] <4> [787.193090] which lock already depends on the new lock. <4> [787.193093] the existing dependency chain (in reverse order) is: <4> [787.193095] -> #1 (>->eu_stall->stream_lock){+.+.}-{3:3}: <4> [787.193099] __mutex_lock+0xb4/0xe40 <4> [787.193104] mutex_lock_nested+0x1b/0x30 <4> [787.193106] eu_stall_data_buf_poll_work_fn+0x44/0x1d0 [xe] <4> [787.193155] process_one_work+0x21c/0x740 <4> [787.193159] worker_thread+0x1db/0x3c0 <4> [787.193161] kthread+0x10d/0x270 <4> [787.193164] ret_from_fork+0x44/0x70 <4> [787.193168] ret_from_fork_asm+0x1a/0x30 <4> [787.193172] -> #0 ((work_completion)(&(&stream->buf_poll_work)->work)){+.+.}-{0:0}: <4> [787.193176] __lock_acquire+0x1637/0x2810 <4> [787.193180] lock_acquire+0xc9/0x300 <4> [787.193183] __flush_work+0x219/0x5e0 <4> [787.193186] cancel_delayed_work_sync+0x87/0x90 <4> [787.193189] xe_eu_stall_disable_locked+0x9a/0x260 [xe] <4> [787.193237] xe_eu_stall_stream_ioctl+0x5b/0x6a0 [xe] <4> [787.193285] __x64_sys_ioctl+0xa4/0xe0 <4> [787.193289] x64_sys_call+0x131e/0x2650 <4> [787.193292] do_syscall_64+0x91/0x180 <4> [787.193295] entry_SYSCALL_64_after_hwframe+0x76/0x7e <4> [787.193299] other info that might help us debug this: <4> [787.193302] Possible unsafe locking scenario: <4> [787.193304] CPU0 CPU1 <4> [787.193305] ---- ---- <4> [787.193306] lock(>->eu_stall->stream_lock); <4> [787.193308] lock((work_completion) (&(&stream->buf_poll_work)->work)); <4> [787.193311] lock(>->eu_stall->stream_lock); <4> [787.193313] lock((work_completion) (&(&stream->buf_poll_work)->work)); <4> [787.193315] *** DEADLOCK *** Fixes: 760edec939685 ("drm/xe/eustall: Add support to read() and poll() EU stall data") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4598 Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://lore.kernel.org/r/c896932fca84f79db2df5942911997ed77b2b9b6.1744934656.git.harish.chegondi@intel.com (cherry picked from commit c2b1f1b8641372bb2e563c49eb25632623a860fc) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f2bb9168967c..78f28f3c5e5c 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream { struct xe_gt *gt; struct xe_bo *bo; + /* Lock to protect data buffer pointers */ + struct mutex xecore_buf_lock; struct per_xecore_buf *xecore_buf; struct { bool reported_to_user; @@ -378,7 +380,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) u16 group, instance; unsigned int xecore; - mutex_lock(>->eu_stall->stream_lock); + mutex_lock(&stream->xecore_buf_lock); for_each_dss_steering(xecore, gt, group, instance) { xecore_buf = &stream->xecore_buf[xecore]; read_ptr = xecore_buf->read; @@ -396,7 +398,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } - mutex_unlock(>->eu_stall->stream_lock); + mutex_unlock(&stream->xecore_buf_lock); return min_data_present; } @@ -511,11 +513,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st unsigned int xecore; int ret = 0; + mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) { stream->data_drop.reported_to_user = true; xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + mutex_unlock(&stream->xecore_buf_lock); return -EIO; } stream->data_drop.reported_to_user = false; @@ -527,6 +531,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st if (ret || count == total_size) break; } + mutex_unlock(&stream->xecore_buf_lock); return total_size ?: (ret ?: -EAGAIN); } @@ -583,6 +588,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) { struct xe_gt *gt = stream->gt; + mutex_destroy(&stream->xecore_buf_lock); gt->eu_stall->stream = NULL; kfree(stream); } @@ -718,6 +724,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, } init_waitqueue_head(&stream->poll_wq); + mutex_init(&stream->xecore_buf_lock); INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); stream->per_xecore_buf_size = per_xecore_buf_size; stream->sampling_rate_mult = props->sampling_rate_mult; From 1d622a4fe2b9a30cd4af2e858d793d05f8a82774 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sun, 20 Apr 2025 22:59:01 -0700 Subject: [PATCH 150/239] drm/xe/eustall: Do not support EU stall on SRIOV VF EU stall sampling is not supported on SRIOV VF. Do not initialize or open EU stall stream on SRIOV VF. Fixes: 9a0b11d4cf3b ("drm/xe/eustall: Add support to init, enable and disable EU stall sampling") Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://lore.kernel.org/r/10db5d1c7e17aadca7078ff74575b7ffc0d5d6b8.1745215022.git.harish.chegondi@intel.com (cherry picked from commit 6ed20625a4b8189a1bd6598aa58e03147ce378ee) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 3 +++ drivers/gpu/drm/xe/xe_eu_stall.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 78f28f3c5e5c..e2bb156c71fb 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -210,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int ret; + if (!xe_eu_stall_supported_on_platform(xe)) + return 0; + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h index ed9d0f233566..d1c76e503799 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.h +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -7,6 +7,7 @@ #define __XE_EU_STALL_H__ #include "xe_gt_types.h" +#include "xe_sriov.h" size_t xe_eu_stall_get_per_xecore_buf_size(void); size_t xe_eu_stall_data_record_size(struct xe_device *xe); @@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev, static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) { - return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; + return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20); } #endif From fee4d171451c1ad9e8aaf65fc0ab7d143a33bd72 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 May 2025 11:47:47 +0100 Subject: [PATCH 151/239] arm64: errata: Add missing sentinels to Spectre-BHB MIDR arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") added some additional CPUs to the Spectre-BHB workaround, including some new arrays for designs that require new 'k' values for the workaround to be effective. Unfortunately, the new arrays omitted the sentinel entry and so is_midr_in_range_list() will walk off the end when it doesn't find a match. With UBSAN enabled, this leads to a crash during boot when is_midr_in_range_list() is inlined (which was more common prior to c8c2647e69be ("arm64: Make  _midr_in_range_list() an exported function")): | Internal error: aarch64 BRK: 00000000f2000001 [#1] PREEMPT SMP | pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : spectre_bhb_loop_affected+0x28/0x30 | lr : is_spectre_bhb_affected+0x170/0x190 | [...] | Call trace: | spectre_bhb_loop_affected+0x28/0x30 | update_cpu_capabilities+0xc0/0x184 | init_cpu_features+0x188/0x1a4 | cpuinfo_store_boot_cpu+0x4c/0x60 | smp_prepare_boot_cpu+0x38/0x54 | start_kernel+0x8c/0x478 | __primary_switched+0xc8/0xd4 | Code: 6b09011f 54000061 52801080 d65f03c0 (d4200020) | ---[ end trace 0000000000000000 ]--- | Kernel panic - not syncing: aarch64 BRK: Fatal exception Add the missing sentinel entries. Cc: Lee Jones Cc: James Morse Cc: Doug Anderson Cc: Shameer Kolothum Cc: Reported-by: Greg Kroah-Hartman Fixes: a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") Signed-off-by: Will Deacon Reviewed-by: Lee Jones Reviewed-by: Douglas Anderson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250501104747.28431-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/proton-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index b198dde79e59..b607f6dfc5e6 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -879,10 +879,12 @@ static u8 spectre_bhb_loop_affected(void) static const struct midr_range spectre_bhb_k132_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, }; static const struct midr_range spectre_bhb_k38_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, }; static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), From 28580052e634fe8fa327e6f25c35590374be754b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 May 2025 12:38:00 -0400 Subject: [PATCH 152/239] bcachefs: add missing sched_annotate_sleep() 00594 ------------[ cut here ]------------ 00594 do not call blocking ops when !TASK_RUNNING; state=2 set at [<000000003e51ef4a>] prepare_to_wait_event+0x5c/0x1c0 00594 WARNING: CPU: 12 PID: 1117 at kernel/sched/core.c:8741 __might_sleep+0x74/0x88 00594 Modules linked in: 00594 CPU: 12 UID: 0 PID: 1117 Comm: umount Not tainted 6.15.0-rc4-ktest-g3a72e369412d #21845 PREEMPT 00594 Hardware name: linux,dummy-virt (DT) 00594 pstate: 60001005 (nZCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00594 pc : __might_sleep+0x74/0x88 00594 lr : __might_sleep+0x74/0x88 00594 sp : ffffff80c8d67a90 00594 x29: ffffff80c8d67a90 x28: ffffff80f5903500 x27: 0000000000000000 00594 x26: 0000000000000000 x25: ffffff80cf5002a0 x24: ffffffc087dad000 00594 x23: ffffff80c8d67b40 x22: 0000000000000000 x21: 0000000000000000 00594 x20: 0000000000000242 x19: ffffffc080b92020 x18: 00000000ffffffff 00594 x17: 30303c5b20746120 x16: 74657320323d6574 x15: 617473203b474e49 00594 x14: 0000000000000001 x13: 00000000000c0000 x12: ffffff80facc0000 00594 x11: 0000000000000001 x10: 0000000000000001 x9 : ffffffc0800b0774 00594 x8 : c0000000fffbffff x7 : ffffffc087dac670 x6 : 00000000015fffa8 00594 x5 : ffffff80facbffa8 x4 : ffffff80fbd30b90 x3 : 0000000000000000 00594 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffffff80f5903500 00594 Call trace: 00594 __might_sleep+0x74/0x88 (P) 00594 __mutex_lock+0x64/0x8d8 00594 mutex_lock_nested+0x28/0x38 00594 bch2_fs_ec_flush+0xf8/0x128 00594 __bch2_fs_read_only+0x54/0x1d8 00594 bch2_fs_read_only+0x3e0/0x438 00594 __bch2_fs_stop+0x5c/0x250 00594 bch2_put_super+0x18/0x28 00594 generic_shutdown_super+0x6c/0x140 00594 bch2_kill_sb+0x1c/0x38 00594 deactivate_locked_super+0x54/0xd0 00594 deactivate_super+0x70/0x90 00594 cleanup_mnt+0xec/0x188 00594 __cleanup_mnt+0x18/0x28 00594 task_work_run+0x90/0xd8 00594 do_notify_resume+0x138/0x148 00594 el0_svc+0x9c/0xa0 00594 el0t_64_sync_handler+0x104/0x130 00594 el0t_64_sync+0x154/0x158 Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index a396865e8b17..fff58b78327c 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2204,10 +2204,10 @@ void bch2_fs_ec_stop(struct bch_fs *c) static bool bch2_fs_ec_flush_done(struct bch_fs *c) { - bool ret; + sched_annotate_sleep(); mutex_lock(&c->ec_stripe_new_lock); - ret = list_empty(&c->ec_stripe_new_list); + bool ret = list_empty(&c->ec_stripe_new_list); mutex_unlock(&c->ec_stripe_new_lock); return ret; From f5178c41bb43444a6008150fe6094497135d07cb Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Tue, 22 Apr 2025 20:30:25 +0900 Subject: [PATCH 153/239] tracing: Fix oob write in trace_seq_to_buffer() syzbot reported this bug: ================================================================== BUG: KASAN: slab-out-of-bounds in trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] BUG: KASAN: slab-out-of-bounds in tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 Write of size 4507 at addr ffff888032b6b000 by task syz.2.320/7260 CPU: 1 UID: 0 PID: 7260 Comm: syz.2.320 Not tainted 6.15.0-rc1-syzkaller-00301-g3bde70a2c827 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0xef/0x1a0 mm/kasan/generic.c:189 __asan_memcpy+0x3c/0x60 mm/kasan/shadow.c:106 trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 .... ================================================================== It has been reported that trace_seq_to_buffer() tries to copy more data than PAGE_SIZE to buf. Therefore, to prevent this, we should use the smaller of trace_seq_used(&iter->seq) and PAGE_SIZE as an argument. Link: https://lore.kernel.org/20250422113026.13308-1-aha310510@gmail.com Reported-by: syzbot+c8cd2d2c412b868263fb@syzkaller.appspotmail.com Fixes: 3c56819b14b0 ("tracing: splice support for tracing_pipe") Suggested-by: Steven Rostedt Signed-off-by: Jeongjun Park Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8ddf6b17215c..6d52dc108f00 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6821,13 +6821,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), - trace_seq_used(&iter->seq)); + min((size_t)trace_seq_used(&iter->seq), + PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; } spd.partial[i].offset = 0; - spd.partial[i].len = trace_seq_used(&iter->seq); + spd.partial[i].len = ret; trace_seq_init(&iter->seq); } From 3c1d9cfa8458a4d6b6cd9bc3ca7bb1591130a31c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 22 Apr 2025 23:13:35 +0100 Subject: [PATCH 154/239] ftrace: Fix NULL memory allocation check The check for a failed memory location is incorrectly checking the wrong level of pointer indirection by checking !filter_hash rather than !*filter_hash. Fix this. Cc: asami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250422221335.89896-1-colin.i.king@gmail.com Fixes: 0ae6b8ce200d ("ftrace: Fix accounting of subop hashes") Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 61130bb34d6c..6981830c3128 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3436,7 +3436,7 @@ static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash ** /* Copy the subops hash */ *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash); - if (!filter_hash) + if (!*filter_hash) return -ENOMEM; /* Remove any notrace functions from the copy */ remove_hash(*filter_hash, subops_hash->notrace_hash); From 1be8e54a1e0f0a4bf70e3d65f94ca1738ee4f1f3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 May 2025 15:19:09 -0400 Subject: [PATCH 155/239] tracing: Fix trace_adjust_address() when there is no modules in scratch area The function trace_adjust_address() is used to map addresses of modules stored in the persistent memory and are also loaded in the current boot to return the current address for the module. If there's only one module entry, it will simply use that, otherwise it performs a bsearch of the entry array to find the modules to offset with. The issue is if there are no modules in the array. The code does not account for that and ends up referencing the first element in the array which does not exist and causes a crash. If nr_entries is zero, exit out early as if this was a core kernel address. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250501151909.65910359@gandalf.local.home Fixes: 35a380ddbc653 ("tracing: Show last module text symbols in the stacktrace") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6d52dc108f00..5b8db27fb6ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6043,8 +6043,10 @@ unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) tscratch = tr->scratch; /* if there is no tscrach, module_delta must be NULL. */ module_delta = READ_ONCE(tr->module_delta); - if (!module_delta || tscratch->entries[0].mod_addr > addr) + if (!module_delta || !tscratch->nr_entries || + tscratch->entries[0].mod_addr > addr) { return addr + tr->text_delta; + } /* Note that entries must be sorted. */ nr_entries = tscratch->nr_entries; From 4426e6b4ecf632bb75d973051e1179b8bfac2320 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Wed, 23 Apr 2025 21:03:03 -0500 Subject: [PATCH 156/239] spi: tegra114: Don't fail set_cs_timing when delays are zero The original code would skip null delay pointers, but when the pointers were converted to point within the spi_device struct, the check was not updated to skip delays of zero. Hence all spi devices that didn't set delays would fail to probe. Fixes: 04e6bb0d6bb1 ("spi: modify set_cs_timing parameter") Cc: stable@vger.kernel.org Signed-off-by: Aaron Kling Link: https://patch.msgid.link/20250423-spi-tegra114-v1-1-2d608bcc12f9@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra114.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 3822d7c8d8ed..2a8bb798e95b 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi) u32 inactive_cycles; u8 cs_state; - if (setup->unit != SPI_DELAY_UNIT_SCK || - hold->unit != SPI_DELAY_UNIT_SCK || - inactive->unit != SPI_DELAY_UNIT_SCK) { + if ((setup->unit && setup->unit != SPI_DELAY_UNIT_SCK) || + (hold->unit && hold->unit != SPI_DELAY_UNIT_SCK) || + (inactive->unit && inactive->unit != SPI_DELAY_UNIT_SCK)) { dev_err(&spi->dev, "Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n", SPI_DELAY_UNIT_SCK); From 6846100b00d97d3d6f05766ae86a0d821d849e78 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Fri, 2 May 2025 04:01:31 +0800 Subject: [PATCH 157/239] bcachefs: Remove incorrect __counted_by annotation This actually reverts 86e92eeeb237 ("bcachefs: Annotate struct bch_xattr with __counted_by()"). After the x_name, there is a value. According to the disscussion[1], __counted_by assumes that the flexible array member contains exactly the amount of elements that are specified. Now there are users came across a false positive detection of an out of bounds write caused by the __counted_by here[2], so revert that. [1] https://lore.kernel.org/lkml/Zv8VDKWN1GzLRT-_@archlinux/T/#m0ce9541c5070146320efd4f928cc1ff8de69e9b2 [2] https://privatebin.net/?a0d4e97d590d71e1#9bLmp2Kb5NU6X6cZEucchDcu88HzUQwHUah8okKPReEt Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/xattr_format.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/xattr_format.h b/fs/bcachefs/xattr_format.h index c7916011ef34..67426e33d04e 100644 --- a/fs/bcachefs/xattr_format.h +++ b/fs/bcachefs/xattr_format.h @@ -13,7 +13,13 @@ struct bch_xattr { __u8 x_type; __u8 x_name_len; __le16 x_val_len; - __u8 x_name[] __counted_by(x_name_len); + /* + * x_name contains the name and value counted by + * x_name_len + x_val_len. The introduction of + * __counted_by(x_name_len) caused a false positive + * detection of an out of bounds write. + */ + __u8 x_name[]; } __packed __aligned(8); #endif /* _BCACHEFS_XATTR_FORMAT_H */ From 53e3e5babc0963a92d856a5ec0ce92c59f54bc12 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 30 Apr 2025 11:18:28 +0900 Subject: [PATCH 158/239] ksmbd: prevent rename with empty string Client can send empty newname string to ksmbd server. It will cause a kernel oops from d_alloc. This patch return the error when attempting to rename a file or directory with an empty new name string. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 46aa08245742..f2a2be8467c6 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -633,6 +633,11 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\0') { + kfree(name); + return ERR_PTR(-EINVAL); + } + if (*name == '\\') { pr_err("not allow directory name included leading slash\n"); kfree(name); From eb4447bcce915b43b691123118893fca4f372a8f Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Wed, 30 Apr 2025 11:16:23 +0800 Subject: [PATCH 159/239] ksmbd: fix memory leak in parse_lease_state() The previous patch that added bounds check for create lease context introduced a memory leak. When the bounds check fails, the function returns NULL without freeing the previously allocated lease_ctx_info structure. This patch fixes the issue by adding kfree(lreq) before returning NULL in both boundary check cases. Fixes: bab703ed8472 ("ksmbd: add bounds check for create lease context") Signed-off-by: Wang Zhaolong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 81a29857b1e3..03f606afad93 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1496,7 +1496,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease_v2) - 4) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1512,7 +1512,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease)) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1521,6 +1521,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req) lreq->version = 1; } return lreq; +err_out: + kfree(lreq); + return NULL; } /** From 0a8f11f8569e7ed16cbcedeb28c4350f6378fea6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 May 2025 22:41:28 -0400 Subject: [PATCH 160/239] tracing: Do not take trace_event_sem in print_event_fields() On some paths in print_event_fields() it takes the trace_event_sem for read, even though it should always be held when the function is called. Remove the taking of that mutex and add a lockdep_assert_held_read() to make sure the trace_event_sem is held when print_event_fields() is called. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250501224128.0b1f0571@batman.local.home Fixes: 80a76994b2d88 ("tracing: Add "fields" option to show raw trace event fields") Reported-by: syzbot+441582c1592938fccf09@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6813ff5e.050a0220.14dd7d.001b.GAE@google.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index fee40ffbd490..b9ab06c99543 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1042,11 +1042,12 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, struct trace_event_call *call; struct list_head *head; + lockdep_assert_held_read(&trace_event_sem); + /* ftrace defined events have separate call structures */ if (event->type <= __TRACE_LAST_TYPE) { bool found = false; - down_read(&trace_event_sem); list_for_each_entry(call, &ftrace_events, list) { if (call->event.type == event->type) { found = true; @@ -1056,7 +1057,6 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, if (call->event.type > __TRACE_LAST_TYPE) break; } - up_read(&trace_event_sem); if (!found) { trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type); goto out; From 3393c90daf4e1704c6a5c3833439f461663a2e1d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 21 Apr 2025 08:15:38 -0700 Subject: [PATCH 161/239] drm/xe/hwmon: Fix kernel version documentation for temperature The version in the sysfs attribute should correspond to the version in which this is enabled and visible for end users. It usually doesn't correspond to the version in which the patch was developed, but rather a release that will contain it. Update them to 6.15. Fixes: dac328dea701 ("drm/xe/hwmon: expose package and vram temperature") Reported-by: Ulisses Furquim Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4840 Reviewed-by: Rodrigo Vivi Reviewed-by: Raag Jadav Link: https://lore.kernel.org/r/20250421-hwmon-doc-fix-v1-1-9f68db702249@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8500393a8e6c58e5e7c135133ad792fc6fd5b6f4) Signed-off-by: Lucas De Marchi --- Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 9bce281314df..cb207c79680d 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -111,7 +111,7 @@ Description: RO. Package current voltage in millivolt. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp2_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. Package temperature in millidegree Celsius. @@ -119,7 +119,7 @@ Description: RO. Package temperature in millidegree Celsius. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp3_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. From e8e3a804f3845a147fbdf73f910c12ddb3a2a86f Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 27 Apr 2025 19:47:52 -0700 Subject: [PATCH 162/239] drm/gpusvm: set has_dma_mapping inside mapping loop The 'has_dma_mapping' flag should be set once there is a mapping so it could be unmapped in case of error. v2: - Resend for CI Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Signed-off-by: Dafna Hirschfeld Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250428024752.881292-1-matthew.brost@intel.com (cherry picked from commit f64cf7b681af72d3f715c0d0fd72091a54471c1a) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/drm_gpusvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 38431e8360e7..de424e670995 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1469,9 +1469,9 @@ map_pages: } i += 1 << order; num_dma_mapped = i; + range->flags.has_dma_mapping = true; } - range->flags.has_dma_mapping = true; if (zdd) { range->flags.has_devmem_pages = true; range->dpagemap = dpagemap; From 6f9a8ab796c6528d22de3c504c81fce7dde63d8a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 23:23:29 -0700 Subject: [PATCH 163/239] btrfs: compression: adjust cb->compressed_folios allocation type In preparation for making the kmalloc() family of allocators type aware, we need to make sure that the returned type from the allocation matches the type of the variable being assigned. (Before, the allocator would always return "void *", which can be implicitly cast to any pointer type.) The assigned type is "struct folio **" but the returned type will be "struct page **". These are the same allocation size (pointer size), but the types don't match. Adjust the allocation type to match the assignment. Reviewed-by: Qu Wenruo Signed-off-by: Kees Cook Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index e7f8ee5d48a4..7f11ef559be6 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -606,7 +606,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio) free_extent_map(em); cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE); - cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct page *), GFP_NOFS); + cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS); if (!cb->compressed_folios) { ret = BLK_STS_RESOURCE; goto out_free_bio; From bc7e0975093567f51be8e1bdf4aa5900a3cf0b1e Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 25 Apr 2025 09:25:06 -0400 Subject: [PATCH 164/239] btrfs: correct the order of prelim_ref arguments in btrfs__prelim_ref btrfs_prelim_ref() calls the old and new reference variables in the incorrect order. This causes a NULL pointer dereference because oldref is passed as NULL to trace_btrfs_prelim_ref_insert(). Note, trace_btrfs_prelim_ref_insert() is being called with newref as oldref (and oldref as NULL) on purpose in order to print out the values of newref. To reproduce: echo 1 > /sys/kernel/debug/tracing/events/btrfs/btrfs_prelim_ref_insert/enable Perform some writeback operations. Backtrace: BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 115949067 P4D 115949067 PUD 11594a067 PMD 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 1 UID: 0 PID: 1188 Comm: fsstress Not tainted 6.15.0-rc2-tester+ #47 PREEMPT(voluntary) 7ca2cef72d5e9c600f0c7718adb6462de8149622 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014 RIP: 0010:trace_event_raw_event_btrfs__prelim_ref+0x72/0x130 Code: e8 43 81 9f ff 48 85 c0 74 78 4d 85 e4 0f 84 8f 00 00 00 49 8b 94 24 c0 06 00 00 48 8b 0a 48 89 48 08 48 8b 52 08 48 89 50 10 <49> 8b 55 18 48 89 50 18 49 8b 55 20 48 89 50 20 41 0f b6 55 28 88 RSP: 0018:ffffce44820077a0 EFLAGS: 00010286 RAX: ffff8c6b403f9014 RBX: ffff8c6b55825730 RCX: 304994edf9cf506b RDX: d8b11eb7f0fdb699 RSI: ffff8c6b403f9010 RDI: ffff8c6b403f9010 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000010 R10: 00000000ffffffff R11: 0000000000000000 R12: ffff8c6b4e8fb000 R13: 0000000000000000 R14: ffffce44820077a8 R15: ffff8c6b4abd1540 FS: 00007f4dc6813740(0000) GS:ffff8c6c1d378000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 000000010eb42000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: prelim_ref_insert+0x1c1/0x270 find_parent_nodes+0x12a6/0x1ee0 ? __entry_text_end+0x101f06/0x101f09 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 btrfs_is_data_extent_shared+0x167/0x640 ? fiemap_process_hole+0xd0/0x2c0 extent_fiemap+0xa5c/0xbc0 ? __entry_text_end+0x101f05/0x101f09 btrfs_fiemap+0x7e/0xd0 do_vfs_ioctl+0x425/0x9d0 __x64_sys_ioctl+0x75/0xc0 Signed-off-by: Goldwyn Rodrigues Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 549ab3b41961..3efc00cc1bcd 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1928,7 +1928,7 @@ DECLARE_EVENT_CLASS(btrfs__prelim_ref, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct prelim_ref *oldref, const struct prelim_ref *newref, u64 tree_size), - TP_ARGS(fs_info, newref, oldref, tree_size), + TP_ARGS(fs_info, oldref, newref, tree_size), TP_STRUCT__entry_btrfs( __field( u64, root_id ) From d6fe0c69b3aa5c985380b794bdf8e6e9b1811e60 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 25 Apr 2025 12:47:50 -0700 Subject: [PATCH 165/239] btrfs: handle empty eb->folios in num_extent_folios() num_extent_folios() unconditionally calls folio_order() on eb->folios[0]. If that is NULL this will be a segfault. It is reasonable for it to return 0 as the number of folios in the eb when the first entry is NULL, so do that instead. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2e261892c7bc..f5b28b5c4908 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -298,6 +298,8 @@ static inline int __pure num_extent_pages(const struct extent_buffer *eb) */ static inline int __pure num_extent_folios(const struct extent_buffer *eb) { + if (!eb->folios[0]) + return 0; if (folio_order(eb->folios[0])) return 1; return num_extent_pages(eb); From f95d186255b319c48a365d47b69bd997fecb674e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Apr 2025 15:17:50 +0930 Subject: [PATCH 166/239] btrfs: avoid NULL pointer dereference if no valid csum tree [BUG] When trying read-only scrub on a btrfs with rescue=idatacsums mount option, it will crash with the following call trace: BUG: kernel NULL pointer dereference, address: 0000000000000208 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page CPU: 1 UID: 0 PID: 835 Comm: btrfs Tainted: G O 6.15.0-rc3-custom+ #236 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 02/02/2022 RIP: 0010:btrfs_lookup_csums_bitmap+0x49/0x480 [btrfs] Call Trace: scrub_find_fill_first_stripe+0x35b/0x3d0 [btrfs] scrub_simple_mirror+0x175/0x290 [btrfs] scrub_stripe+0x5f7/0x6f0 [btrfs] scrub_chunk+0x9a/0x150 [btrfs] scrub_enumerate_chunks+0x333/0x660 [btrfs] btrfs_scrub_dev+0x23e/0x600 [btrfs] btrfs_ioctl+0x1dcf/0x2f80 [btrfs] __x64_sys_ioctl+0x97/0xc0 do_syscall_64+0x4f/0x120 entry_SYSCALL_64_after_hwframe+0x76/0x7e [CAUSE] Mount option "rescue=idatacsums" will completely skip loading the csum tree, so that any data read will not find any data csum thus we will ignore data checksum verification. Normally call sites utilizing csum tree will check the fs state flag NO_DATA_CSUMS bit, but unfortunately scrub does not check that bit at all. This results in scrub to call btrfs_search_slot() on a NULL pointer and triggered above crash. [FIX] Check both extent and csum tree root before doing any tree search. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2c5edcee9450..c3b2e29e3e01 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1541,8 +1541,8 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, u64 extent_gen; int ret; - if (unlikely(!extent_root)) { - btrfs_err(fs_info, "no valid extent root for scrub"); + if (unlikely(!extent_root || !csum_root)) { + btrfs_err(fs_info, "no valid extent or csum root for scrub"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * From 8fb1dcbbcc1ffe6ed7cf3f0f96d2737491dd1fbf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 17 Jan 2025 09:09:34 +1030 Subject: [PATCH 167/239] Revert "btrfs: canonicalize the device path before adding it" This reverts commit 7e06de7c83a746e58d4701e013182af133395188. Commit 7e06de7c83a7 ("btrfs: canonicalize the device path before adding it") tries to make btrfs to use "/dev/mapper/*" name first, then any filename inside "/dev/" as the device path. This is mostly fine when there is only the root namespace involved, but when multiple namespace are involved, things can easily go wrong for the d_path() usage. As d_path() returns a file path that is namespace dependent, the resulted string may not make any sense in another namespace. Furthermore, the "/dev/" prefix checks itself is not reliable, one can still make a valid initramfs without devtmpfs, and fill all needed device nodes manually. Overall the userspace has all its might to pass whatever device path for mount, and we are not going to win the war trying to cover every corner case. So just revert that commit, and do no extra d_path() based file path sanity check. CC: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/linux-fsdevel/20250115185608.GA2223535@zen.localdomain/ Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 91 +--------------------------------------------- 1 file changed, 1 insertion(+), 90 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c8c21c55be53..8e6b6fed7429 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -733,82 +733,6 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb) return has_metadata_uuid ? sb->metadata_uuid : sb->fsid; } -/* - * We can have very weird soft links passed in. - * One example is "/proc/self/fd/", which can be a soft link to - * a block device. - * - * But it's never a good idea to use those weird names. - * Here we check if the path (not following symlinks) is a good one inside - * "/dev/". - */ -static bool is_good_dev_path(const char *dev_path) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - bool is_good = false; - int ret; - - if (!dev_path) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) - goto out; - - /* - * Do not follow soft link, just check if the original path is inside - * "/dev/". - */ - ret = kern_path(dev_path, 0, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) - goto out; - if (strncmp(resolved_path, "/dev/", strlen("/dev/"))) - goto out; - is_good = true; -out: - kfree(path_buf); - path_put(&path); - return is_good; -} - -static int get_canonical_dev_path(const char *dev_path, char *canonical) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - int ret; - - if (!dev_path) { - ret = -EINVAL; - goto out; - } - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) { - ret = -ENOMEM; - goto out; - } - - ret = kern_path(dev_path, LOOKUP_FOLLOW, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) { - ret = PTR_ERR(resolved_path); - goto out; - } - ret = strscpy(canonical, resolved_path, PATH_MAX); -out: - kfree(path_buf); - path_put(&path); - return ret; -} - static bool is_same_device(struct btrfs_device *device, const char *new_path) { struct path old = { .mnt = NULL, .dentry = NULL }; @@ -1513,23 +1437,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, bool new_device_added = false; struct btrfs_device *device = NULL; struct file *bdev_file; - char *canonical_path = NULL; u64 bytenr; dev_t devt; int ret; lockdep_assert_held(&uuid_mutex); - if (!is_good_dev_path(path)) { - canonical_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (canonical_path) { - ret = get_canonical_dev_path(path, canonical_path); - if (ret < 0) { - kfree(canonical_path); - canonical_path = NULL; - } - } - } /* * Avoid an exclusive open here, as the systemd-udev may initiate the * device scan which may race with the user's mount or mkfs command, @@ -1574,8 +1487,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, goto free_disk_super; } - device = device_list_add(canonical_path ? : path, disk_super, - &new_device_added); + device = device_list_add(path, disk_super, &new_device_added); if (!IS_ERR(device) && new_device_added) btrfs_free_stale_devices(device->devt, device); @@ -1584,7 +1496,6 @@ free_disk_super: error_bdev_put: fput(bdev_file); - kfree(canonical_path); return device; } From 38e541051e1d19e8b1479a6af587a7884653e041 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 1 May 2025 02:10:48 +0800 Subject: [PATCH 168/239] btrfs: open code folio_index() in btree_clear_folio_dirty_tag() The folio_index() helper is only needed for mixed usage of page cache and swap cache, for pure page cache usage, the caller can just use folio->index instead. It can't be a swap cache folio here. Swap mapping may only call into fs through 'swap_rw' but btrfs does not use that method for swap. Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Qu Wenruo Signed-off-by: Kairui Song Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8515c31f563b..13bdd60da3c7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3508,8 +3508,8 @@ static void btree_clear_folio_dirty_tag(struct folio *folio) ASSERT(folio_test_locked(folio)); xa_lock_irq(&folio->mapping->i_pages); if (!folio_test_dirty(folio)) - __xa_clear_mark(&folio->mapping->i_pages, - folio_index(folio), PAGECACHE_TAG_DIRTY); + __xa_clear_mark(&folio->mapping->i_pages, folio->index, + PAGECACHE_TAG_DIRTY); xa_unlock_irq(&folio->mapping->i_pages); } From 38a05c0b87833f5b188ae43b428b1f792df2b384 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 2 May 2025 13:22:28 +0200 Subject: [PATCH 169/239] irqchip/qcom-mpm: Prevent crash when trying to handle non-wake GPIOs On Qualcomm chipsets not all GPIOs are wakeup capable. Those GPIOs do not have a corresponding MPM pin and should not be handled inside the MPM driver. The IRQ domain hierarchy is always applied, so it's required to explicitly disconnect the hierarchy for those. The pinctrl-msm driver marks these with GPIO_NO_WAKE_IRQ. qcom-pdc has a check for this, but irq-qcom-mpm is currently missing the check. This is causing crashes when setting up interrupts for non-wake GPIOs: root@rb1:~# gpiomon -c gpiochip1 10 irq: IRQ159: trimming hierarchy from :soc@0:interrupt-controller@f200000-1 Unable to handle kernel paging request at virtual address ffff8000a1dc3820 Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT) pc : mpm_set_type+0x80/0xcc lr : mpm_set_type+0x5c/0xcc Call trace: mpm_set_type+0x80/0xcc (P) qcom_mpm_set_type+0x64/0x158 irq_chip_set_type_parent+0x20/0x38 msm_gpio_irq_set_type+0x50/0x530 __irq_set_trigger+0x60/0x184 __setup_irq+0x304/0x6bc request_threaded_irq+0xc8/0x19c edge_detector_setup+0x260/0x364 linereq_create+0x420/0x5a8 gpio_ioctl+0x2d4/0x6c0 Fix this by copying the check for GPIO_NO_WAKE_IRQ from qcom-pdc.c, so that MPM is removed entirely from the hierarchy for non-wake GPIOs. Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Reported-by: Alexey Klimov Signed-off-by: Stephan Gerhold Signed-off-by: Thomas Gleixner Tested-by: Alexey Klimov Reviewed-by: Bartosz Golaszewski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250502-irq-qcom-mpm-fix-no-wake-v1-1-8a1eafcd28d4@linaro.org --- drivers/irqchip/irq-qcom-mpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index 7942d8eb3d00..f772deb9cba5 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -227,6 +227,9 @@ static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; + if (pin == GPIO_NO_WAKE_IRQ) + return irq_domain_disconnect_hierarchy(domain, virq); + ret = irq_domain_set_hwirq_and_chip(domain, virq, pin, &qcom_mpm_chip, priv); if (ret) From 8ed12ab1319b2d8e4a529504777aacacf71371e4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 28 Apr 2025 19:43:22 +0200 Subject: [PATCH 170/239] x86/boot/sev: Support memory acceptance in the EFI stub under SVSM Commit: d54d610243a4 ("x86/boot/sev: Avoid shared GHCB page for early memory acceptance") provided a fix for SEV-SNP memory acceptance from the EFI stub when running at VMPL #0. However, that fix was insufficient for SVSM SEV-SNP guests running at VMPL >0, as those rely on a SVSM calling area, which is a shared buffer whose address is programmed into a SEV-SNP MSR, and the SEV init code that sets up this calling area executes much later during the boot. Given that booting via the EFI stub at VMPL >0 implies that the firmware has configured this calling area already, reuse it for performing memory acceptance in the EFI stub. Fixes: fcd042e86422 ("x86/sev: Perform PVALIDATE using the SVSM when not at VMPL0") Tested-by: Tom Lendacky Co-developed-by: Tom Lendacky Signed-off-by: Tom Lendacky Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Cc: Dionna Amalie Glaze Cc: Kevin Loughlin Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20250428174322.2780170-2-ardb+git@google.com --- arch/x86/boot/compressed/mem.c | 5 +---- arch/x86/boot/compressed/sev.c | 40 ++++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/sev.h | 2 ++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c index f676156d9f3d..0e9f84ab4bdc 100644 --- a/arch/x86/boot/compressed/mem.c +++ b/arch/x86/boot/compressed/mem.c @@ -34,14 +34,11 @@ static bool early_is_tdx_guest(void) void arch_accept_memory(phys_addr_t start, phys_addr_t end) { - static bool sevsnp; - /* Platform-specific memory-acceptance call goes here */ if (early_is_tdx_guest()) { if (!tdx_accept_memory(start, end)) panic("TDX: Failed to accept memory\n"); - } else if (sevsnp || (sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) { - sevsnp = true; + } else if (early_is_sevsnp_guest()) { snp_accept_memory(start, end); } else { error("Cannot accept memory: unknown platform\n"); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 89ba168f4f0f..0003e4416efd 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -645,3 +645,43 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) sev_verify_cbit(top_level_pgt); } + +bool early_is_sevsnp_guest(void) +{ + static bool sevsnp; + + if (sevsnp) + return true; + + if (!(sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) + return false; + + sevsnp = true; + + if (!snp_vmpl) { + unsigned int eax, ebx, ecx, edx; + + /* + * CPUID Fn8000_001F_EAX[28] - SVSM support + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax & BIT(28)) { + struct msr m; + + /* Obtain the address of the calling area to use */ + boot_rdmsr(MSR_SVSM_CAA, &m); + boot_svsm_caa = (void *)m.q; + boot_svsm_caa_pa = m.q; + + /* + * The real VMPL level cannot be discovered, but the + * memory acceptance routines make no use of that so + * any non-zero value suffices here. + */ + snp_vmpl = U8_MAX; + } + } + return true; +} diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index 4e463f33186d..d3900384b8ab 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -13,12 +13,14 @@ bool sev_snp_enabled(void); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 sev_get_status(void); +bool early_is_sevsnp_guest(void); #else static inline bool sev_snp_enabled(void) { return false; } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 sev_get_status(void) { return 0; } +static inline bool early_is_sevsnp_guest(void) { return false; } #endif From de3629baf5a33af1919dec7136d643b0662e85ef Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 3 May 2025 18:24:01 +0200 Subject: [PATCH 171/239] parisc: Fix double SIGFPE crash Camm noticed that on parisc a SIGFPE exception will crash an application with a second SIGFPE in the signal handler. Dave analyzed it, and it happens because glibc uses a double-word floating-point store to atomically update function descriptors. As a result of lazy binding, we hit a floating-point store in fpe_func almost immediately. When the T bit is set, an assist exception trap occurs when when the co-processor encounters *any* floating-point instruction except for a double store of register %fr0. The latter cancels all pending traps. Let's fix this by clearing the Trap (T) bit in the FP status register before returning to the signal handler in userspace. The issue can be reproduced with this test program: root@parisc:~# cat fpe.c static void fpe_func(int sig, siginfo_t *i, void *v) { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGFPE); sigprocmask(SIG_UNBLOCK, &set, NULL); printf("GOT signal %d with si_code %ld\n", sig, i->si_code); } int main() { struct sigaction action = { .sa_sigaction = fpe_func, .sa_flags = SA_RESTART|SA_SIGINFO }; sigaction(SIGFPE, &action, 0); feenableexcept(FE_OVERFLOW); return printf("%lf\n",1.7976931348623158E308*1.7976931348623158E308); } root@parisc:~# gcc fpe.c -lm root@parisc:~# ./a.out Floating point exception root@parisc:~# strace -f ./a.out execve("./a.out", ["./a.out"], 0xf9ac7034 /* 20 vars */) = 0 getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0 ... rt_sigaction(SIGFPE, {sa_handler=0x1110a, sa_mask=[], sa_flags=SA_RESTART|SA_SIGINFO}, NULL, 8) = 0 --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0x1078f} --- --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0xf8f21237} --- +++ killed by SIGFPE +++ Floating point exception Signed-off-by: Helge Deller Suggested-by: John David Anglin Reported-by: Camm Maguire Cc: stable@vger.kernel.org --- arch/parisc/math-emu/driver.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 34495446e051..71829cb7bc81 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -97,9 +97,19 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0]); - return -1; + int sig = signalcode >> 24; + + if (sig == SIGFPE) { + /* + * Clear floating point trap bit to avoid trapping + * again on the first floating-point instruction in + * the userspace signal handler. + */ + regs->fr[0] &= ~(1ULL << 38); + } + force_sig_fault(sig, signalcode & 0xffffff, + (void __user *) regs->iaoq[0]); + return -1; } return signalcode ? -1 : 0; From 92a09c47464d040866cf2b4cd052bc60555185fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 May 2025 13:55:04 -0700 Subject: [PATCH 172/239] Linux 6.15-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 94be5dfb81fb..b29cc321ffd9 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* From 68025adfc13e6cd15eebe2293f77659f47daf13b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Apr 2025 17:05:19 +0200 Subject: [PATCH 173/239] um: fix _nofault accesses Nathan reported [1] that when built with clang, the um kernel crashes pretty much immediately. This turned out to be an issue with the inline assembly I had added, when clang used %rax/%eax for both operands. Reorder it so current->thread.segv_continue is written first, and then the lifetime of _faulted won't have overlap with the lifetime of segv_continue. In the email thread Benjamin also pointed out that current->mm is only NULL for true kernel tasks, but we could do this for a userspace task, so the current->thread.segv_continue logic must be lifted out of the mm==NULL check. Finally, while looking at this, put a barrier() so the NULL assignment to thread.segv_continue cannot be reorder before the possibly faulting operation. Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/r/20250402221254.GA384@ax162 [1] Fixes: d1d7f01f7cd3 ("um: mark rodata read-only and implement _nofault accesses") Tested-by: Nathan Chancellor Signed-off-by: Johannes Berg --- arch/um/include/asm/uaccess.h | 2 ++ arch/um/kernel/trap.c | 26 ++++++++++++------------ arch/x86/um/shared/sysdep/faultinfo_32.h | 2 +- arch/x86/um/shared/sysdep/faultinfo_64.h | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 3a08f9029a3f..1c6e0ae41b0c 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -55,6 +55,7 @@ do { \ goto err_label; \ } \ *((type *)dst) = get_unaligned((type *)(src)); \ + barrier(); \ current->thread.segv_continue = NULL; \ } while (0) @@ -66,6 +67,7 @@ do { \ if (__faulted) \ goto err_label; \ put_unaligned(*((type *)src), (type *)(dst)); \ + barrier(); \ current->thread.segv_continue = NULL; \ } while (0) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ce073150dc20..ef2272e92a43 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -225,20 +225,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, panic("Failed to sync kernel TLBs: %d", err); goto out; } - else if (current->mm == NULL) { - if (current->pagefault_disabled) { - if (!mc) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault with pagefaults disabled but no mcontext"); - } - if (!current->thread.segv_continue) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault without recovery target"); - } - mc_set_rip(mc, current->thread.segv_continue); - current->thread.segv_continue = NULL; - goto out; + else if (current->pagefault_disabled) { + if (!mc) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with pagefaults disabled but no mcontext"); } + if (!current->thread.segv_continue) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault without recovery target"); + } + mc_set_rip(mc, current->thread.segv_continue); + current->thread.segv_continue = NULL; + goto out; + } + else if (current->mm == NULL) { show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h index ab5c8e47049c..9193a7790a71 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_32.h +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h @@ -31,8 +31,8 @@ struct faultinfo { #define ___backtrack_faulted(_faulted) \ asm volatile ( \ - "mov $0, %0\n" \ "movl $__get_kernel_nofault_faulted_%=,%1\n" \ + "mov $0, %0\n" \ "jmp _end_%=\n" \ "__get_kernel_nofault_faulted_%=:\n" \ "mov $1, %0;" \ diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h index 26fb4835d3e9..61e4ca1e0ab5 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_64.h +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h @@ -31,8 +31,8 @@ struct faultinfo { #define ___backtrack_faulted(_faulted) \ asm volatile ( \ - "mov $0, %0\n" \ "movq $__get_kernel_nofault_faulted_%=,%1\n" \ + "mov $0, %0\n" \ "jmp _end_%=\n" \ "__get_kernel_nofault_faulted_%=:\n" \ "mov $1, %0;" \ From 0db8a9a943e9b651952a62e6e985effb4161ac5e Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Fri, 18 Apr 2025 10:20:14 -0500 Subject: [PATCH 174/239] s390/configs: Enable VDPA on Nvidia ConnectX-6 network card ConnectX-6 is the first VDPA-capable NIC. For earlier NICs, Nvidia implements a VDPA emulation in s/w, which hasn't been validated on s390. Add options necessary for VDPA to work. These options are also added because Fedora has them: CONFIG_VDPA_SIM CONFIG_VDPA_SIM_NET CONFIG_VDPA_SIM_BLOCK CONFIG_VDPA_USER CONFIG_VP_VDPA Signed-off-by: Konstantin Shkolnyy Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 8 ++++++++ arch/s390/configs/defconfig | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6f2c9ce1b154..68ba3bedfe00 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -628,8 +628,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index f18a7d97ac21..57b34f9fabf5 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -618,8 +618,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y From d2b8111c22d7d82f28df574acc54aec44ce3d45c Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Fri, 18 Apr 2025 10:20:15 -0500 Subject: [PATCH 175/239] s390/configs: Enable options required for TC flow offload While testing Open vSwitch with Nvidia ConnectX-6 NIC, it was noticed that it didn't offload TC flows into the NIC, and its log contained many messages such as: "failed to offload flow: No such file or directory: " and, upon enabling more versose logging, additionally: "received NAK error=2 - TC classifier not found" The options enabled here are listed as requirements in Nvidia online documentation, among other options that were already enabled. Now all options listed by Nvidia are enabled.. This option is also added because Fedora has it: CONFIG_NET_EMATCH Signed-off-by: Konstantin Shkolnyy Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 6 ++++++ arch/s390/configs/defconfig | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 68ba3bedfe00..193d251b4c8f 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -395,6 +395,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -405,6 +408,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 57b34f9fabf5..ec8fdeba4f3e 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -385,6 +385,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -395,6 +398,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y From ae952eea6f4a7e2193f8721a5366049946e012e7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 24 Apr 2025 17:07:01 +0200 Subject: [PATCH 176/239] s390/entry: Fix last breaking event handling in case of stack corruption In case of stack corruption stack_invalid() is called and the expectation is that register r10 contains the last breaking event address. This dependency is quite subtle and broke a couple of years ago without that anybody noticed. Fix this by getting rid of the dependency and read the last breaking event address from lowcore. Fixes: 56e62a737028 ("s390: convert to generic entry") Acked-by: Ilya Leoshkevich Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dd291c9ad6a6..9980c17ba22d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -602,7 +602,8 @@ SYM_CODE_START(stack_invalid) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_invalid From 833542b3e3d23f640aef6569ba1fd641bc195fa0 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 29 Apr 2025 18:21:01 +0200 Subject: [PATCH 177/239] s390/dcssblk: Fix build error with CONFIG_DAX=m and CONFIG_DCSSBLK=y After commit 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED support") moved the "select DAX" from config DCSSBLK to the new config DCSSBLK_DAX, randconfig tests could result in build errors like this: s390-linux-ld: drivers/s390/block/dcssblk.o: in function `dcssblk_shared_store': drivers/s390/block/dcssblk.c:417: undefined reference to `kill_dax' s390-linux-ld: drivers/s390/block/dcssblk.c:418: undefined reference to `put_dax' This is because it's now possible to have CONFIG_DCSSBLK=y, but CONFIG_DAX=m. Fix this by adding "depends on DAX || DAX=n" to config DCSSBLK, to make it explicit that we want either no DAX, or the same "y/m" for both config DAX and DCSSBLK, similar to config BLK_DEV_DM. This also requires removing the "select DAX" from config DCSSBLK_DAX, or else there would be a recursive dependency detected. DCSSBLK_DAX is marked as BROKEN at the moment, and won't work well with DAX anyway, so it doesn't really matter if it is selected. Fixes: 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED support") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504291604.pvjonhWX-lkp@intel.com/ Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- drivers/s390/block/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 4bfe469c04aa..8c1c908d2c6e 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -5,7 +5,7 @@ comment "S/390 block device drivers" config DCSSBLK def_tristate m prompt "DCSSBLK support" - depends on S390 && BLOCK + depends on S390 && BLOCK && (DAX || DAX=n) help Support for dcss block device @@ -14,7 +14,6 @@ config DCSSBLK_DAX depends on DCSSBLK # requires S390 ZONE_DEVICE support depends on BROKEN - select DAX prompt "DCSSBLK DAX support" help Enable DAX operation for the dcss block device From 3a47b1e3cea247857aa48cfe3d0a07e989e6c57d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 30 Apr 2025 16:41:01 +0200 Subject: [PATCH 178/239] s390: Update defconfigs Just the regular update of all defconfigs. Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 14 +++++--------- arch/s390/configs/defconfig | 10 +++------- arch/s390/configs/zfcpdump_defconfig | 1 - 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 193d251b4c8f..24b22f6a9e99 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -38,7 +38,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -92,7 +91,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y @@ -668,7 +666,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -738,11 +735,10 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y @@ -755,6 +751,8 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -770,7 +768,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -815,7 +812,6 @@ CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -826,9 +822,9 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index ec8fdeba4f3e..2b8b42d569bc 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -36,7 +36,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -86,7 +85,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set @@ -655,7 +653,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -725,6 +722,7 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y @@ -756,7 +754,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -802,7 +799,6 @@ CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -813,10 +809,10 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 853b2326a171..8163c1702720 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -70,7 +70,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y -# CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set From 3769478610135e82b262640252d90f6efb05be71 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Apr 2025 16:29:54 -0700 Subject: [PATCH 179/239] sch_htb: make htb_deactivate() idempotent Alan reported a NULL pointer dereference in htb_next_rb_node() after we made htb_qlen_notify() idempotent. It turns out in the following case it introduced some regression: htb_dequeue_tree(): |-> fq_codel_dequeue() |-> qdisc_tree_reduce_backlog() |-> htb_qlen_notify() |-> htb_deactivate() |-> htb_next_rb_node() |-> htb_deactivate() For htb_next_rb_node(), after calling the 1st htb_deactivate(), the clprio[prio]->ptr could be already set to NULL, which means htb_next_rb_node() is vulnerable here. For htb_deactivate(), although we checked qlen before calling it, in case of qlen==0 after qdisc_tree_reduce_backlog(), we may call it again which triggers the warning inside. To fix the issues here, we need to: 1) Make htb_deactivate() idempotent, that is, simply return if we already call it before. 2) Make htb_next_rb_node() safe against ptr==NULL. Many thanks to Alan for testing and for the reproducer. Fixes: 5ba8b837b522 ("sch_htb: make htb_qlen_notify() idempotent") Reported-by: Alan J. Wylie Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250428232955.1740419-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_htb.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 4b9a639b642e..14bf71f57057 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q, */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** @@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; } @@ -1485,8 +1486,6 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (!cl->prio_activity) - return; htb_deactivate(qdisc_priv(sch), cl); } @@ -1740,8 +1739,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, @@ -1949,8 +1947,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_purge_queue(parent->leaf.q); parent_qdisc = parent->leaf.q; - if (parent->prio_activity) - htb_deactivate(q, parent); + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { From 63890286f557aa5c4eed7e90a5a31658de8fdb4d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Apr 2025 16:29:55 -0700 Subject: [PATCH 180/239] selftests/tc-testing: Add a test case to cover basic HTB+FQ_CODEL case Integrate the reproducer from Alan into TC selftests and use scapy to generate TCP traffic instead of relying on ping command. Cc: Alan J. Wylie Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250428232955.1740419-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 0843f6d37e9c..a951c0d33cd2 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -538,5 +538,40 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "62c4", + "name": "Test HTB with FQ_CODEL - basic functionality", + "category": [ + "qdisc", + "htb", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 root handle 1: htb default 11", + "$TC class add dev $DEV1 parent 1: classid 1:1 htb rate 10kbit", + "$TC class add dev $DEV1 parent 1:1 classid 1:11 htb rate 10kbit prio 0 quantum 1486", + "$TC qdisc add dev $DEV1 parent 1:11 fq_codel quantum 300 noecn", + "sleep 0.5" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/TCP(sport=12345, dport=80)" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1 | grep -A 5 'qdisc fq_codel'", + "matchPattern": "Sent [0-9]+ bytes [0-9]+ pkt", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 handle 1: root" + ] } ] From 1e20324b23f0afba27997434fb978f1e4a1dbcb6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Apr 2025 09:37:58 -0700 Subject: [PATCH 181/239] virtio-net: don't re-enable refill work too early when NAPI is disabled Commit 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") fixed a deadlock between reconfig paths and refill work trying to disable the same NAPI instance. The refill work can't run in parallel with reconfig because trying to double-disable a NAPI instance causes a stall under the instance lock, which the reconfig path needs to re-enable the NAPI and therefore unblock the stalled thread. There are two cases where we re-enable refill too early. One is in the virtnet_set_queues() handler. We call it when installing XDP: virtnet_rx_pause_all(vi); ... virtnet_napi_tx_disable(..); ... virtnet_set_queues(..); ... virtnet_rx_resume_all(..); We want the work to be disabled until we call virtnet_rx_resume_all(), but virtnet_set_queues() kicks it before NAPIs were re-enabled. The other case is a more trivial case of mis-ordering in __virtnet_rx_resume() found by code inspection. Taking the spin lock in virtnet_set_queues() (requested during review) may be unnecessary as we are under rtnl_lock and so are all paths writing to ->refill_enabled. Acked-by: Michael S. Tsirkin Reviewed-by: Bui Quang Minh Fixes: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()") Link: https://patch.msgid.link/20250430163758.3029367-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 848fab51dfa1..b5549d542c02 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3383,12 +3383,15 @@ static void __virtnet_rx_resume(struct virtnet_info *vi, bool refill) { bool running = netif_running(vi->dev); + bool schedule_refill = false; if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - + schedule_refill = true; if (running) virtnet_napi_enable(rq); + + if (schedule_refill) + schedule_delayed_work(&vi->refill, 0); } static void virtnet_rx_resume_all(struct virtnet_info *vi) @@ -3728,8 +3731,10 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) succ: vi->curr_queue_pairs = queue_pairs; /* virtnet_open() will refill when device is going to up. */ - if (dev->flags & IFF_UP) + spin_lock_bh(&vi->refill_lock); + if (dev->flags & IFF_UP && vi->refill_enabled) schedule_delayed_work(&vi->refill, 0); + spin_unlock_bh(&vi->refill_lock); return 0; } From 4397684a292a71fbc1e815c3e283f7490ddce5ae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Apr 2025 09:38:36 -0700 Subject: [PATCH 182/239] virtio-net: free xsk_buffs on error in virtnet_xsk_pool_enable() The selftests added to our CI by Bui Quang Minh recently reveals that there is a mem leak on the error path of virtnet_xsk_pool_enable(): unreferenced object 0xffff88800a68a000 (size 2048): comm "xdp_helper", pid 318, jiffies 4294692778 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): __kvmalloc_node_noprof+0x402/0x570 virtnet_xsk_pool_enable+0x293/0x6a0 (drivers/net/virtio_net.c:5882) xp_assign_dev+0x369/0x670 (net/xdp/xsk_buff_pool.c:226) xsk_bind+0x6a5/0x1ae0 __sys_bind+0x15e/0x230 __x64_sys_bind+0x72/0xb0 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Acked-by: Jason Wang Fixes: e9f3962441c0 ("virtio_net: xsk: rx: support fill with xsk buffer") Link: https://patch.msgid.link/20250430163836.3029761-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b5549d542c02..f9e3e628ec4d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5890,8 +5890,10 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, DMA_TO_DEVICE, 0); - if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) - return -ENOMEM; + if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { + err = -ENOMEM; + goto err_free_buffs; + } err = xsk_pool_dma_map(pool, dma_dev, 0); if (err) @@ -5919,6 +5921,8 @@ err_rq: err_xsk_map: virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, DMA_TO_DEVICE, 0); +err_free_buffs: + kvfree(rq->xsk_buffs); return err; } From c360eb0c3ccb95306704fd221442283ee82f1f58 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 30 Apr 2025 11:21:35 -0500 Subject: [PATCH 183/239] dt-bindings: net: ethernet-controller: Add informative text about RGMII delays Device Tree and Ethernet MAC driver writers often misunderstand RGMII delays. Rewrite the Normative section in terms of the PCB, is the PCB adding the 2ns delay. This meaning was previous implied by the definition, but often wrongly interpreted due to the ambiguous wording and looking at the definition from the wrong perspective. The new definition concentrates clearly on the hardware, and should be less ambiguous. Add an Informative section to the end of the binding describing in detail what the four RGMII delays mean. This expands on just the PCB meaning, adding in the implications for the MAC and PHY. Additionally, when the MAC or PHY needs to add a delay, which is software configuration, describe how Linux does this, in the hope of reducing errors. Make it clear other users of device tree binding may implement the software configuration in other ways while still conforming to the binding. Fixes: 9d3de3c58347 ("dt-bindings: net: Add YAML schemas for the generic Ethernet options") Signed-off-by: Andrew Lunn Acked-by: Conor Dooley Link: https://patch.msgid.link/20250430-v6-15-rc3-net-rgmii-delays-v2-1-099ae651d5e5@lunn.ch Signed-off-by: Jakub Kicinski --- .../bindings/net/ethernet-controller.yaml | 97 +++++++++++++++++-- 1 file changed, 90 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 45819b235800..a2d4c626f659 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -74,19 +74,17 @@ properties: - rev-rmii - moca - # RX and TX delays are added by the MAC when required + # RX and TX delays are provided by the PCB. See below - rgmii - # RGMII with internal RX and TX delays provided by the PHY, - # the MAC should not add the RX or TX delays in this case + # RX and TX delays are not provided by the PCB. This is the most + # frequent case. See below - rgmii-id - # RGMII with internal RX delay provided by the PHY, the MAC - # should not add an RX delay in this case + # TX delay is provided by the PCB. See below - rgmii-rxid - # RGMII with internal TX delay provided by the PHY, the MAC - # should not add an TX delay in this case + # RX delay is provided by the PCB. See below - rgmii-txid - rtbi - smii @@ -286,4 +284,89 @@ allOf: additionalProperties: true +# Informative +# =========== +# +# 'phy-modes' & 'phy-connection-type' properties 'rgmii', 'rgmii-id', +# 'rgmii-rxid', and 'rgmii-txid' are frequently used wrongly by +# developers. This informative section clarifies their usage. +# +# The RGMII specification requires a 2ns delay between the data and +# clock signals on the RGMII bus. How this delay is implemented is not +# specified. +# +# One option is to make the clock traces on the PCB longer than the +# data traces. A sufficiently difference in length can provide the 2ns +# delay. If both the RX and TX delays are implemented in this manner, +# 'rgmii' should be used, so indicating the PCB adds the delays. +# +# If the PCB does not add these delays via extra long traces, +# 'rgmii-id' should be used. Here, 'id' refers to 'internal delay', +# where either the MAC or PHY adds the delay. +# +# If only one of the two delays are implemented via extra long clock +# lines, either 'rgmii-rxid' or 'rgmii-txid' should be used, +# indicating the MAC or PHY should implement one of the delays +# internally, while the PCB implements the other delay. +# +# Device Tree describes hardware, and in this case, it describes the +# PCB between the MAC and the PHY, if the PCB implements delays or +# not. +# +# In practice, very few PCBs make use of extra long clock lines. Hence +# any RGMII phy mode other than 'rgmii-id' is probably wrong, and is +# unlikely to be accepted during review without details provided in +# the commit description and comments in the .dts file. +# +# When the PCB does not implement the delays, the MAC or PHY must. As +# such, this is software configuration, and so not described in Device +# Tree. +# +# The following describes how Linux implements the configuration of +# the MAC and PHY to add these delays when the PCB does not. As stated +# above, developers often get this wrong, and the aim of this section +# is reduce the frequency of these errors by Linux developers. Other +# users of the Device Tree may implement it differently, and still be +# consistent with both the normative and informative description +# above. +# +# By default in Linux, when using phylib/phylink, the MAC is expected +# to read the 'phy-mode' from Device Tree, not implement any delays, +# and pass the value to the PHY. The PHY will then implement delays as +# specified by the 'phy-mode'. The PHY should always be reconfigured +# to implement the needed delays, replacing any setting performed by +# strapping or the bootloader, etc. +# +# Experience to date is that all PHYs which implement RGMII also +# implement the ability to add or not add the needed delays. Hence +# this default is expected to work in all cases. Ignoring this default +# is likely to be questioned by Reviews, and require a strong argument +# to be accepted. +# +# There are a small number of cases where the MAC has hard coded +# delays which cannot be disabled. The 'phy-mode' only describes the +# PCB. The inability to disable the delays in the MAC does not change +# the meaning of 'phy-mode'. It does however mean that a 'phy-mode' of +# 'rgmii' is now invalid, it cannot be supported, since both the PCB +# and the MAC and PHY adding delays cannot result in a functional +# link. Thus the MAC should report a fatal error for any modes which +# cannot be supported. When the MAC implements the delay, it must +# ensure that the PHY does not also implement the same delay. So it +# must modify the phy-mode it passes to the PHY, removing the delay it +# has added. Failure to remove the delay will result in a +# non-functioning link. +# +# Sometimes there is a need to fine tune the delays. Often the MAC or +# PHY can perform this fine tuning. In the MAC node, the Device Tree +# properties 'rx-internal-delay-ps' and 'tx-internal-delay-ps' should +# be used to indicate fine tuning performed by the MAC. The values +# expected here are small. A value of 2000ps, i.e 2ns, and a phy-mode +# of 'rgmii' will not be accepted by Reviewers. +# +# If the PHY is to perform fine tuning, the properties +# 'rx-internal-delay-ps' and 'tx-internal-delay-ps' in the PHY node +# should be used. When the PHY is implementing delays, e.g. 'rgmii-id' +# these properties should have a value near to 2000ps. If the PCB is +# implementing delays, e.g. 'rgmii', a small value can be used to fine +# tune the delay added by the PCB. ... From 3e6a0243ff002ddbd7ee18a8974ae61d2e6ed00d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 May 2025 00:57:52 +0200 Subject: [PATCH 184/239] gre: Fix again IPv6 link-local address generation. Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE devices in most cases and fall back to using add_v4_addrs() only in case the GRE configuration is incompatible with addrconf_addr_gen(). GRE used to use addrconf_addr_gen() until commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") restricted this use to gretap and ip6gretap devices, and created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. The original problem came when commit 9af28511be10 ("addrconf: refuse isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its addr parameter was 0. The commit says that this would create an invalid address, however, I couldn't find any RFC saying that the generated interface identifier would be wrong. Anyway, since gre over IPv4 devices pass their local tunnel address to __ipv6_isatap_ifid(), that commit broke their IPv6 link-local address generation when the local address was unspecified. Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") tried to fix that case by defining add_v4_addrs() and calling it to generate the IPv6 link-local address instead of using addrconf_addr_gen() (apart for gretap and ip6gretap devices, which would still use the regular addrconf_addr_gen(), since they have a MAC address). That broke several use cases because add_v4_addrs() isn't properly integrated into the rest of IPv6 Neighbor Discovery code. Several of these shortcomings have been fixed over time, but add_v4_addrs() remains broken on several aspects. In particular, it doesn't send any Router Sollicitations, so the SLAAC process doesn't start until the interface receives a Router Advertisement. Also, add_v4_addrs() mostly ignores the address generation mode of the interface (/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. Fix the situation by using add_v4_addrs() only in the specific scenario where the normal method would fail. That is, for interfaces that have all of the following characteristics: * run over IPv4, * transport IP packets directly, not Ethernet (that is, not gretap interfaces), * tunnel endpoint is INADDR_ANY (that is, 0), * device address generation mode is EUI64. In all other cases, revert back to the regular addrconf_addr_gen(). Also, remove the special case for ip6gre interfaces in add_v4_addrs(), since ip6gre devices now always use addrconf_addr_gen() instead. Note: This patch was originally applied as commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). However, it was then reverted by commit fc486c2d060f ("Revert "gre: Fix IPv6 link-local address generation."") because it uncovered another bug that ended up breaking net/forwarding/ip6gre_custom_multipath_hash.sh. That other bug has now been fixed by commit 4d0ab3a6885e ("ipv6: Start path selection from the first nexthop"). Therefore we can now revive this GRE patch (no changes since original commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/a88cc5c4811af36007645d610c95102dccb360a6.1746225214.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ba83f0c9928..c6b22170dc49 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3214,16 +3214,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3534,7 +3531,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } From b6a6006b0e3d10e62c465613f07ff49268baedb1 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 May 2025 00:57:59 +0200 Subject: [PATCH 185/239] selftests: Add IPv6 link-local address generation tests for GRE devices. GRE devices have their special code for IPv6 link-local address generation that has been the source of several regressions in the past. Add selftest to check that all gre, ip6gre, gretap and ip6gretap get an IPv6 link-link local address in accordance with the net.ipv6.conf..addr_gen_mode sysctl. Note: This patch was originally applied as commit 6f50175ccad4 ("selftests: Add IPv6 link-local address generation tests for GRE devices."). However, it was then reverted by commit 355d940f4d5a ("Revert "selftests: Add IPv6 link-local address generation tests for GRE devices."") because the commit it depended on was going to be reverted. Now that the situation is resolved, we can add this selftest again (no changes since original patch, appart from context update in tools/testing/selftests/net/Makefile). Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/2c3a5733cb3a6e3119504361a9b9f89fda570a2d.1746225214.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/gre_ipv6_lladdr.sh | 177 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100755 tools/testing/selftests/net/gre_ipv6_lladdr.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 124078b56fa4..70a38f485d4d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += test_so_rcv.sh diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..5b34f6e1f831 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 From b344a48cbe5fb24697addc6afbb7358b938a7d31 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:16 -0700 Subject: [PATCH 186/239] selftests: drv: net: fix test failure on ipv6 sys The `get_interface_info` call has ip version hard-coded which leads to failures on an IPV6 system. The NetDrvEnv class already gathers information about remote interface, so instead of fixing the local implementation switch to using cfg.remote_ifname. Before: ./drivers/net/ping.py Traceback (most recent call last): File "/new_tests/./drivers/net/ping.py", line 217, in main() File "/new_tests/./drivers/net/ping.py", line 204, in main get_interface_info(cfg) File "/new_tests/./drivers/net/ping.py", line 128, in get_interface_info raise KsftFailEx('Can not get remote interface') net.lib.py.ksft.KsftFailEx: Can not get remote interface After: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default # SKIP Test requires IPv4 connectivity ok 2 ping.test_xdp_generic_sb # SKIP Test requires IPv4 connectivity ok 3 ping.test_xdp_generic_mb # SKIP Test requires IPv4 connectivity ok 4 ping.test_xdp_native_sb # SKIP Test requires IPv4 connectivity ok 5 ping.test_xdp_native_mb # SKIP Test requires IPv4 connectivity ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:0 fail:0 xfail:0 xpass:0 skip:6 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Reviewed-by: David Wei Link: https://patch.msgid.link/20250503013518.1722913-2-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 22 +++++++++------------ 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 4b6822866066..5272e8b3536d 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -9,7 +9,6 @@ from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port from lib.py import defer, ethtool, ip -remote_ifname="" no_sleep=False def _test_v4(cfg) -> None: @@ -57,7 +56,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None: def _set_xdp_generic_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") @@ -66,8 +65,8 @@ def _set_xdp_generic_sb_on(cfg) -> None: def _set_xdp_generic_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") @@ -76,7 +75,7 @@ def _set_xdp_generic_mb_on(cfg) -> None: def _set_xdp_native_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] @@ -93,8 +92,8 @@ def _set_xdp_native_sb_on(cfg) -> None: def _set_xdp_native_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) try: cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") @@ -112,18 +111,15 @@ def _set_xdp_offload_on(cfg) -> None: except Exception as e: raise KsftSkipEx('device does not support offloaded XDP') defer(ip, f"link set dev {cfg.ifname} xdpoffload off") - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) if no_sleep != True: time.sleep(10) def get_interface_info(cfg) -> None: - global remote_ifname global no_sleep - remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout - remote_ifname = remote_info.rstrip('\n') - if remote_ifname == "": + if cfg.remote_ifname == "": raise KsftFailEx('Can not get remote interface') local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": @@ -136,7 +132,7 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) def test_default(cfg, netnl) -> None: _set_offload_checksum(cfg, netnl, "off") From 8bb7d8e5cf7f44ea89a445975cbd78888324f234 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:17 -0700 Subject: [PATCH 187/239] selftests: drv: net: avoid skipping tests On a system with either of the ipv4 or ipv6 information missing, tests are currently skipped. Ideally, the test should run as long as at least one address family is present. This patch make test run whenever possible. Before: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default # SKIP Test requires IPv4 connectivity ok 2 ping.test_xdp_generic_sb # SKIP Test requires IPv4 connectivity ok 3 ping.test_xdp_generic_mb # SKIP Test requires IPv4 connectivity ok 4 ping.test_xdp_native_sb # SKIP Test requires IPv4 connectivity ok 5 ping.test_xdp_native_mb # SKIP Test requires IPv4 connectivity ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:0 fail:0 xfail:0 xpass:0 skip:6 error:0 After: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default ok 2 ping.test_xdp_generic_sb ok 3 ping.test_xdp_generic_mb ok 4 ping.test_xdp_native_sb ok 5 ping.test_xdp_native_mb ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:5 fail:0 xfail:0 xpass:0 skip:1 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250503013518.1722913-3-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 5272e8b3536d..16b7d3ab0fc8 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -12,7 +12,8 @@ from lib.py import defer, ethtool, ip no_sleep=False def _test_v4(cfg) -> None: - cfg.require_ipver("4") + if not cfg.addr_v["4"]: + return cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) @@ -20,7 +21,8 @@ def _test_v4(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def _test_v6(cfg) -> None: - cfg.require_ipver("6") + if not cfg.addr_v["6"]: + return cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) From 4a9d494ca24b7fd509b3953fcb43d580cb05097b Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:18 -0700 Subject: [PATCH 188/239] selftests: drv: net: add version indicator Currently, the test result does not differentiate between the cases when either one of the address families are configured or if both the address families are configured. Ideally, the result should report if a particular case was skipped. ./drivers/net/ping.py TAP version 13 1..7 ok 1 ping.test_default_v4 # SKIP Test requires IPv4 connectivity ok 2 ping.test_default_v6 ok 3 ping.test_xdp_generic_sb ok 4 ping.test_xdp_generic_mb ok 5 ping.test_xdp_native_sb ok 6 ping.test_xdp_native_mb ok 7 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:5 fail:0 xfail:0 xpass:0 skip:2 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Reviewed-by: David Wei Link: https://patch.msgid.link/20250503013518.1722913-4-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 16b7d3ab0fc8..af8df2313a3b 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -136,13 +136,23 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) -def test_default(cfg, netnl) -> None: +def test_default_v4(cfg, netnl) -> None: + cfg.require_ipver("4") + _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) - _test_v6(cfg) _test_tcp(cfg) _set_offload_checksum(cfg, netnl, "on") _test_v4(cfg) + _test_tcp(cfg) + +def test_default_v6(cfg, netnl) -> None: + cfg.require_ipver("6") + + _set_offload_checksum(cfg, netnl, "off") + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") _test_v6(cfg) _test_tcp(cfg) @@ -200,7 +210,8 @@ def main() -> None: with NetDrvEpEnv(__file__) as cfg: get_interface_info(cfg) set_interface_init(cfg) - ksft_run([test_default, + ksft_run([test_default_v4, + test_default_v6, test_xdp_generic_sb, test_xdp_generic_mb, test_xdp_native_sb, From 4720f9707c783f642332dee3d56dccaefa850e42 Mon Sep 17 00:00:00 2001 From: David Wei Date: Fri, 2 May 2025 21:30:50 -0700 Subject: [PATCH 189/239] tools: ynl-gen: validate 0 len strings from kernel Strings from the kernel are guaranteed to be null terminated and ynl_attr_validate() checks for this. But it doesn't check if the string has a len of 0, which would cause problems when trying to access data[len - 1]. Fix this by checking that len is positive. Signed-off-by: David Wei Link: https://patch.msgid.link/20250503043050.861238-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index ce32cb35007d..c4da34048ef8 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) "Invalid attribute (binary %s)", policy->name); return -1; case YNL_PT_NUL_STR: - if ((!policy->len || len <= policy->len) && !data[len - 1]) + if (len && (!policy->len || len <= policy->len) && !data[len - 1]) break; yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, "Invalid attribute (string %s)", policy->name); From 4db6c75124d871fbabf8243f947d34cc7e0697fc Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 5 May 2025 02:07:32 +0100 Subject: [PATCH 190/239] net: ethernet: mtk_eth_soc: reset all TX queues on DMA free The purpose of resetting the TX queue is to reset the byte and packet count as well as to clear the software flow control XOFF bit. MediaTek developers pointed out that netdev_reset_queue would only resets queue 0 of the network device. Queues that are not reset may cause unexpected issues. Packets may stop being sent after reset and "transmit timeout" log may be displayed. Import fix from MediaTek's SDK to resolve this issue. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/319c0d9905579a46dc448579f892f364f1f84818 Fixes: f63959c7eec31 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/c9ff9adceac4f152239a0f65c397f13547639175.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8fda4ce80d81..53c39561b6d9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3186,11 +3186,19 @@ static int mtk_dma_init(struct mtk_eth *eth) static void mtk_dma_free(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; - int i; + int i, j, txqs = 1; + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + txqs = MTK_QDMA_NUM_QUEUES; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + for (j = 0; j < txqs; j++) + netdev_tx_reset_subqueue(eth->netdev[i], j); + } - for (i = 0; i < MTK_MAX_DEVS; i++) - if (eth->netdev[i]) - netdev_reset_queue(eth->netdev[i]); if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && eth->scratch_ring) { dma_free_coherent(eth->dma_dev, MTK_QDMA_RING_SIZE * soc->tx.desc_size, From e8716b5b0dff1b3d523b4a83fd5e94d57b887c5c Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Mon, 5 May 2025 02:07:58 +0100 Subject: [PATCH 191/239] net: ethernet: mtk_eth_soc: do not reset PSE when setting FE Remove redundant PSE reset. When setting FE register there is no need to reset PSE, doing so may cause FE to work abnormal. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/3a5223473e086a4b54a2b9a44df7d9ddcc2bc75a Fixes: dee4dd10c79aa ("net: ethernet: mtk_eth_soc: ppe: add support for multiple PPEs") Signed-off-by: Frank Wunderlich Link: https://patch.msgid.link/18f0ac7d83f82defa3342c11ef0d1362f6b81e88.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 53c39561b6d9..22a532695fb0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3473,9 +3473,6 @@ static int mtk_open(struct net_device *dev) } mtk_gdm_config(eth, target_mac->id, gdm_config); } - /* Reset and enable PSE */ - mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); - mtk_w32(eth, 0, MTK_RST_GL); napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); From f1aff4bc199cb92c055668caed65505e3b4d2656 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 6 May 2025 11:31:50 +0000 Subject: [PATCH 192/239] dm: fix copying after src array boundaries The blammed commit copied to argv the size of the reallocated argv, instead of the size of the old_argv, thus reading and copying from past the old_argv allocated memory. Following BUG_ON was hit: [ 3.038929][ T1] kernel BUG at lib/string_helpers.c:1040! [ 3.039147][ T1] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... [ 3.056489][ T1] Call trace: [ 3.056591][ T1] __fortify_panic+0x10/0x18 (P) [ 3.056773][ T1] dm_split_args+0x20c/0x210 [ 3.056942][ T1] dm_table_add_target+0x13c/0x360 [ 3.057132][ T1] table_load+0x110/0x3ac [ 3.057292][ T1] dm_ctl_ioctl+0x424/0x56c [ 3.057457][ T1] __arm64_sys_ioctl+0xa8/0xec [ 3.057634][ T1] invoke_syscall+0x58/0x10c [ 3.057804][ T1] el0_svc_common+0xa8/0xdc [ 3.057970][ T1] do_el0_svc+0x1c/0x28 [ 3.058123][ T1] el0_svc+0x50/0xac [ 3.058266][ T1] el0t_64_sync_handler+0x60/0xc4 [ 3.058452][ T1] el0t_64_sync+0x1b0/0x1b4 [ 3.058620][ T1] Code: f800865e a9bf7bfd 910003fd 941f48aa (d4210000) [ 3.058897][ T1] ---[ end trace 0000000000000000 ]--- [ 3.059083][ T1] Kernel panic - not syncing: Oops - BUG: Fatal exception Fix it by copying the size of src, and not the size of dst, as it was. Fixes: 5a2a6c428190 ("dm: always update the array size in realloc_argv on success") Cc: stable@vger.kernel.org Signed-off-by: Tudor Ambarus Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 31d67a1a91dd..6b23e777e10e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -524,9 +524,9 @@ static char **realloc_argv(unsigned int *size, char **old_argv) } argv = kmalloc_array(new_size, sizeof(*argv), gfp); if (argv) { - *size = new_size; if (old_argv) memcpy(argv, old_argv, *size * sizeof(*argv)); + *size = new_size; } kfree(old_argv); From 0ca6df4f40cf4c32487944aaf48319cb6c25accc Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Fri, 2 May 2025 08:21:58 +0900 Subject: [PATCH 193/239] ksmbd: prevent out-of-bounds stream writes by validating *pos ksmbd_vfs_stream_write() did not validate whether the write offset (*pos) was within the bounds of the existing stream data length (v_len). If *pos was greater than or equal to v_len, this could lead to an out-of-bounds memory write. This patch adds a check to ensure *pos is less than v_len before proceeding. If the condition fails, -EINVAL is returned. Cc: stable@vger.kernel.org Signed-off-by: Norbert Szetei Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 391d07da586c..482eba0f4dc1 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -426,6 +426,13 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, goto out; } + if (v_len <= *pos) { + pr_err("stream write position %lld is out of bounds (stream length: %zd)\n", + *pos, v_len); + err = -EINVAL; + goto out; + } + if (v_len < size) { wbuf = kvzalloc(size, KSMBD_DEFAULT_GFP); if (!wbuf) { From 36991c1ccde2d5a521577c448ffe07fcccfe104d Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Tue, 6 May 2025 22:04:52 +0900 Subject: [PATCH 194/239] ksmbd: Fix UAF in __close_file_table_ids A use-after-free is possible if one thread destroys the file via __ksmbd_close_fd while another thread holds a reference to it. The existing checks on fp->refcount are not sufficient to prevent this. The fix takes ft->lock around the section which removes the file from the file table. This prevents two threads acquiring the same file pointer via __close_file_table_ids, as well as the other functions which retrieve a file from the IDR and which already use this same lock. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 1f8fa3468173..dfed6fce8904 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -661,21 +661,40 @@ __close_file_table_ids(struct ksmbd_file_table *ft, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)) { - unsigned int id; - struct ksmbd_file *fp; - int num = 0; + struct ksmbd_file *fp; + unsigned int id = 0; + int num = 0; - idr_for_each_entry(ft->idr, fp, id) { - if (skip(tcon, fp)) + while (1) { + write_lock(&ft->lock); + fp = idr_get_next(ft->idr, &id); + if (!fp) { + write_unlock(&ft->lock); + break; + } + + if (skip(tcon, fp) || + !atomic_dec_and_test(&fp->refcount)) { + id++; + write_unlock(&ft->lock); continue; + } set_close_state_blocked_works(fp); + idr_remove(ft->idr, fp->volatile_id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - if (!atomic_dec_and_test(&fp->refcount)) - continue; __ksmbd_close_fd(ft, fp); + num++; + id++; } + return num; } From dcaeeb8ae84c5506ebc574732838264f3887738c Mon Sep 17 00:00:00 2001 From: Antonios Salios Date: Fri, 25 Apr 2025 13:17:45 +0200 Subject: [PATCH 195/239] can: m_can: m_can_class_allocate_dev(): initialize spin lock on device probe The spin lock tx_handling_spinlock in struct m_can_classdev is not being initialized. This leads the following spinlock bad magic complaint from the kernel, eg. when trying to send CAN frames with cansend from can-utils: | BUG: spinlock bad magic on CPU#0, cansend/95 | lock: 0xff60000002ec1010, .magic: 00000000, .owner: /-1, .owner_cpu: 0 | CPU: 0 UID: 0 PID: 95 Comm: cansend Not tainted 6.15.0-rc3-00032-ga79be02bba5c #5 NONE | Hardware name: MachineWare SIM-V (DT) | Call Trace: | [] dump_backtrace+0x1c/0x24 | [] show_stack+0x28/0x34 | [] dump_stack_lvl+0x4a/0x68 | [] dump_stack+0x14/0x1c | [] spin_dump+0x62/0x6e | [] do_raw_spin_lock+0xd0/0x142 | [] _raw_spin_lock_irqsave+0x20/0x2c | [] m_can_start_xmit+0x90/0x34a | [] dev_hard_start_xmit+0xa6/0xee | [] sch_direct_xmit+0x114/0x292 | [] __dev_queue_xmit+0x3b0/0xaa8 | [] can_send+0xc6/0x242 | [] raw_sendmsg+0x1a8/0x36c | [] sock_write_iter+0x9a/0xee | [] vfs_write+0x184/0x3a6 | [] ksys_write+0xa0/0xc0 | [] __riscv_sys_write+0x14/0x1c | [] do_trap_ecall_u+0x168/0x212 | [] handle_exception+0x146/0x152 Initializing the spin lock in m_can_class_allocate_dev solves that problem. Fixes: 1fa80e23c150 ("can: m_can: Introduce a tx_fifo_in_flight counter") Signed-off-by: Antonios Salios Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250425111744.37604-2-antonios@mwa.re Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 884a6352c42b..326ede9d400f 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2379,6 +2379,7 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, SET_NETDEV_DEV(net_dev, dev); m_can_of_parse_mram(class_dev, mram_config_vals); + spin_lock_init(&class_dev->tx_handling_spinlock); out: return class_dev; } From 5e1663810e11c64956aa7e280cf74b2f3284d816 Mon Sep 17 00:00:00 2001 From: Kelsey Maes Date: Wed, 30 Apr 2025 09:15:01 -0700 Subject: [PATCH 196/239] can: mcp251xfd: fix TDC setting for low data bit rates The TDC is currently hardcoded enabled. This means that even for lower CAN-FD data bitrates (with a DBRP (data bitrate prescaler) > 2) a TDC is configured. This leads to a bus-off condition. ISO 11898-1 section 11.3.3 says "Transmitter delay compensation" (TDC) is only applicable if DBRP is 1 or 2. To fix the problem, switch the driver to use the TDC calculation provided by the CAN driver framework (which respects ISO 11898-1 section 11.3.3). This has the positive side effect that userspace can control TDC as needed. Demonstration of the feature in action: | $ ip link set can0 up type can bitrate 125000 dbitrate 500000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 125000 sample-point 0.875 | tq 50 prop-seg 69 phase-seg1 70 phase-seg2 20 sjw 10 brp 2 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 500000 dsample-point 0.875 | dtq 125 dprop-seg 6 dphase-seg1 7 dphase-seg2 2 dsjw 1 dbrp 5 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 | $ ip link set can0 up type can bitrate 1000000 dbitrate 4000000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 1000000 sample-point 0.750 | tq 25 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 5 brp 1 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 4000000 dsample-point 0.700 | dtq 25 dprop-seg 3 dphase-seg1 3 dphase-seg2 3 dsjw 1 dbrp 1 | tdco 7 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 There has been some confusion about the MCP2518FD using a relative or absolute TDCO due to the datasheet specifying a range of [-64,63]. I have a custom board with a 40 MHz clock and an estimated loop delay of 100 to 216 ns. During testing at a data bit rate of 4 Mbit/s I found that using can_get_relative_tdco() resulted in bus-off errors. The final TDCO value was 1 which corresponds to a 10% SSP in an absolute configuration. This behavior is expected if the TDCO value is really absolute and not relative. Using priv->can.tdc.tdco instead results in a final TDCO of 8, setting the SSP at exactly 80%. This configuration works. The automatic, manual, and off TDC modes were tested at speeds up to, and including, 8 Mbit/s on real hardware and behave as expected. Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Reported-by: Kelsey Maes Closes: https://lore.kernel.org/all/C2121586-C87F-4B23-A933-845362C29CA1@vpprocess.com Reviewed-by: Vincent Mailhol Signed-off-by: Kelsey Maes Link: https://patch.msgid.link/20250430161501.79370-1-kelsey@vpprocess.com [mkl: add comment] Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 3bc56517fe7a..064d81c724f4 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -75,6 +75,24 @@ static const struct can_bittiming_const mcp251xfd_data_bittiming_const = { .brp_inc = 1, }; +/* The datasheet of the mcp2518fd (DS20006027B) specifies a range of + * [-64,63] for TDCO, indicating a relative TDCO. + * + * Manual tests have shown, that using a relative TDCO configuration + * results in bus off, while an absolute configuration works. + * + * For TDCO use the max value (63) from the data sheet, but 0 as the + * minimum. + */ +static const struct can_tdc_const mcp251xfd_tdc_const = { + .tdcv_min = 0, + .tdcv_max = 63, + .tdco_min = 0, + .tdco_max = 63, + .tdcf_min = 0, + .tdcf_max = 0, +}; + static const char *__mcp251xfd_get_model_str(enum mcp251xfd_model model) { switch (model) { @@ -510,8 +528,7 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) { const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u32 val = 0; - s8 tdco; + u32 tdcmod, val = 0; int err; /* CAN Control Register @@ -575,11 +592,16 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) return err; /* Transmitter Delay Compensation */ - tdco = clamp_t(int, dbt->brp * (dbt->prop_seg + dbt->phase_seg1), - -64, 63); - val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, - MCP251XFD_REG_TDC_TDCMOD_AUTO) | - FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, tdco); + if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_AUTO) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_AUTO; + else if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_MANUAL) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_MANUAL; + else + tdcmod = MCP251XFD_REG_TDC_TDCMOD_DISABLED; + + val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, tdcmod) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCV_MASK, priv->can.tdc.tdcv) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, priv->can.tdc.tdco); return regmap_write(priv->map_reg, MCP251XFD_REG_TDC, val); } @@ -2083,10 +2105,12 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.do_get_berr_counter = mcp251xfd_get_berr_counter; priv->can.bittiming_const = &mcp251xfd_bittiming_const; priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; + priv->can.tdc_const = &mcp251xfd_tdc_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | - CAN_CTRLMODE_CC_LEN8_DLC; + CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_TDC_AUTO | + CAN_CTRLMODE_TDC_MANUAL; set_bit(MCP251XFD_FLAGS_DOWN, priv->flags); priv->ndev = ndev; priv->spi = spi; From 84f5eb833f53ae192baed4cfb8d9eaab43481fc9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:44 +0200 Subject: [PATCH 197/239] can: mcp251xfd: mcp251xfd_remove(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. With the mcp251xfd driver the removal of the module causes the following warning: | WARNING: CPU: 0 PID: 352 at net/core/dev.c:7342 __netif_napi_del_locked+0xc8/0xd8 as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-1-59a9b131589d@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 064d81c724f4..c30b04f8fc0d 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2198,8 +2198,8 @@ static void mcp251xfd_remove(struct spi_device *spi) struct mcp251xfd_priv *priv = spi_get_drvdata(spi); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); mcp251xfd_unregister(priv); + can_rx_offload_del(&priv->offload); spi->max_speed_hz = priv->spi_max_speed_hz_orig; free_candev(ndev); } From 037ada7a3181300218e4fd78bef6a741cfa7f808 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:45 +0200 Subject: [PATCH 198/239] can: rockchip_canfd: rkcanfd_remove(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: ff60bfbaf67f ("can: rockchip_canfd: add driver for Rockchip CAN-FD controller") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-2-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/rockchip_canfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index 7107a37da36c..c3fb3176ce42 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -937,8 +937,8 @@ static void rkcanfd_remove(struct platform_device *pdev) struct rkcanfd_priv *priv = platform_get_drvdata(pdev); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); rkcanfd_unregister(priv); + can_rx_offload_del(&priv->offload); free_candev(ndev); } From 0713a1b3276b98c7dafbeefef00d7bc3a9119a84 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:46 +0200 Subject: [PATCH 199/239] can: mcan: m_can_class_unregister(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-3-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 326ede9d400f..c2c116ce1087 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2463,9 +2463,9 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { + unregister_candev(cdev->net); if (cdev->is_peripheral) can_rx_offload_del(&cdev->offload); - unregister_candev(cdev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); From 511e64e13d8cc72853275832e3f372607466c18c Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 29 Apr 2025 09:05:55 +0200 Subject: [PATCH 200/239] can: gw: fix RCU/BH usage in cgw_create_job() As reported by Sebastian Andrzej Siewior the use of local_bh_disable() is only feasible in uni processor systems to update the modification rules. The usual use-case to update the modification rules is to update the data of the modifications but not the modification types (AND/OR/XOR/SET) or the checksum functions itself. To omit additional memory allocations to maintain fast modification switching times, the modification description space is doubled at gw-job creation time so that only the reference to the active modification description is changed under rcu protection. Rename cgw_job::mod to cf_mod and make it a RCU pointer. Allocate in cgw_create_job() and free it together with cgw_job in cgw_job_free_rcu(). Update all users to dereference cgw_job::cf_mod with a RCU accessor and if possible once. [bigeasy: Replace mod1/mod2 from the Oliver's original patch with dynamic allocation, use RCU annotation and accessor] Reported-by: Sebastian Andrzej Siewior Closes: https://lore.kernel.org/linux-can/20231031112349.y0aLoBrz@linutronix.de/ Fixes: dd895d7f21b2 ("can: cangw: introduce optional uid to reference created routing jobs") Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250429070555.cs-7b_eZ@linutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 151 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 60 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index ef93293c1fae..55eccb1c7620 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -130,7 +130,7 @@ struct cgw_job { u32 handled_frames; u32 dropped_frames; u32 deleted_frames; - struct cf_mod mod; + struct cf_mod __rcu *cf_mod; union { /* CAN frame data source */ struct net_device *dev; @@ -459,6 +459,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; + struct cf_mod *mod; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ @@ -506,7 +507,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ - if (gwj->mod.modfunc[0]) + mod = rcu_dereference(gwj->cf_mod); + if (mod->modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -529,8 +531,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ - while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) - (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + while (modidx < MAX_MODFUNCTIONS && mod->modfunc[modidx]) + (*mod->modfunc[modidx++])(cf, mod); /* Has the CAN frame been modified? */ if (modidx) { @@ -546,11 +548,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) } /* check for checksum updates */ - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (mod->csumfunc.crc8) + (*mod->csumfunc.crc8)(cf, &mod->csum.crc8); - if (gwj->mod.csumfunc.xor) - (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + if (mod->csumfunc.xor) + (*mod->csumfunc.xor)(cf, &mod->csum.xor); } /* clear the skb timestamp if not configured the other way */ @@ -581,9 +583,20 @@ static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); + /* cgw_job::cf_mod is always accessed from the same cgw_job object within + * the same RCU read section. Once cgw_job is scheduled for removal, + * cf_mod can also be removed without mandating an additional grace period. + */ + kfree(rcu_access_pointer(gwj->cf_mod)); kmem_cache_free(cgw_cache, gwj); } +/* Return cgw_job::cf_mod with RTNL protected section */ +static struct cf_mod *cgw_job_cf_mod(struct cgw_job *gwj) +{ + return rcu_dereference_protected(gwj->cf_mod, rtnl_is_locked()); +} + static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { @@ -616,6 +629,7 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; + struct cf_mod *mod; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) @@ -650,82 +664,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + mod = cgw_job_cf_mod(gwj); if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } - if (gwj->mod.uid) { - if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + if (mod->uid) { + if (nla_put_u32(skb, CGW_MOD_UID, mod->uid) < 0) goto cancel; } - if (gwj->mod.csumfunc.crc8) { + if (mod->csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, - &gwj->mod.csum.crc8) < 0) + &mod->csum.crc8) < 0) goto cancel; } - if (gwj->mod.csumfunc.xor) { + if (mod->csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, - &gwj->mod.csum.xor) < 0) + &mod->csum.xor) < 0) goto cancel; } @@ -1059,7 +1074,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; - struct cf_mod mod; + struct cf_mod *mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -1078,37 +1093,48 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; - err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); - if (err < 0) - return err; + mod = kmalloc(sizeof(*mod), GFP_KERNEL); + if (!mod) + return -ENOMEM; - if (mod.uid) { + err = cgw_parse_attr(nlh, mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + if (err < 0) + goto out_free_cf; + + if (mod->uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) + struct cf_mod *old_cf; + + old_cf = cgw_job_cf_mod(gwj); + if (old_cf->uid != mod->uid) continue; /* interfaces & filters must be identical */ - if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) - return -EINVAL; + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) { + err = -EINVAL; + goto out_free_cf; + } - /* update modifications with disabled softirq & quit */ - local_bh_disable(); - memcpy(&gwj->mod, &mod, sizeof(mod)); - local_bh_enable(); + rcu_assign_pointer(gwj->cf_mod, mod); + kfree_rcu_mightsleep(old_cf); return 0; } } /* ifindex == 0 is not allowed for job creation */ - if (!ccgw.src_idx || !ccgw.dst_idx) - return -ENODEV; + if (!ccgw.src_idx || !ccgw.dst_idx) { + err = -ENODEV; + goto out_free_cf; + } gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); - if (!gwj) - return -ENOMEM; + if (!gwj) { + err = -ENOMEM; + goto out_free_cf; + } gwj->handled_frames = 0; gwj->dropped_frames = 0; @@ -1118,7 +1144,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->limit_hops = limhops; /* insert already parsed information */ - memcpy(&gwj->mod, &mod, sizeof(mod)); + RCU_INIT_POINTER(gwj->cf_mod, mod); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; @@ -1152,9 +1178,11 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: - if (err) + if (err) { kmem_cache_free(cgw_cache, gwj); - +out_free_cf: + kfree(mod); + } return err; } @@ -1214,19 +1242,22 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { + struct cf_mod *cf_mod; + if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; + cf_mod = cgw_job_cf_mod(gwj); /* we have a match when uid is enabled and identical */ - if (gwj->mod.uid || mod.uid) { - if (gwj->mod.uid != mod.uid) + if (cf_mod->uid || mod.uid) { + if (cf_mod->uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ - if (memcmp(&gwj->mod, &mod, sizeof(mod))) + if (memcmp(cf_mod, &mod, sizeof(mod))) continue; } From 023c1f2f0609218103cbcb48e0104b144d4a16dc Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 24 Apr 2025 18:01:42 +0530 Subject: [PATCH 201/239] wifi: cfg80211: fix out-of-bounds access during multi-link element defragmentation Currently during the multi-link element defragmentation process, the multi-link element length added to the total IEs length when calculating the length of remaining IEs after the multi-link element in cfg80211_defrag_mle(). This could lead to out-of-bounds access if the multi-link element or its corresponding fragment elements are the last elements in the IEs buffer. To address this issue, correctly calculate the remaining IEs length by deducting the multi-link element end offset from total IEs end offset. Cc: stable@vger.kernel.org Fixes: 2481b5da9c6b ("wifi: cfg80211: handle BSS data contained in ML probe responses") Signed-off-by: Veerendranath Jakkam Link: https://patch.msgid.link/20250424-fix_mle_defragmentation_oob_access-v1-1-84412a1743fa@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9865f305275d..ddd3a97f6609 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2681,7 +2681,7 @@ cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen, /* Required length for first defragmentation */ buf_len = mle->datalen - 1; for_each_element(elem, mle->data + mle->datalen, - ielen - sizeof(*mle) + mle->datalen) { + ie + ielen - mle->data - mle->datalen) { if (elem->id != WLAN_EID_FRAGMENT) break; From e12a42f64fc3d74872b349eedd47f90c6676b78a Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Mon, 5 May 2025 16:19:46 +0800 Subject: [PATCH 202/239] wifi: mac80211: fix the type of status_code for negotiated TID to Link Mapping The status code should be type of __le16. Fixes: 83e897a961b8 ("wifi: ieee80211: add definitions for negotiated TID to Link map") Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Signed-off-by: Michael-CY Lee Link: https://patch.msgid.link/20250505081946.3927214-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- net/mac80211/mlme.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 508d466de1cc..457b4fba88bd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1526,7 +1526,7 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5d1f2d6d09ad..35eaf0812c5b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7675,6 +7675,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; + u16 status_code; skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len); if (!skb) @@ -7697,19 +7698,18 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, WARN_ON(1); fallthrough; case NEG_TTLM_RES_REJECT: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; + status_code = WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; break; case NEG_TTLM_RES_ACCEPT: - mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS; + status_code = WLAN_STATUS_SUCCESS; break; case NEG_TTLM_RES_SUGGEST_PREFERRED: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; + status_code = WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); break; } + mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code); ieee80211_tx_skb(sdata, skb); } @@ -7875,7 +7875,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, * This can be better implemented in the future, to handle request * rejections. */ - if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS) + if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) __ieee80211_disconnect(sdata); } From ebedf8b7f05b9c886d68d63025db8d1b12343157 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 May 2025 21:42:59 +0200 Subject: [PATCH 203/239] wifi: iwlwifi: add support for Killer on MTL For now, we need another entry for these devices, this will be changed completely for 6.16. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219926 Link: https://patch.msgid.link/20250506214258.2efbdc9e9a82.I31915ec252bd1c74bd53b89a0e214e42a74b6f2e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index debeea2b3ae5..00056e76ea3d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -588,6 +588,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x7A70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x7AF0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), IWL_DEV_INFO(0x7AF0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), + IWL_DEV_INFO(0x7F70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), + IWL_DEV_INFO(0x7F70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name), IWL_DEV_INFO(0x7E40, 0x1691, iwl_cfg_ma, iwl_ax411_killer_1690s_name), From 0093cb194a7511d1e68865fa35b763c72e44c2f0 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 5 May 2025 09:19:38 -0700 Subject: [PATCH 204/239] ice: use DSN instead of PCI BDF for ice_adapter index Use Device Serial Number instead of PCI bus/device/function for the index of struct ice_adapter. Functions on the same physical device should point to the very same ice_adapter instance, but with two PFs, when at least one of them is PCI-e passed-through to a VM, it is no longer the case - PFs will get seemingly random PCI BDF values, and thus indices, what finally leds to each of them being on their own instance of ice_adapter. That causes them to don't attempt any synchronization of the PTP HW clock usage, or any other future resources. DSN works nicely in place of the index, as it is "immutable" in terms of virtualization. Fixes: 0e2bddf9e5f9 ("ice: add ice_adapter for shared data across PFs on the same NIC") Suggested-by: Jacob Keller Suggested-by: Jakub Kicinski Suggested-by: Jiri Pirko Reviewed-by: Aleksandr Loktionov Signed-off-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250505161939.2083581-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_adapter.c | 47 ++++++++------------ drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 01a08cfd0090..66e070095d1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright Red Hat -#include #include #include #include @@ -14,32 +13,16 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ -#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) -#define INDEX_FIELD_DEV GENMASK(31, 16) -#define INDEX_FIELD_BUS GENMASK(12, 5) -#define INDEX_FIELD_SLOT GENMASK(4, 0) - -static unsigned long ice_adapter_index(const struct pci_dev *pdev) +static unsigned long ice_adapter_index(u64 dsn) { - unsigned int domain = pci_domain_nr(pdev->bus); - - WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); - - switch (pdev->device) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); - default: - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); - } +#if BITS_PER_LONG == 64 + return dsn; +#else + return (u32)dsn ^ (u32)(dsn >> 32); +#endif } -static struct ice_adapter *ice_adapter_new(void) +static struct ice_adapter *ice_adapter_new(u64 dsn) { struct ice_adapter *adapter; @@ -47,6 +30,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + adapter->device_serial_number = dsn; spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); @@ -77,23 +61,26 @@ static void ice_adapter_free(struct ice_adapter *adapter) * Return: Pointer to ice_adapter on success. * ERR_PTR() on error. -ENOMEM is the only possible error. */ -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; int err; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); + WARN_ON_ONCE(adapter->device_serial_number != dsn); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(); + adapter = ice_adapter_new(dsn); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -110,11 +97,13 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) * * Context: Process, may sleep. */ -void ice_adapter_put(const struct pci_dev *pdev) +void ice_adapter_put(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index e233225848b3..ac15c0d2bc1a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -32,6 +32,7 @@ struct ice_port_list { * @refcount: Reference count. struct ice_pf objects hold the references. * @ctrl_pf: Control PF of the adapter * @ports: Ports list + * @device_serial_number: DSN cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -40,9 +41,10 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; + u64 device_serial_number; }; -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); -void ice_adapter_put(const struct pci_dev *pdev); +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); +void ice_adapter_put(struct pci_dev *pdev); #endif /* _ICE_ADAPTER_H */ From 08e9f2d584c4732180edee4cb2dbfa7586d7d5a3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Mon, 5 May 2025 22:47:13 +0300 Subject: [PATCH 205/239] net: Lock netdevices during dev_shutdown __qdisc_destroy() calls into various qdiscs .destroy() op, which in turn can call .ndo_setup_tc(), which requires the netdev instance lock. This commit extends the critical section in unregister_netdevice_many_notify() to cover dev_shutdown() (and dev_tcx_uninstall() as a side-effect) and acquires the netdev instance lock in __dev_change_net_namespace() for the other dev_shutdown() call. This should now guarantee that for all qdisc ops, the netdev instance lock is held during .ndo_setup_tc(). Fixes: a0527ee2df3f ("net: hold netdev instance lock during qdisc ndo_setup_tc") Signed-off-by: Cosmin Ratiu Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250505194713.1723399-1-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1be7cb73a602..92e004c354ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11966,9 +11966,9 @@ void unregister_netdevice_many_notify(struct list_head *head, struct sk_buff *skb = NULL; /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); dev_tcx_uninstall(dev); - netdev_lock_ops(dev); dev_xdp_uninstall(dev); dev_memory_provider_uninstall(dev); netdev_unlock_ops(dev); @@ -12161,7 +12161,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, synchronize_net(); /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); + netdev_unlock_ops(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. From 35076d2223c731f7be75af61e67f90807384d030 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 6 May 2025 18:18:50 +0800 Subject: [PATCH 206/239] erofs: ensure the extra temporary copy is valid for shortened bvecs When compressed data deduplication is enabled, multiple logical extents may reference the same compressed physical cluster. The previous commit 94c43de73521 ("erofs: fix wrong primary bvec selection on deduplicated extents") already avoids using shortened bvecs. However, in such cases, the extra temporary buffers also need to be preserved for later use in z_erofs_fill_other_copies() to to prevent data corruption. IOWs, extra temporary buffers have to be retained not only due to varying start relative offsets (`pageofs_out`, as indicated by `pcl->multibases`) but also because of shortened bvecs. android.hardware.graphics.composer@2.1.so : 270696 bytes 0: 0.. 204185 | 204185 : 628019200.. 628084736 | 65536 -> 1: 204185.. 225536 | 21351 : 544063488.. 544129024 | 65536 2: 225536.. 270696 | 45160 : 0.. 0 | 0 com.android.vndk.v28.apex : 93814897 bytes ... 364: 53869896..54095257 | 225361 : 543997952.. 544063488 | 65536 -> 365: 54095257..54309344 | 214087 : 544063488.. 544129024 | 65536 366: 54309344..54514557 | 205213 : 544129024.. 544194560 | 65536 ... Both 204185 and 54095257 have the same start relative offset of 3481, but the logical page 55 of `android.hardware.graphics.composer@2.1.so` ranges from 225280 to 229632, forming a shortened bvec [225280, 225536) that cannot be used for decompressing the range from 54095257 to 54309344 of `com.android.vndk.v28.apex`. Since `pcl->multibases` is already meaningless, just mark `be->keepxcpy` on demand for simplicity. Again, this issue can only lead to data corruption if `-Ededupe` is on. Fixes: 94c43de73521 ("erofs: fix wrong primary bvec selection on deduplicated extents") Reviewed-by: Hongbo Li Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250506101850.191506-1-hsiangkao@linux.alibaba.com --- fs/erofs/zdata.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 5c061aaeeb45..b8e6b76c23d5 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -79,9 +79,6 @@ struct z_erofs_pcluster { /* L: whether partial decompression or not */ bool partial; - /* L: indicate several pageofs_outs or not */ - bool multibases; - /* L: whether extra buffer allocations are best-effort */ bool besteffort; @@ -1046,8 +1043,6 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, break; erofs_onlinefolio_split(folio); - if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) - f->pcl->multibases = true; if (f->pcl->length < offset + end - map->m_la) { f->pcl->length = offset + end - map->m_la; f->pcl->pageofs_out = map->m_la & ~PAGE_MASK; @@ -1093,7 +1088,6 @@ struct z_erofs_backend { struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES]; struct super_block *sb; struct z_erofs_pcluster *pcl; - /* pages with the longest decompressed length for deduplication */ struct page **decompressed_pages; /* pages to keep the compressed data */ @@ -1102,6 +1096,8 @@ struct z_erofs_backend { struct list_head decompressed_secondary_bvecs; struct page **pagepool; unsigned int onstack_used, nr_pages; + /* indicate if temporary copies should be preserved for later use */ + bool keepxcpy; }; struct z_erofs_bvec_item { @@ -1112,18 +1108,20 @@ struct z_erofs_bvec_item { static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be, struct z_erofs_bvec *bvec) { + int poff = bvec->offset + be->pcl->pageofs_out; struct z_erofs_bvec_item *item; - unsigned int pgnr; + struct page **page; - if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && - (bvec->end == PAGE_SIZE || - bvec->offset + bvec->end == be->pcl->length)) { - pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; - DBG_BUGON(pgnr >= be->nr_pages); - if (!be->decompressed_pages[pgnr]) { - be->decompressed_pages[pgnr] = bvec->page; + if (!(poff & ~PAGE_MASK) && (bvec->end == PAGE_SIZE || + bvec->offset + bvec->end == be->pcl->length)) { + DBG_BUGON((poff >> PAGE_SHIFT) >= be->nr_pages); + page = be->decompressed_pages + (poff >> PAGE_SHIFT); + if (!*page) { + *page = bvec->page; return; } + } else { + be->keepxcpy = true; } /* (cold path) one pcluster is requested multiple times */ @@ -1289,7 +1287,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) .alg = pcl->algorithmformat, .inplace_io = overlapped, .partial_decoding = pcl->partial, - .fillgaps = pcl->multibases, + .fillgaps = be->keepxcpy, .gfp = pcl->besteffort ? GFP_KERNEL : GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); @@ -1346,7 +1344,6 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) pcl->length = 0; pcl->partial = true; - pcl->multibases = false; pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; From 78cd408356fe3edbac66598772fd347bf3e32c1f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 5 May 2025 18:19:19 -0700 Subject: [PATCH 207/239] net: add missing instance lock to dev_set_promiscuity Accidentally spotted while trying to understand what else needs to be renamed to netif_ prefix. Most of the calls to dev_set_promiscuity are adjacent to dev_set_allmulti or dev_disable_lro so it should be safe to add the lock. Note that new netif_set_promiscuity is currently unused, the locked paths call __dev_set_promiscuity directly. Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250506011919.2882313-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 14 +------------- net/core/dev_api.c | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d11d013cabe..7ea022750e4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4972,6 +4972,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); diff --git a/net/core/dev.c b/net/core/dev.c index 92e004c354ea..11da1e272ec2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9193,18 +9193,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - * Return 0 if successful or a negative errno code on error. - */ -int dev_set_promiscuity(struct net_device *dev, int inc) +int netif_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; int err; @@ -9216,7 +9205,6 @@ int dev_set_promiscuity(struct net_device *dev, int inc) dev_set_rx_mode(dev); return err; } -EXPORT_SYMBOL(dev_set_promiscuity); int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 90898cd540ce..f9a160ab596f 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -267,6 +267,29 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +/** + * dev_set_promiscuity() - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. + */ +int dev_set_promiscuity(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_promiscuity(dev, inc); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_promiscuity); + /** * dev_set_allmulti() - update allmulti count on a device * @dev: device From 42420c50c68f3e95e90de2479464f420602229fc Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 30 Apr 2025 15:26:18 +0200 Subject: [PATCH 208/239] s390/pci: Fix missing check for zpci_create_device() error return The zpci_create_device() function returns an error pointer that needs to be checked before dereferencing it as a struct zpci_dev pointer. Add the missing check in __clp_add() where it was missed when adding the scan_list in the fixed commit. Simply not adding the device to the scan list results in the previous behavior. Cc: stable@vger.kernel.org Fixes: 0467cdde8c43 ("s390/pci: Sort PCI functions prior to creating virtual busses") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_clp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 9a929bbcc397..241f7251c873 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -428,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } From 05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 30 Apr 2025 15:26:19 +0200 Subject: [PATCH 209/239] s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs With commit bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") the code to ignore power off of a PF that has child VFs was changed from a direct return to a goto to the unlock and pci_dev_put() section. The change however left the existing pci_dev_put() untouched resulting in a doubple put. This can subsequently cause a use after free if the struct pci_dev is released in an unexpected state. Fix this by removing the extra pci_dev_put(). Cc: stable@vger.kernel.org Fixes: bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens --- drivers/pci/hotplug/s390_pci_hpc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 055518ee354d..e9e9aaa91770 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -59,7 +59,6 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); if (pdev && pci_num_vf(pdev)) { - pci_dev_put(pdev); rc = -EBUSY; goto out; } From e34090d7214e0516eb8722aee295cb2507317c07 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 3 May 2025 01:01:18 +0300 Subject: [PATCH 210/239] ipvs: fix uninit-value for saddr in do_output_route4 syzbot reports for uninit-value for the saddr argument [1]. commit 4754957f04f5 ("ipvs: do not use random local source address for tunnels") already implies that the input value of saddr should be ignored but the code is still reading it which can prevent to connect the route. Fix it by changing the argument to ret_saddr. [1] BUG: KMSAN: uninit-value in do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 __ip_vs_get_out_rt+0x403/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:330 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Uninit was created at: slab_post_alloc_hook mm/slub.c:4167 [inline] slab_alloc_node mm/slub.c:4210 [inline] __kmalloc_cache_noprof+0x8fa/0xe00 mm/slub.c:4367 kmalloc_noprof include/linux/slab.h:905 [inline] ip_vs_dest_dst_alloc net/netfilter/ipvs/ip_vs_xmit.c:61 [inline] __ip_vs_get_out_rt+0x35d/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:323 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e CPU: 0 UID: 0 PID: 22408 Comm: syz.4.5165 Not tainted 6.15.0-rc3-syzkaller-00019-gbc3372351d0c #0 PREEMPT(undef) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Reported-by: syzbot+04b9a82855c8aed20860@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68138dfa.050a0220.14dd7d.0017.GAE@google.com/ Fixes: 4754957f04f5 ("ipvs: do not use random local source address for tunnels") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_xmit.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3313bceb6cc9..014f07740369 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -119,13 +119,12 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) return false; } -/* Get route to daddr, update *saddr, optionally bind route to saddr */ +/* Get route to daddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - int rt_mode, __be32 *saddr) + int rt_mode, __be32 *ret_saddr) { struct flowi4 fl4; struct rtable *rt; - bool loop = false; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; @@ -135,23 +134,17 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, retry: rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { - /* Invalid saddr ? */ - if (PTR_ERR(rt) == -EINVAL && *saddr && - rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { - *saddr = 0; - flowi4_update_output(&fl4, 0, daddr, 0); - goto retry; - } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; - } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + } + if (rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); - *saddr = fl4.saddr; flowi4_update_output(&fl4, 0, daddr, fl4.saddr); - loop = true; + rt_mode = 0; goto retry; } - *saddr = fl4.saddr; + if (ret_saddr) + *ret_saddr = fl4.saddr; return rt; } @@ -344,19 +337,15 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { - __be32 saddr = htonl(INADDR_ANY); - noref = 0; /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, ret_saddr); if (!rt) goto err_unreach; - if (ret_saddr) - *ret_saddr = saddr; } local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; From 8478a729c0462273188263136880480729e9efca Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 7 May 2025 17:01:59 +0200 Subject: [PATCH 211/239] netfilter: ipset: fix region locking in hash types Region locking introduced in v5.6-rc4 contained three macros to handle the region locks: ahash_bucket_start(), ahash_bucket_end() which gave back the start and end hash bucket values belonging to a given region lock and ahash_region() which should give back the region lock belonging to a given hash bucket. The latter was incorrect which can lead to a race condition between the garbage collector and adding new elements when a hash type of set is defined with timeouts. Fixes: f66ee0410b1c ("netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports") Reported-by: Kota Toda Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cf3ce72c3de6..5251524b96af 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -64,7 +64,7 @@ struct hbucket { #define ahash_sizeof_regions(htable_bits) \ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) #define ahash_region(n, htable_bits) \ - ((n) % ahash_numof_locks(htable_bits)) + ((n) / jhash_size(HTABLE_REGION_BITS)) #define ahash_bucket_start(h, htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? 0 \ : (h) * jhash_size(HTABLE_REGION_BITS)) From 6beb6835c1fbb3f676aebb51a5fee6b77fed9308 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 6 May 2025 16:28:54 +0200 Subject: [PATCH 212/239] openvswitch: Fix unsafe attribute parsing in output_userspace() This patch replaces the manual Netlink attribute iteration in output_userspace() with nla_for_each_nested(), which ensures that only well-formed attributes are processed. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Eelco Chaudron Acked-by: Ilya Maximets Acked-by: Aaron Conole Link: https://patch.msgid.link/0bd65949df61591d9171c0dc13e42cea8941da10.1746541734.git.echaudro@redhat.com Signed-off-by: Jakub Kicinski --- net/openvswitch/actions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 61fea7baae5d..2f22ca59586f 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -975,8 +975,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.mru = OVS_CB(skb)->mru; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { + nla_for_each_nested(a, attr, rem) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; From 4a7843cc8a41b9612becccc07715ed017770eb89 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 6 May 2025 18:56:47 +0200 Subject: [PATCH 213/239] net: airoha: Add missing field to ppe_mbox_data struct The official Airoha EN7581 firmware requires adding max_packet field in ppe_mbox_data struct while the unofficial one used to develop the Airoha EN7581 flowtable support does not require this field. This patch does not introduce any real backwards compatible issue since EN7581 fw is not publicly available in linux-firmware or other repositories (e.g. OpenWrt) yet and the official fw version will use this new layout. For this reason this change needs to be backported. Moreover, make explicit the padding added by the compiler introducing the rsv array in init_info struct. At the same time use u32 instead of int for init_info and set_info struct definitions in ppe_mbox_data struct. Fixes: 23290c7bc190d ("net: airoha: Introduce Airoha NPU support") Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250506-airoha-en7581-fix-ppe_mbox_data-v5-1-29cabed6864d@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_npu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 7a5710f9ccf6..ead0625e781f 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -104,12 +104,14 @@ struct ppe_mbox_data { u8 xpon_hal_api; u8 wan_xsi; u8 ct_joyme4; - int ppe_type; - int wan_mode; - int wan_sel; + u8 max_packet; + u8 rsv[3]; + u32 ppe_type; + u32 wan_mode; + u32 wan_sel; } init_info; struct { - int func_id; + u32 func_id; u32 size; u32 data; } set_info; From c4327229948879814229b46aa26a750718888503 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:04 +0200 Subject: [PATCH 214/239] bpf: Scrub packet on bpf_redirect_peer When bpf_redirect_peer is used to redirect packets to a device in another network namespace, the skb isn't scrubbed. That can lead skb information from one namespace to be "misused" in another namespace. As one example, this is causing Cilium to drop traffic when using bpf_redirect_peer to redirect packets that just went through IPsec decryption to a container namespace. The following pwru trace shows (1) the packet path from the host's XFRM layer to the container's XFRM layer where it's dropped and (2) the number of active skb extensions at each function. NETNS MARK IFACE TUPLE FUNC 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm4_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 gro_cells_receive .active_extensions = (__u8)2, [...] 4026533547 0 eth0 10.244.3.124:35473->10.244.2.158:53 skb_do_redirect .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv_core .active_extensions = (__u8)2, [...] 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 udp_queue_rcv_one_skb .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_policy_check .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 security_xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 kfree_skb_reason(SKB_DROP_REASON_XFRM_POLICY) .active_extensions = (__u8)2, In this case, there are no XFRM policies in the container's network namespace so the drop is unexpected. When we decrypt the IPsec packet, the XFRM state used for decryption is set in the skb extensions. This information is preserved across the netns switch. When we reach the XFRM policy check in the container's netns, __xfrm_policy_check drops the packet with LINUX_MIB_XFRMINNOPOLS because a (container-side) XFRM policy can't be found that matches the (host-side) XFRM state used for decryption. This patch fixes this by scrubbing the packet when using bpf_redirect_peer, as is done on typical netns switches via veth devices except skb->mark and skb->tstamp are not zeroed. Fixes: 9aa1206e8f482 ("bpf: Add redirect_peer helper") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/1728ead5e0fe45e7a6542c36bd4e3ca07a73b7d6.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index 79cab4d78dc3..577a4504e26f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2509,6 +2509,7 @@ int skb_do_redirect(struct sk_buff *skb) goto out_drop; skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); + skb_scrub_packet(skb, false); return -EAGAIN; } return flags & BPF_F_NEIGH ? From f5c79ffdc250bc8c90fd4fdf1e5d7ac4647912d5 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:39 +0200 Subject: [PATCH 215/239] bpf: Clarify handling of mark and tstamp by redirect_peer When switching network namespaces with the bpf_redirect_peer helper, the skb->mark and skb->tstamp fields are not zeroed out like they can be on a typical netns switch. This patch clarifies that in the helper description. Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/ccc86af26d43c5c0b776bcba2601b7479c0d46d0.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/bpf.h | 3 +++ tools/include/uapi/linux/bpf.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The From e5641daa0ea1932e2b0fa7f27843e712244f6538 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:44 +0530 Subject: [PATCH 216/239] net: ti: icssg-prueth: Set XDP feature flags for ndev xdp_features demonstrates what all XDP capabilities are supported on a given network device. The driver needs to set these xdp_features flag to let the network stack know what XDP features a given driver is supporting. These flags need to be set for a given ndev irrespective of any XDP program being loaded or not. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-2-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 443f90fa6557..ee35fecf61e7 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1109,11 +1109,6 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf) { struct bpf_prog *prog = bpf->prog; - xdp_features_t val; - - val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; - xdp_set_features_flag(emac->ndev, val); if (!emac->xdpi.prog && !prog) return 0; @@ -1291,6 +1286,10 @@ static int prueth_netdev_init(struct prueth *prueth, ndev->hw_features = NETIF_F_SG; ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->hw_features |= NETIF_PRUETH_HSR_OFFLOAD_FEATURES; + xdp_set_features_flag(ndev, + NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT); netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); hrtimer_setup(&emac->rx_hrtimer, &emac_rx_timer_callback, CLOCK_MONOTONIC, From 8b3fae3e2376b70b7a76005263bc34d034b9c7bf Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:45 +0530 Subject: [PATCH 217/239] net: ti: icssg-prueth: Fix kernel panic during concurrent Tx queue access Add __netif_tx_lock() to ensure that only one packet is being transmitted at a time to avoid race conditions in the netif_txq struct and prevent packet data corruption. Failing to do so causes kernel panic with the following error: [ 2184.746764] ------------[ cut here ]------------ [ 2184.751412] kernel BUG at lib/dynamic_queue_limits.c:99! [ 2184.756728] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP logs: https://gist.github.com/MeghanaMalladiTI/9c7aa5fc3b7fb03f87c74aad487956e9 The lock is acquired before calling emac_xmit_xdp_frame() and released after the call returns. This ensures that the TX queue is protected from concurrent access during the transmission of XDP frames. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-3-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 7 ++++++- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index b4be76e13a2f..38a80e004d1c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -650,6 +650,8 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, struct page *page, u32 *len) { struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct bpf_prog *xdp_prog; struct xdp_frame *xdpf; u32 pkt_len = *len; @@ -669,8 +671,11 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, goto drop; } - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); + __netif_tx_lock(netif_txq, cpu); result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); + __netif_tx_unlock(netif_txq); if (result == ICSSG_XDP_CONSUMED) { ndev->stats.tx_dropped++; goto drop; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index ee35fecf61e7..86fc1278127c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1075,17 +1075,21 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame { struct prueth_emac *emac = netdev_priv(dev); struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct xdp_frame *xdpf; unsigned int q_idx; int nxmit = 0; u32 err; int i; - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; + __netif_tx_lock(netif_txq, cpu); for (i = 0; i < n; i++) { xdpf = frames[i]; err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); @@ -1095,6 +1099,7 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame } nxmit++; } + __netif_tx_unlock(netif_txq); return nxmit; } From 1884fc85ae6ed0652fa324c66b1d89e25174e73b Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:46 +0530 Subject: [PATCH 218/239] net: ti: icssg-prueth: Report BQL before sending XDP packets When sending out any kind of traffic, it is essential that the driver keeps reporting BQL of the number of bytes that have been sent so that BQL can track the amount of data in the queue and prevents it from overflowing. If BQL is not reported, the driver may continue sending packets even when the queue is full, leading to packet loss, congestion and decreased network performance. Currently this is missing in emac_xmit_xdp_frame() and this patch fixes it. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-4-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 38a80e004d1c..d88a0180294e 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -187,7 +187,6 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, xdp_return_frame(xdpf); break; default: - netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type); prueth_xmit_free(tx_chn, desc_tx); ndev->stats.tx_dropped++; continue; @@ -567,6 +566,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, { struct cppi5_host_desc_t *first_desc; struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; struct prueth_tx_chn *tx_chn; dma_addr_t desc_dma, buf_dma; struct prueth_swdata *swdata; @@ -620,12 +620,17 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, swdata->data.xdpf = xdpf; } + /* Report BQL before sending the packet */ + netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); + netdev_tx_sent_queue(netif_txq, xdpf->len); + cppi5_hdesc_set_pktlen(first_desc, xdpf->len); desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); if (ret) { netdev_err(ndev, "xdp tx: push failed: %d\n", ret); + netdev_tx_completed_queue(netif_txq, 1, xdpf->len); goto drop_free_descs; } @@ -984,6 +989,7 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); if (ret) { netdev_err(ndev, "tx: push failed: %d\n", ret); + netdev_tx_completed_queue(netif_txq, 1, pkt_len); goto drop_free_descs; } From 5f93185a757ff38b36f849c659aeef368db15a68 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:00 +0200 Subject: [PATCH 219/239] net: dsa: b53: allow leaky reserved multicast Allow reserved multicast to ignore VLAN membership so STP and other management protocols work without a PVID VLAN configured when using a vlan aware bridge. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index e5ba71897906..62866165ad03 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -373,9 +373,11 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, b53_read8(dev, B53_VLAN_PAGE, B53_VLAN_CTRL5, &vc5); } + vc1 &= ~VC1_RX_MCST_FWD_EN; + if (enable) { vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID; - vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN; + vc1 |= VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; if (enable_filtering) { vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; @@ -393,7 +395,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, } else { vc0 &= ~(VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID); - vc1 &= ~(VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN); + vc1 &= ~VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; vc5 &= ~VC5_DROP_VTABLE_MISS; From 425f11d4cc9bd9e97e6825d9abb2c51a068ca7b5 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:01 +0200 Subject: [PATCH 220/239] net: dsa: b53: keep CPU port always tagged again The Broadcom management header does not carry the original VLAN tag state information, just the ingress port, so for untagged frames we do not know from which VLAN they originated. Therefore keep the CPU port always tagged except for VLAN 0. Fixes the following setup: $ ip link add br0 type bridge vlan_filtering 1 $ ip link set sw1p1 master br0 $ bridge vlan add dev br0 pvid untagged self $ ip link add sw1p2.10 link sw1p2 type vlan id 10 Where VID 10 would stay untagged on the CPU port. Fixes: 2c32a3d3c233 ("net: dsa: b53: Do not force CPU to be always tagged") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 62866165ad03..9d4fb54b4ced 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1135,6 +1135,11 @@ static int b53_setup(struct dsa_switch *ds) */ ds->untag_bridge_pvid = dev->tag_protocol == DSA_TAG_PROTO_NONE; + /* The switch does not tell us the original VLAN for untagged + * packets, so keep the CPU port always tagged. + */ + ds->untag_vlan_aware_bridge_pvid = true; + ret = b53_reset_switch(dev); if (ret) { dev_err(ds->dev, "failed to reset switch\n"); @@ -1545,6 +1550,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0 && vlan->vid == b53_default_pvid(dev)) untagged = true; + if (vlan->vid > 0 && dsa_is_cpu_port(ds, port)) + untagged = false; + vl->members |= BIT(port); if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port)) vl->untag |= BIT(port); From f480851981043d9bb6447ca9883ade9247b9a0ad Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:02 +0200 Subject: [PATCH 221/239] net: dsa: b53: fix clearing PVID of a port Currently the PVID of ports are only set when adding/updating VLANs with PVID set or removing VLANs, but not when clearing the PVID flag of a VLAN. E.g. the following flow $ ip link add br0 type bridge vlan_filtering 1 $ ip link set sw1p1 master bridge $ bridge vlan add dev sw1p1 vid 10 pvid untagged $ bridge vlan add dev sw1p1 vid 10 untagged Would keep the PVID set as 10, despite the flag being cleared. Fix this by checking if we need to unset the PVID on vlan updates. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-4-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9d4fb54b4ced..65d74c455c57 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1537,12 +1537,21 @@ int b53_vlan_add(struct dsa_switch *ds, int port, bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct b53_vlan *vl; + u16 old_pvid, new_pvid; int err; err = b53_vlan_prepare(ds, port, vlan); if (err) return err; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); + if (pvid) + new_pvid = vlan->vid; + else if (!pvid && vlan->vid == old_pvid) + new_pvid = b53_default_pvid(dev); + else + new_pvid = old_pvid; + vl = &dev->vlans[vlan->vid]; b53_get_vlan_entry(dev, vlan->vid, vl); @@ -1562,9 +1571,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); - if (pvid && !dsa_is_cpu_port(ds, port)) { + if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), - vlan->vid); + new_pvid); b53_fast_age_vlan(dev, vlan->vid); } From 083c6b28c0cbcd83b6af1a10f2c82937129b3438 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:03 +0200 Subject: [PATCH 222/239] net: dsa: b53: fix flushing old pvid VLAN on pvid change Presumably the intention here was to flush the VLAN of the old pvid, not the added VLAN again, which we already flushed before. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-5-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 65d74c455c57..c67c0b5fbc1b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1574,7 +1574,7 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), new_pvid); - b53_fast_age_vlan(dev, vlan->vid); + b53_fast_age_vlan(dev, old_pvid); } return 0; From a1c1901c5cc881425cc45992ab6c5418174e9e5a Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:04 +0200 Subject: [PATCH 223/239] net: dsa: b53: fix VLAN ID for untagged vlan on bridge leave The untagged default VLAN is added to the default vlan, which may be one, but we modify the VLAN 0 entry on bridge leave. Fix this to use the correct VLAN entry for the default pvid. Fixes: fea83353177a ("net: dsa: b53: Fix default VLAN ID") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-6-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c67c0b5fbc1b..c60b552b945c 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1986,7 +1986,7 @@ EXPORT_SYMBOL(b53_br_join); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) { struct b53_device *dev = ds->priv; - struct b53_vlan *vl = &dev->vlans[0]; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; unsigned int i; u16 pvlan, reg, pvid; @@ -2012,6 +2012,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) dev->ports[port].vlan_ctl_mask = pvlan; pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; /* Make this port join all VLANs without VLAN entries */ if (is58xx(dev)) { From 13b152ae40495966501697693f048f47430c50fd Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:05 +0200 Subject: [PATCH 224/239] net: dsa: b53: always rejoin default untagged VLAN on bridge leave While JOIN_ALL_VLAN allows to join all VLANs, we still need to keep the default VLAN enabled so that untagged traffic stays untagged. So rejoin the default VLAN even for switches with JOIN_ALL_VLAN support. Fixes: 48aea33a77ab ("net: dsa: b53: Add JOIN_ALL_VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-7-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c60b552b945c..4871e117f5ef 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2021,12 +2021,12 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) if (!(reg & BIT(cpu_port))) reg |= BIT(cpu_port); b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } else { - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); - b53_set_vlan_entry(dev, pvid, vl); } + + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port) | BIT(cpu_port); + b53_set_vlan_entry(dev, pvid, vl); } EXPORT_SYMBOL(b53_br_leave); From 45e9d59d39503bb3e6ab4d258caea4ba6496e2dc Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:06 +0200 Subject: [PATCH 225/239] net: dsa: b53: do not allow to configure VLAN 0 Since we cannot set forwarding destinations per VLAN, we should not have a VLAN 0 configured, as it would allow untagged traffic to work across ports on VLAN aware bridges regardless if a PVID untagged VLAN exists. So remove the VLAN 0 on join, an re-add it on leave. But only do so if we have a VLAN aware bridge, as without it, untagged traffic would become tagged with VID 0 on a VLAN unaware bridge. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-8-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 36 ++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 4871e117f5ef..0b28791cca52 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1544,6 +1544,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (err) return err; + if (vlan->vid == 0) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); if (pvid) new_pvid = vlan->vid; @@ -1556,10 +1559,7 @@ int b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vlan->vid, vl); - if (vlan->vid == 0 && vlan->vid == b53_default_pvid(dev)) - untagged = true; - - if (vlan->vid > 0 && dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port)) untagged = false; vl->members |= BIT(port); @@ -1589,6 +1589,9 @@ int b53_vlan_del(struct dsa_switch *ds, int port, struct b53_vlan *vl; u16 pvid; + if (vlan->vid == 0) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); vl = &dev->vlans[vlan->vid]; @@ -1935,8 +1938,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack) { struct b53_device *dev = ds->priv; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - u16 pvlan, reg; + u16 pvlan, reg, pvid; unsigned int i; /* On 7278, port 7 which connects to the ASP should only receive @@ -1945,6 +1949,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, if (dev->chip_id == BCM7278_DEVICE_ID && port == 7) return -EINVAL; + pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + /* Make this port leave the all VLANs join since we will have proper * VLAN entries from now on */ @@ -1956,6 +1963,15 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); } + if (ds->vlan_filtering) { + b53_get_vlan_entry(dev, pvid, vl); + vl->members &= ~BIT(port); + if (vl->members == BIT(cpu_port)) + vl->members &= ~BIT(cpu_port); + vl->untag = vl->members; + b53_set_vlan_entry(dev, pvid, vl); + } + b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan); b53_for_each_port(dev, i) { @@ -2023,10 +2039,12 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); } - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); - b53_set_vlan_entry(dev, pvid, vl); + if (ds->vlan_filtering) { + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port) | BIT(cpu_port); + b53_set_vlan_entry(dev, pvid, vl); + } } EXPORT_SYMBOL(b53_br_leave); From f089652b6b16452535dcc5cbaa6e2bb05acd3f93 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:07 +0200 Subject: [PATCH 226/239] net: dsa: b53: do not program vlans when vlan filtering is off Documentation/networking/switchdev.rst says: - with VLAN filtering turned off: the bridge is strictly VLAN unaware and its data path will process all Ethernet frames as if they are VLAN-untagged. The bridge VLAN database can still be modified, but the modifications should have no effect while VLAN filtering is turned off. This breaks if we immediately apply the VLAN configuration, so skip writing it when vlan_filtering is off. Fixes: 0ee2af4ebbe3 ("net: dsa: set configure_vlan_while_not_filtering to true by default") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-9-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 48 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0b28791cca52..ee2f1be62618 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1547,6 +1547,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; + if (!ds->vlan_filtering) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); if (pvid) new_pvid = vlan->vid; @@ -1592,6 +1595,9 @@ int b53_vlan_del(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; + if (!ds->vlan_filtering) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); vl = &dev->vlans[vlan->vid]; @@ -1952,18 +1958,20 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - /* Make this port leave the all VLANs join since we will have proper - * VLAN entries from now on - */ - if (is58xx(dev)) { - b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); - reg &= ~BIT(port); - if ((reg & BIT(cpu_port)) == BIT(cpu_port)) - reg &= ~BIT(cpu_port); - b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } - if (ds->vlan_filtering) { + /* Make this port leave the all VLANs join since we will have + * proper VLAN entries from now on + */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, + ®); + reg &= ~BIT(port); + if ((reg & BIT(cpu_port)) == BIT(cpu_port)) + reg &= ~BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, + reg); + } + b53_get_vlan_entry(dev, pvid, vl); vl->members &= ~BIT(port); if (vl->members == BIT(cpu_port)) @@ -2030,16 +2038,16 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - /* Make this port join all VLANs without VLAN entries */ - if (is58xx(dev)) { - b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); - reg |= BIT(port); - if (!(reg & BIT(cpu_port))) - reg |= BIT(cpu_port); - b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } - if (ds->vlan_filtering) { + /* Make this port join all VLANs without VLAN entries */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); + reg |= BIT(port); + if (!(reg & BIT(cpu_port))) + reg |= BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + } + b53_get_vlan_entry(dev, pvid, vl); vl->members |= BIT(port) | BIT(cpu_port); vl->untag |= BIT(port) | BIT(cpu_port); From 2dc2bd57111582895e10f54ea380329c89873f1c Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:08 +0200 Subject: [PATCH 227/239] net: dsa: b53: fix toggling vlan_filtering To allow runtime switching between vlan aware and vlan non-aware mode, we need to properly keep track of any bridge VLAN configuration. Likewise, we need to know when we actually switch between both modes, to not have to rewrite the full VLAN table every time we update the VLANs. So keep track of the current vlan_filtering mode, and on changes, apply the appropriate VLAN configuration. Fixes: 0ee2af4ebbe3 ("net: dsa: set configure_vlan_while_not_filtering to true by default") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-10-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 110 +++++++++++++++++++++---------- drivers/net/dsa/b53/b53_priv.h | 2 + 2 files changed, 78 insertions(+), 34 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index ee2f1be62618..0a7749b416f7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -763,6 +763,22 @@ static bool b53_vlan_port_needs_forced_tagged(struct dsa_switch *ds, int port) return dev->tag_protocol == DSA_TAG_PROTO_NONE && dsa_is_cpu_port(ds, port); } +static bool b53_vlan_port_may_join_untagged(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + struct dsa_port *dp; + + if (!dev->vlan_filtering) + return true; + + dp = dsa_to_port(ds, port); + + if (dsa_port_is_cpu(dp)) + return true; + + return dp->bridge == NULL; +} + int b53_configure_vlan(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; @@ -781,7 +797,7 @@ int b53_configure_vlan(struct dsa_switch *ds) b53_do_vlan_op(dev, VTA_CMD_CLEAR); } - b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering); + b53_enable_vlan(dev, -1, dev->vlan_enabled, dev->vlan_filtering); /* Create an untagged VLAN entry for the default PVID in case * CONFIG_VLAN_8021Q is disabled and there are no calls to @@ -789,26 +805,39 @@ int b53_configure_vlan(struct dsa_switch *ds) * entry. Do this only when the tagging protocol is not * DSA_TAG_PROTO_NONE */ + v = &dev->vlans[def_vid]; b53_for_each_port(dev, i) { - v = &dev->vlans[def_vid]; - v->members |= BIT(i); - if (!b53_vlan_port_needs_forced_tagged(ds, i)) - v->untag = v->members; - b53_write16(dev, B53_VLAN_PAGE, - B53_VLAN_PORT_DEF_TAG(i), def_vid); - } - - /* Upon initial call we have not set-up any VLANs, but upon - * system resume, we need to restore all VLAN entries. - */ - for (vid = def_vid; vid < dev->num_vlans; vid++) { - v = &dev->vlans[vid]; - - if (!v->members) + if (!b53_vlan_port_may_join_untagged(ds, i)) continue; - b53_set_vlan_entry(dev, vid, v); - b53_fast_age_vlan(dev, vid); + vl.members |= BIT(i); + if (!b53_vlan_port_needs_forced_tagged(ds, i)) + vl.untag = vl.members; + b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(i), + def_vid); + } + b53_set_vlan_entry(dev, def_vid, &vl); + + if (dev->vlan_filtering) { + /* Upon initial call we have not set-up any VLANs, but upon + * system resume, we need to restore all VLAN entries. + */ + for (vid = def_vid + 1; vid < dev->num_vlans; vid++) { + v = &dev->vlans[vid]; + + if (!v->members) + continue; + + b53_set_vlan_entry(dev, vid, v); + b53_fast_age_vlan(dev, vid); + } + + b53_for_each_port(dev, i) { + if (!dsa_is_cpu_port(ds, i)) + b53_write16(dev, B53_VLAN_PAGE, + B53_VLAN_PORT_DEF_TAG(i), + dev->ports[i].pvid); + } } return 0; @@ -1127,7 +1156,9 @@ EXPORT_SYMBOL(b53_setup_devlink_resources); static int b53_setup(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; + struct b53_vlan *vl; unsigned int port; + u16 pvid; int ret; /* Request bridge PVID untagged when DSA_TAG_PROTO_NONE is set @@ -1146,6 +1177,15 @@ static int b53_setup(struct dsa_switch *ds) return ret; } + /* setup default vlan for filtering mode */ + pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + b53_for_each_port(dev, port) { + vl->members |= BIT(port); + if (!b53_vlan_port_needs_forced_tagged(ds, port)) + vl->untag |= BIT(port); + } + b53_reset_mib(dev); ret = b53_apply_config(dev); @@ -1499,7 +1539,10 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, { struct b53_device *dev = ds->priv; - b53_enable_vlan(dev, port, dev->vlan_enabled, vlan_filtering); + if (dev->vlan_filtering != vlan_filtering) { + dev->vlan_filtering = vlan_filtering; + b53_apply_config(dev); + } return 0; } @@ -1524,7 +1567,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port, if (vlan->vid >= dev->num_vlans) return -ERANGE; - b53_enable_vlan(dev, port, true, ds->vlan_filtering); + b53_enable_vlan(dev, port, true, dev->vlan_filtering); return 0; } @@ -1547,21 +1590,17 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; - if (!ds->vlan_filtering) - return 0; - - b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); + old_pvid = dev->ports[port].pvid; if (pvid) new_pvid = vlan->vid; else if (!pvid && vlan->vid == old_pvid) new_pvid = b53_default_pvid(dev); else new_pvid = old_pvid; + dev->ports[port].pvid = new_pvid; vl = &dev->vlans[vlan->vid]; - b53_get_vlan_entry(dev, vlan->vid, vl); - if (dsa_is_cpu_port(ds, port)) untagged = false; @@ -1571,6 +1610,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, else vl->untag &= ~BIT(port); + if (!dev->vlan_filtering) + return 0; + b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); @@ -1595,23 +1637,22 @@ int b53_vlan_del(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; - if (!ds->vlan_filtering) - return 0; - - b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); + pvid = dev->ports[port].pvid; vl = &dev->vlans[vlan->vid]; - b53_get_vlan_entry(dev, vlan->vid, vl); - vl->members &= ~BIT(port); if (pvid == vlan->vid) pvid = b53_default_pvid(dev); + dev->ports[port].pvid = pvid; if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port)) vl->untag &= ~(BIT(port)); + if (!dev->vlan_filtering) + return 0; + b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); @@ -1958,7 +1999,7 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - if (ds->vlan_filtering) { + if (dev->vlan_filtering) { /* Make this port leave the all VLANs join since we will have * proper VLAN entries from now on */ @@ -2038,7 +2079,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - if (ds->vlan_filtering) { + if (dev->vlan_filtering) { /* Make this port join all VLANs without VLAN entries */ if (is58xx(dev)) { b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); @@ -2803,6 +2844,7 @@ struct b53_device *b53_switch_alloc(struct device *base, ds->ops = &b53_switch_ops; ds->phylink_mac_ops = &b53_phylink_mac_ops; dev->vlan_enabled = true; + dev->vlan_filtering = false; /* Let DSA handle the case were multiple bridges span the same switch * device and different VLAN awareness settings are requested, which * would be breaking filtering semantics for any of the other bridge diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 0166c37a13a7..4636e27fd1ee 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -96,6 +96,7 @@ struct b53_pcs { struct b53_port { u16 vlan_ctl_mask; + u16 pvid; struct ethtool_keee eee; }; @@ -147,6 +148,7 @@ struct b53_device { unsigned int num_vlans; struct b53_vlan *vlans; bool vlan_enabled; + bool vlan_filtering; unsigned int num_ports; struct b53_port *ports; From 9f34ad89bcf0e6df6f8b01f1bdab211493fc66d1 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:09 +0200 Subject: [PATCH 228/239] net: dsa: b53: fix learning on VLAN unaware bridges When VLAN filtering is off, we configure the switch to forward, but not learn on VLAN table misses. This effectively disables learning while not filtering. Fix this by switching to forward and learn. Setting the learning disable register will still control whether learning actually happens. Fixes: dad8d7c6452b ("net: dsa: b53: Properly account for VLAN filtering") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-11-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0a7749b416f7..a2c0b44fc6be 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -383,7 +383,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; vc5 |= VC5_DROP_VTABLE_MISS; } else { - vc4 |= VC4_ING_VID_VIO_FWD << VC4_ING_VID_CHECK_S; + vc4 |= VC4_NO_ING_VID_CHK << VC4_ING_VID_CHECK_S; vc5 &= ~VC5_DROP_VTABLE_MISS; } From 2e7179c628d3cb9aee75e412473813b099e11ed4 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:10 +0200 Subject: [PATCH 229/239] net: dsa: b53: do not set learning and unicast/multicast on up When a port gets set up, b53 disables learning and enables the port for flooding. This can undo any bridge configuration on the port. E.g. the following flow would disable learning on a port: $ ip link add br0 type bridge $ ip link set sw1p1 master br0 <- enables learning for sw1p1 $ ip link set br0 up $ ip link set sw1p1 up <- disables learning again Fix this by populating dsa_switch_ops::port_setup(), and set up initial config there. Fixes: f9b3827ee66c ("net: dsa: b53: Support setting learning on port") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-12-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 21 +++++++++++++-------- drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/bcm_sf2.c | 1 + 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a2c0b44fc6be..9eb39cfa5fb2 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -578,6 +578,18 @@ static void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable) b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg); } +int b53_setup_port(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + + b53_port_set_ucast_flood(dev, port, true); + b53_port_set_mcast_flood(dev, port, true); + b53_port_set_learning(dev, port, false); + + return 0; +} +EXPORT_SYMBOL(b53_setup_port); + int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; @@ -590,10 +602,6 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - b53_port_set_ucast_flood(dev, port, true); - b53_port_set_mcast_flood(dev, port, true); - b53_port_set_learning(dev, port, false); - if (dev->ops->irq_enable) ret = dev->ops->irq_enable(dev, port); if (ret) @@ -724,10 +732,6 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port) b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl); b53_brcm_hdr_setup(dev->ds, port); - - b53_port_set_ucast_flood(dev, port, true); - b53_port_set_mcast_flood(dev, port, true); - b53_port_set_learning(dev, port, false); } static void b53_enable_mib(struct b53_device *dev) @@ -2387,6 +2391,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .phy_read = b53_phy_read16, .phy_write = b53_phy_write16, .phylink_get_caps = b53_phylink_get_caps, + .port_setup = b53_setup_port, .port_enable = b53_enable_port, .port_disable = b53_disable_port, .support_eee = b53_support_eee, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 4636e27fd1ee..2cf3e6a81e37 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -384,6 +384,7 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); +int b53_setup_port(struct dsa_switch *ds, int port); int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); void b53_disable_port(struct dsa_switch *ds, int port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index fa2bf3fa9019..454a8c7fd7ee 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1230,6 +1230,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .resume = bcm_sf2_sw_resume, .get_wol = bcm_sf2_sw_get_wol, .set_wol = bcm_sf2_sw_set_wol, + .port_setup = b53_setup_port, .port_enable = bcm_sf2_port_setup, .port_disable = bcm_sf2_port_disable, .support_eee = b53_support_eee, From f34343cc11afc7bb1f881c3492bee3484016bf71 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:39 -0700 Subject: [PATCH 230/239] fbnic: Fix initialization of mailbox descriptor rings Address to issues with the FW mailbox descriptor initialization. We need to reverse the order of accesses when we invalidate an entry versus writing an entry. When writing an entry we write upper and then lower as the lower 32b contain the valid bit that makes the entire address valid. However for invalidation we should write it in the reverse order so that the upper is marked invalid before we update it. Without this change we may see FW attempt to access pages with the upper 32b of the address set to 0 which will likely result in DMAR faults due to write access failures on mailbox shutdown. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654717972.499179.8083789731819297034.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 32 ++++++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 88db3dacb940..c4956f0a741e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -17,11 +17,29 @@ static void __fbnic_mbx_wr_desc(struct fbnic_dev *fbd, int mbx_idx, { u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); + /* Write the upper 32b and then the lower 32b. Doing this the + * FW can then read lower, upper, lower to verify that the state + * of the descriptor wasn't changed mid-transaction. + */ fw_wr32(fbd, desc_offset + 1, upper_32_bits(desc)); fw_wrfl(fbd); fw_wr32(fbd, desc_offset, lower_32_bits(desc)); } +static void __fbnic_mbx_invalidate_desc(struct fbnic_dev *fbd, int mbx_idx, + int desc_idx, u32 desc) +{ + u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); + + /* For initialization we write the lower 32b of the descriptor first. + * This way we can set the state to mark it invalid before we clear the + * upper 32b. + */ + fw_wr32(fbd, desc_offset, desc); + fw_wrfl(fbd); + fw_wr32(fbd, desc_offset + 1, 0); +} + static u64 __fbnic_mbx_rd_desc(struct fbnic_dev *fbd, int mbx_idx, int desc_idx) { u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); @@ -41,21 +59,17 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) * solid stop for the firmware to hit when it is done looping * through the ring. */ - __fbnic_mbx_wr_desc(fbd, mbx_idx, 0, 0); - - fw_wrfl(fbd); + __fbnic_mbx_invalidate_desc(fbd, mbx_idx, 0, 0); /* We then fill the rest of the ring starting at the end and moving * back toward descriptor 0 with skip descriptors that have no * length nor address, and tell the firmware that they can skip * them and just move past them to the one we initialized to 0. */ - for (desc_idx = FBNIC_IPC_MBX_DESC_LEN; --desc_idx;) { - __fbnic_mbx_wr_desc(fbd, mbx_idx, desc_idx, - FBNIC_IPC_MBX_DESC_FW_CMPL | - FBNIC_IPC_MBX_DESC_HOST_CMPL); - fw_wrfl(fbd); - } + for (desc_idx = FBNIC_IPC_MBX_DESC_LEN; --desc_idx;) + __fbnic_mbx_invalidate_desc(fbd, mbx_idx, desc_idx, + FBNIC_IPC_MBX_DESC_FW_CMPL | + FBNIC_IPC_MBX_DESC_HOST_CMPL); } void fbnic_mbx_init(struct fbnic_dev *fbd) From 3b12f00ddd08e888273b2ac0488d396d90a836fc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:46 -0700 Subject: [PATCH 231/239] fbnic: Gate AXI read/write enabling on FW mailbox In order to prevent the device from throwing spurious writes and/or reads at us we need to gate the AXI fabric interface to the PCIe until such time as we know the FW is in a known good state. To accomplish this we use the mailbox as a mechanism for us to recognize that the FW has acknowledged our presence and is no longer sending any stale message data to us. We start in fbnic_mbx_init by calling fbnic_mbx_reset_desc_ring function, disabling the DMA in both directions, and then invalidating all the descriptors in each ring. We then poll the mailbox in fbnic_mbx_poll_tx_ready and when the interrupt is set by the FW we pick it up and mark the mailboxes as ready, while also enabling the DMA. Once we have completed all the transactions and need to shut down we call into fbnic_mbx_clean which will in turn call fbnic_mbx_reset_desc_ring for each ring and shut down the DMA and once again invalidate the descriptors. Fixes: 3646153161f1 ("eth: fbnic: Add register init to set PCIe/Ethernet device config") Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654718623.499179.7445197308109347982.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 2 ++ drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 38 +++++++++++++++++---- drivers/net/ethernet/meta/fbnic/fbnic_mac.c | 6 ---- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 3b12a0ab5906..51bee8072420 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -796,8 +796,10 @@ enum { /* PUL User Registers */ #define FBNIC_CSR_START_PUL_USER 0x31000 /* CSR section delimiter */ #define FBNIC_PUL_OB_TLP_HDR_AW_CFG 0x3103d /* 0xc40f4 */ +#define FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME CSR_BIT(18) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG 0x3103e /* 0xc40f8 */ +#define FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME CSR_BIT(18) #define FBNIC_PUL_USER_OB_RD_TLP_CNT_31_0 \ 0x3106e /* 0xc41b8 */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index c4956f0a741e..f4749bfd840c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -51,10 +51,26 @@ static u64 __fbnic_mbx_rd_desc(struct fbnic_dev *fbd, int mbx_idx, int desc_idx) return desc; } -static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) +static void fbnic_mbx_reset_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { int desc_idx; + /* Disable DMA transactions from the device, + * and flush any transactions triggered during cleaning + */ + switch (mbx_idx) { + case FBNIC_IPC_MBX_RX_IDX: + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, + FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH); + break; + case FBNIC_IPC_MBX_TX_IDX: + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, + FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH); + break; + } + + wrfl(fbd); + /* Initialize first descriptor to all 0s. Doing this gives us a * solid stop for the firmware to hit when it is done looping * through the ring. @@ -90,7 +106,7 @@ void fbnic_mbx_init(struct fbnic_dev *fbd) wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_init_desc_ring(fbd, i); + fbnic_mbx_reset_desc_ring(fbd, i); } static int fbnic_mbx_map_msg(struct fbnic_dev *fbd, int mbx_idx, @@ -155,7 +171,7 @@ static void fbnic_mbx_clean_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { int i; - fbnic_mbx_init_desc_ring(fbd, mbx_idx); + fbnic_mbx_reset_desc_ring(fbd, mbx_idx); for (i = FBNIC_IPC_MBX_DESC_LEN; i--;) fbnic_mbx_unmap_and_free_msg(fbd, mbx_idx, i); @@ -354,7 +370,7 @@ static int fbnic_fw_xmit_cap_msg(struct fbnic_dev *fbd) return (err == -EOPNOTSUPP) ? 0 : err; } -static void fbnic_mbx_postinit_desc_ring(struct fbnic_dev *fbd, int mbx_idx) +static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; @@ -366,10 +382,18 @@ static void fbnic_mbx_postinit_desc_ring(struct fbnic_dev *fbd, int mbx_idx) switch (mbx_idx) { case FBNIC_IPC_MBX_RX_IDX: + /* Enable DMA writes from the device */ + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, + FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); + /* Make sure we have a page for the FW to write to */ fbnic_mbx_alloc_rx_msgs(fbd); break; case FBNIC_IPC_MBX_TX_IDX: + /* Enable DMA reads from the device */ + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, + FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); + /* Force version to 1 if we successfully requested an update * from the firmware. This should be overwritten once we get * the actual version from the firmware in the capabilities @@ -386,7 +410,7 @@ static void fbnic_mbx_postinit(struct fbnic_dev *fbd) { int i; - /* We only need to do this on the first interrupt following init. + /* We only need to do this on the first interrupt following reset. * this primes the mailbox so that we will have cleared all the * skip descriptors. */ @@ -396,7 +420,7 @@ static void fbnic_mbx_postinit(struct fbnic_dev *fbd) wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_postinit_desc_ring(fbd, i); + fbnic_mbx_init_desc_ring(fbd, i); } /** @@ -894,7 +918,7 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) * avoid the mailbox getting stuck closed if the interrupt * is reset. */ - fbnic_mbx_init_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); + fbnic_mbx_reset_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); msleep(200); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 14291401f463..dde4a37116e2 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -79,12 +79,6 @@ static void fbnic_mac_init_axi(struct fbnic_dev *fbd) fbnic_init_readrq(fbd, FBNIC_QM_RNI_RBP_CTL, cls, readrq); fbnic_init_mps(fbd, FBNIC_QM_RNI_RDE_CTL, cls, mps); fbnic_init_mps(fbd, FBNIC_QM_RNI_RCM_CTL, cls, mps); - - /* Enable XALI AR/AW outbound */ - wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, - FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); - wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, - FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); } static void fbnic_mac_init_qm(struct fbnic_dev *fbd) From 682a61281d1036962b68d651994cc407371daef5 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:52 -0700 Subject: [PATCH 232/239] fbnic: Add additional handling of IRQs We have two issues that need to be addressed in our IRQ handling. One is the fact that we can end up double-freeing IRQs in the event of an exception handling error such as a PCIe reset/recovery that fails. To prevent that from becoming an issue we can use the msix_vector values to indicate that we have successfully requested/freed the IRQ by only setting or clearing them when we have completed the given action. The other issue is that we have several potential races in our IRQ path due to us manipulating the mask before the vector has been truly disabled. In order to handle that in the case of the FW mailbox we need to not auto-enable the IRQ and instead will be enabling/disabling it separately. In the case of the PCS vector we can mitigate this by unmapping it and synchronizing the IRQ before we clear the mask. The general order of operations after this change is now to request the interrupt, poll the FW mailbox to ready, and then enable the interrupt. For the shutdown we do the reverse where we disable the interrupt, flush any pending Tx, and then free the IRQ. I am renaming the enable/disable to request/free to be equivilent with the IRQ calls being used. We may see additions in the future to enable/disable the IRQs versus request/free them for certain use cases. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Fixes: 69684376eed5 ("eth: fbnic: Add link detection") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654719271.499179.3634535105127848325.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic.h | 8 +- drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 148 ++++++++++++------ .../net/ethernet/meta/fbnic/fbnic_netdev.c | 5 +- drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 14 +- 4 files changed, 113 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 4ca7b99ef131..de6b1a340f55 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -154,14 +154,14 @@ struct fbnic_dev *fbnic_devlink_alloc(struct pci_dev *pdev); void fbnic_devlink_register(struct fbnic_dev *fbd); void fbnic_devlink_unregister(struct fbnic_dev *fbd); -int fbnic_fw_enable_mbx(struct fbnic_dev *fbd); -void fbnic_fw_disable_mbx(struct fbnic_dev *fbd); +int fbnic_fw_request_mbx(struct fbnic_dev *fbd); +void fbnic_fw_free_mbx(struct fbnic_dev *fbd); void fbnic_hwmon_register(struct fbnic_dev *fbd); void fbnic_hwmon_unregister(struct fbnic_dev *fbd); -int fbnic_pcs_irq_enable(struct fbnic_dev *fbd); -void fbnic_pcs_irq_disable(struct fbnic_dev *fbd); +int fbnic_pcs_request_irq(struct fbnic_dev *fbd); +void fbnic_pcs_free_irq(struct fbnic_dev *fbd); void fbnic_napi_name_irqs(struct fbnic_dev *fbd); int fbnic_napi_request_irq(struct fbnic_dev *fbd, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 1bbc0e56f3a0..1c88a2bf3a7a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -19,69 +19,105 @@ static irqreturn_t fbnic_fw_msix_intr(int __always_unused irq, void *data) return IRQ_HANDLED; } -/** - * fbnic_fw_enable_mbx - Configure and initialize Firmware Mailbox - * @fbd: Pointer to device to initialize - * - * This function will initialize the firmware mailbox rings, enable the IRQ - * and initialize the communication between the Firmware and the host. The - * firmware is expected to respond to the initialization by sending an - * interrupt essentially notifying the host that it has seen the - * initialization and is now synced up. - * - * Return: non-zero on failure. - **/ -int fbnic_fw_enable_mbx(struct fbnic_dev *fbd) +static int __fbnic_fw_enable_mbx(struct fbnic_dev *fbd, int vector) { - u32 vector = fbd->fw_msix_vector; int err; - /* Request the IRQ for FW Mailbox vector. */ - err = request_threaded_irq(vector, NULL, &fbnic_fw_msix_intr, - IRQF_ONESHOT, dev_name(fbd->dev), fbd); - if (err) - return err; - /* Initialize mailbox and attempt to poll it into ready state */ fbnic_mbx_init(fbd); err = fbnic_mbx_poll_tx_ready(fbd); if (err) { dev_warn(fbd->dev, "FW mailbox did not enter ready state\n"); - free_irq(vector, fbd); return err; } - /* Enable interrupts */ + /* Enable interrupt and unmask the vector */ + enable_irq(vector); fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); return 0; } /** - * fbnic_fw_disable_mbx - Disable mailbox and place it in standby state - * @fbd: Pointer to device to disable + * fbnic_fw_request_mbx - Configure and initialize Firmware Mailbox + * @fbd: Pointer to device to initialize * - * This function will disable the mailbox interrupt, free any messages still - * in the mailbox and place it into a standby state. The firmware is - * expected to see the update and assume that the host is in the reset state. + * This function will allocate the IRQ and then reinitialize the mailbox + * starting communication between the host and firmware. + * + * Return: non-zero on failure. **/ -void fbnic_fw_disable_mbx(struct fbnic_dev *fbd) +int fbnic_fw_request_mbx(struct fbnic_dev *fbd) { - /* Disable interrupt and free vector */ - fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_FW_MSIX_ENTRY); + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int vector, err; - /* Free the vector */ - free_irq(fbd->fw_msix_vector, fbd); + WARN_ON(fbd->fw_msix_vector); + + vector = pci_irq_vector(pdev, FBNIC_FW_MSIX_ENTRY); + if (vector < 0) + return vector; + + /* Request the IRQ for FW Mailbox vector. */ + err = request_threaded_irq(vector, NULL, &fbnic_fw_msix_intr, + IRQF_ONESHOT | IRQF_NO_AUTOEN, + dev_name(fbd->dev), fbd); + if (err) + return err; + + /* Initialize mailbox and attempt to poll it into ready state */ + err = __fbnic_fw_enable_mbx(fbd, vector); + if (err) + free_irq(vector, fbd); + + fbd->fw_msix_vector = vector; + + return err; +} + +/** + * fbnic_fw_disable_mbx - Temporarily place mailbox in standby state + * @fbd: Pointer to device + * + * Shutdown the mailbox by notifying the firmware to stop sending us logs, mask + * and synchronize the IRQ, and then clean up the rings. + **/ +static void fbnic_fw_disable_mbx(struct fbnic_dev *fbd) +{ + /* Disable interrupt and synchronize the IRQ */ + disable_irq(fbd->fw_msix_vector); + + /* Mask the vector */ + fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_FW_MSIX_ENTRY); /* Make sure disabling logs message is sent, must be done here to * avoid risk of completing without a running interrupt. */ fbnic_mbx_flush_tx(fbd); - - /* Reset the mailboxes to the initialized state */ fbnic_mbx_clean(fbd); } +/** + * fbnic_fw_free_mbx - Disable mailbox and place it in standby state + * @fbd: Pointer to device to disable + * + * This function will disable the mailbox interrupt, free any messages still + * in the mailbox and place it into a disabled state. The firmware is + * expected to see the update and assume that the host is in the reset state. + **/ +void fbnic_fw_free_mbx(struct fbnic_dev *fbd) +{ + /* Vector has already been freed */ + if (!fbd->fw_msix_vector) + return; + + fbnic_fw_disable_mbx(fbd); + + /* Free the vector */ + free_irq(fbd->fw_msix_vector, fbd); + fbd->fw_msix_vector = 0; +} + static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) { struct fbnic_dev *fbd = data; @@ -101,7 +137,7 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) } /** - * fbnic_pcs_irq_enable - Configure the MAC to enable it to advertise link + * fbnic_pcs_request_irq - Configure the PCS to enable it to advertise link * @fbd: Pointer to device to initialize * * This function provides basic bringup for the MAC/PCS IRQ. For now the IRQ @@ -109,41 +145,61 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) * * Return: non-zero on failure. **/ -int fbnic_pcs_irq_enable(struct fbnic_dev *fbd) +int fbnic_pcs_request_irq(struct fbnic_dev *fbd) { - u32 vector = fbd->pcs_msix_vector; - int err; + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int vector, err; - /* Request the IRQ for MAC link vector. - * Map MAC cause to it, and unmask it + WARN_ON(fbd->pcs_msix_vector); + + vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); + if (vector < 0) + return vector; + + /* Request the IRQ for PCS link vector. + * Map PCS cause to it, and unmask it */ err = request_irq(vector, &fbnic_pcs_msix_intr, 0, fbd->netdev->name, fbd); if (err) return err; + /* Map and enable interrupt, unmask vector after link is configured */ fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY | FBNIC_INTR_MSIX_CTRL_ENABLE); + fbd->pcs_msix_vector = vector; + return 0; } /** - * fbnic_pcs_irq_disable - Teardown the MAC IRQ to prepare for stopping + * fbnic_pcs_free_irq - Teardown the PCS IRQ to prepare for stopping * @fbd: Pointer to device that is stopping * - * This function undoes the work done in fbnic_pcs_irq_enable and prepares + * This function undoes the work done in fbnic_pcs_request_irq and prepares * the device to no longer receive traffic on the host interface. **/ -void fbnic_pcs_irq_disable(struct fbnic_dev *fbd) +void fbnic_pcs_free_irq(struct fbnic_dev *fbd) { + /* Vector has already been freed */ + if (!fbd->pcs_msix_vector) + return; + /* Disable interrupt */ fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY); + fbnic_wrfl(fbd); + + /* Synchronize IRQ to prevent race that would unmask vector */ + synchronize_irq(fbd->pcs_msix_vector); + + /* Mask the vector */ fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_PCS_MSIX_ENTRY); /* Free the vector */ free_irq(fbd->pcs_msix_vector, fbd); + fbd->pcs_msix_vector = 0; } void fbnic_synchronize_irq(struct fbnic_dev *fbd, int nr) @@ -226,9 +282,6 @@ void fbnic_free_irqs(struct fbnic_dev *fbd) { struct pci_dev *pdev = to_pci_dev(fbd->dev); - fbd->pcs_msix_vector = 0; - fbd->fw_msix_vector = 0; - fbd->num_irqs = 0; pci_free_irq_vectors(pdev); @@ -254,8 +307,5 @@ int fbnic_alloc_irqs(struct fbnic_dev *fbd) fbd->num_irqs = num_irqs; - fbd->pcs_msix_vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); - fbd->fw_msix_vector = pci_irq_vector(pdev, FBNIC_FW_MSIX_ENTRY); - return 0; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 79a01fdd1dd1..2524d9b88d59 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -44,9 +44,10 @@ int __fbnic_open(struct fbnic_net *fbn) if (err) goto time_stop; - err = fbnic_pcs_irq_enable(fbd); + err = fbnic_pcs_request_irq(fbd); if (err) goto time_stop; + /* Pull the BMC config and initialize the RPC */ fbnic_bmc_rpc_init(fbd); fbnic_rss_reinit(fbd, fbn); @@ -82,7 +83,7 @@ static int fbnic_stop(struct net_device *netdev) struct fbnic_net *fbn = netdev_priv(netdev); fbnic_down(fbn); - fbnic_pcs_irq_disable(fbn->fbd); + fbnic_pcs_free_irq(fbn->fbd); fbnic_time_stop(fbn); fbnic_fw_xmit_ownership_msg(fbn->fbd, false); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 6cbbc2ee3e1f..4e8595239c0f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -283,7 +283,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto free_irqs; } - err = fbnic_fw_enable_mbx(fbd); + err = fbnic_fw_request_mbx(fbd); if (err) { dev_err(&pdev->dev, "Firmware mailbox initialization failure\n"); @@ -363,7 +363,7 @@ static void fbnic_remove(struct pci_dev *pdev) fbnic_hwmon_unregister(fbd); fbnic_dbg_fbd_exit(fbd); fbnic_devlink_unregister(fbd); - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); fbnic_free_irqs(fbd); fbnic_devlink_free(fbd); @@ -387,7 +387,7 @@ static int fbnic_pm_suspend(struct device *dev) rtnl_unlock(); null_uc_addr: - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); /* Free the IRQs so they aren't trying to occupy sleeping CPUs */ fbnic_free_irqs(fbd); @@ -420,7 +420,7 @@ static int __fbnic_pm_resume(struct device *dev) fbd->mac->init_regs(fbd); /* Re-enable mailbox */ - err = fbnic_fw_enable_mbx(fbd); + err = fbnic_fw_request_mbx(fbd); if (err) goto err_free_irqs; @@ -438,15 +438,15 @@ static int __fbnic_pm_resume(struct device *dev) if (netif_running(netdev)) { err = __fbnic_open(fbn); if (err) - goto err_disable_mbx; + goto err_free_mbx; } rtnl_unlock(); return 0; -err_disable_mbx: +err_free_mbx: rtnl_unlock(); - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); err_free_irqs: fbnic_free_irqs(fbd); err_invalidate_uc_addr: From 0f9a959a0addd9bbc47e5d16c36b3a7f97981915 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:59 -0700 Subject: [PATCH 233/239] fbnic: Actually flush_tx instead of stalling out The fbnic_mbx_flush_tx function had a number of issues. First, we were waiting 200ms for the firmware to process the packets. We can drop this to 20ms and in almost all cases this should be more than enough time. So by changing this we can significantly reduce shutdown time. Second, we were not making sure that the Tx path was actually shut off. As such we could still have packets added while we were flushing the mailbox. To prevent that we can now clear the ready flag for the Tx side and it should stay down since the interrupt is disabled. Third, we kept re-reading the tail due to the second issue. The tail should not move after we have started the flush so we can just read it once while we are holding the mailbox Tx lock. By doing that we are guaranteed that the value should be consistent. Fourth, we were keeping a count of descriptors cleaned due to the second and third issues called out. That count is not a valid reason to be exiting the cleanup, and with the tail only being read once we shouldn't see any cases where the tail moves after the disable so the tracking of count can be dropped. Fifth, we were using attempts * sleep time to determine how long we would wait in our polling loop to flush out the Tx. This can be very imprecise. In order to tighten up the timing we are shifting over to using a jiffies value of jiffies + 10 * HZ + 1 to determine the jiffies value we should stop polling at as this should be accurate within once sleep cycle for the total amount of time spent polling. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654719929.499179.16406653096197423749.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index f4749bfd840c..d019191d6ae9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -930,35 +930,36 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) { + unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; - int attempts = 50; - u8 count = 0; - - /* Nothing to do if there is no mailbox */ - if (!fbnic_fw_present(fbd)) - return; + u8 tail; /* Record current Rx stats */ tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - /* Nothing to do if mailbox never got to ready */ - if (!tx_mbx->ready) - return; + spin_lock_irq(&fbd->fw_tx_lock); + + /* Clear ready to prevent any further attempts to transmit */ + tx_mbx->ready = false; + + /* Read tail to determine the last tail state for the ring */ + tail = tx_mbx->tail; + + spin_unlock_irq(&fbd->fw_tx_lock); /* Give firmware time to process packet, - * we will wait up to 10 seconds which is 50 waits of 200ms. + * we will wait up to 10 seconds which is 500 waits of 20ms. */ do { u8 head = tx_mbx->head; - if (head == tx_mbx->tail) + /* Tx ring is empty once head == tail */ + if (head == tail) break; - msleep(200); + msleep(20); fbnic_mbx_process_tx_msgs(fbd); - - count += (tx_mbx->head - head) % FBNIC_IPC_MBX_DESC_LEN; - } while (count < FBNIC_IPC_MBX_DESC_LEN && --attempts); + } while (time_is_after_jiffies(timeout)); } void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, From cdbb2dc3996a60ed3d7431c1239a8ca98c778e04 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:05 -0700 Subject: [PATCH 234/239] fbnic: Cleanup handling of completions There was an issue in that if we were to shutdown we could be left with a completion in flight as the mailbox went away. To address that I have added an fbnic_mbx_evict_all_cmpl function that is meant to essentially create a "broken pipe" type response so that all callers will receive an error indicating that the connection has been broken as a result of us shutting down the mailbox. Fixes: 378e5cc1c6c6 ("eth: fbnic: hwmon: Add completion infrastructure for firmware requests") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654720578.499179.380252598204530873.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index d019191d6ae9..732875aae46c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -928,6 +928,20 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) return attempts ? 0 : -ETIMEDOUT; } +static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) +{ + cmpl_data->result = -EPIPE; + complete(&cmpl_data->done); +} + +static void fbnic_mbx_evict_all_cmpl(struct fbnic_dev *fbd) +{ + if (fbd->cmpl_data) { + __fbnic_fw_evict_cmpl(fbd->cmpl_data); + fbd->cmpl_data = NULL; + } +} + void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; @@ -945,6 +959,9 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) /* Read tail to determine the last tail state for the ring */ tail = tx_mbx->tail; + /* Flush any completions as we are no longer processing Rx */ + fbnic_mbx_evict_all_cmpl(fbd); + spin_unlock_irq(&fbd->fw_tx_lock); /* Give firmware time to process packet, From ab064f6005973d456f95ae99cd9ea0d8ab676cce Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:12 -0700 Subject: [PATCH 235/239] fbnic: Improve responsiveness of fbnic_mbx_poll_tx_ready There were a couple different issues found in fbnic_mbx_poll_tx_ready. Among them were the fact that we were sleeping much longer than we actually needed to as the actual FW could respond in under 20ms. The other issue was that we would just keep polling the mailbox even if the device itself had gone away. To address the responsiveness issues we can decrease the sleeps to 20ms and use a jiffies based timeout value rather than just counting the number of times we slept and then polled. To address the hardware going away we can move the check for the firmware BAR being present from where it was and place it inside the loop after the mailbox descriptor ring is initialized and before we sleep so that we just abort and return an error if the device went away during initialization. With these two changes we see a significant improvement in boot times for the driver. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654721224.499179.2698616208976624755.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 732875aae46c..d344b454f28b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -905,27 +905,30 @@ void fbnic_mbx_poll(struct fbnic_dev *fbd) int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { + unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; - int attempts = 50; - - /* Immediate fail if BAR4 isn't there */ - if (!fbnic_fw_present(fbd)) - return -ENODEV; tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - while (!tx_mbx->ready && --attempts) { + while (!tx_mbx->ready) { + if (!time_is_after_jiffies(timeout)) + return -ETIMEDOUT; + /* Force the firmware to trigger an interrupt response to * avoid the mailbox getting stuck closed if the interrupt * is reset. */ fbnic_mbx_reset_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); - msleep(200); + /* Immediate fail if BAR4 went away */ + if (!fbnic_fw_present(fbd)) + return -ENODEV; + + msleep(20); fbnic_mbx_poll(fbd); } - return attempts ? 0 : -ETIMEDOUT; + return 0; } static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) From 1b34d1c1dc8384884febd83140c9afbc7c4b9eb8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:18 -0700 Subject: [PATCH 236/239] fbnic: Pull fbnic_fw_xmit_cap_msg use out of interrupt context This change pulls the call to fbnic_fw_xmit_cap_msg out of fbnic_mbx_init_desc_ring and instead places it in the polling function for getting the Tx ready. Doing that we can avoid the potential issue with an interrupt coming in later from the firmware that causes it to get fired in interrupt context. Fixes: 20d2e88cc746 ("eth: fbnic: Add initial messaging to notify FW of our presence") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654721876.499179.9839651602256668493.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 43 ++++++++-------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index d344b454f28b..90a45c701543 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -352,24 +352,6 @@ static int fbnic_fw_xmit_simple_msg(struct fbnic_dev *fbd, u32 msg_type) return err; } -/** - * fbnic_fw_xmit_cap_msg - Allocate and populate a FW capabilities message - * @fbd: FBNIC device structure - * - * Return: NULL on failure to allocate, error pointer on error, or pointer - * to new TLV test message. - * - * Sends a single TLV header indicating the host wants the firmware to - * confirm the capabilities and version. - **/ -static int fbnic_fw_xmit_cap_msg(struct fbnic_dev *fbd) -{ - int err = fbnic_fw_xmit_simple_msg(fbd, FBNIC_TLV_MSG_ID_HOST_CAP_REQ); - - /* Return 0 if we are not calling this on ASIC */ - return (err == -EOPNOTSUPP) ? 0 : err; -} - static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; @@ -393,15 +375,6 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) /* Enable DMA reads from the device */ wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); - - /* Force version to 1 if we successfully requested an update - * from the firmware. This should be overwritten once we get - * the actual version from the firmware in the capabilities - * request message. - */ - if (!fbnic_fw_xmit_cap_msg(fbd) && - !fbd->fw_cap.running.mgmt.version) - fbd->fw_cap.running.mgmt.version = 1; break; } } @@ -907,6 +880,7 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; + int err; tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; while (!tx_mbx->ready) { @@ -928,7 +902,22 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) fbnic_mbx_poll(fbd); } + /* Request an update from the firmware. This should overwrite + * mgmt.version once we get the actual version from the firmware + * in the capabilities request message. + */ + err = fbnic_fw_xmit_simple_msg(fbd, FBNIC_TLV_MSG_ID_HOST_CAP_REQ); + if (err) + goto clean_mbx; + + /* Use "1" to indicate we entered the state waiting for a response */ + fbd->fw_cap.running.mgmt.version = 1; + return 0; +clean_mbx: + /* Cleanup Rx buffers and disable mailbox */ + fbnic_mbx_clean(fbd); + return err; } static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) From ce2fa1dba204c761582674cf2eb9cbe0b949b5c7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:25 -0700 Subject: [PATCH 237/239] fbnic: Do not allow mailbox to toggle to ready outside fbnic_mbx_poll_tx_ready We had originally thought to have the mailbox go to ready in the background while we were doing other things. One issue with this though is that we can't disable it by clearing the ready state without also blocking interrupts or calls to mbx_poll as it will just pop back to life during an interrupt. In order to prevent that from happening we can pull the code for toggling to ready out of the interrupt path and instead place it in the fbnic_mbx_poll_tx_ready path so that it becomes the only spot where the Rx/Tx can toggle to the ready state. By doing this we can prevent races where we disable the DMA and/or free buffers only to have an interrupt fire and undo what we have done. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654722518.499179.11612865740376848478.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 27 ++++++++-------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 90a45c701543..3d9636a6c968 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -356,10 +356,6 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; - /* This is a one time init, so just exit if it is completed */ - if (mbx->ready) - return; - mbx->ready = true; switch (mbx_idx) { @@ -379,21 +375,18 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) } } -static void fbnic_mbx_postinit(struct fbnic_dev *fbd) +static bool fbnic_mbx_event(struct fbnic_dev *fbd) { - int i; - /* We only need to do this on the first interrupt following reset. * this primes the mailbox so that we will have cleared all the * skip descriptors. */ if (!(rd32(fbd, FBNIC_INTR_STATUS(0)) & (1u << FBNIC_FW_MSIX_ENTRY))) - return; + return false; wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); - for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_init_desc_ring(fbd, i); + return true; } /** @@ -870,7 +863,7 @@ next_page: void fbnic_mbx_poll(struct fbnic_dev *fbd) { - fbnic_mbx_postinit(fbd); + fbnic_mbx_event(fbd); fbnic_mbx_process_tx_msgs(fbd); fbnic_mbx_process_rx_msgs(fbd); @@ -879,11 +872,9 @@ void fbnic_mbx_poll(struct fbnic_dev *fbd) int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; - struct fbnic_fw_mbx *tx_mbx; - int err; + int err, i; - tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - while (!tx_mbx->ready) { + do { if (!time_is_after_jiffies(timeout)) return -ETIMEDOUT; @@ -898,9 +889,11 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) return -ENODEV; msleep(20); + } while (!fbnic_mbx_event(fbd)); - fbnic_mbx_poll(fbd); - } + /* FW has shown signs of life. Enable DMA and start Tx/Rx */ + for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) + fbnic_mbx_init_desc_ring(fbd, i); /* Request an update from the firmware. This should overwrite * mgmt.version once we get the actual version from the firmware From 23fa6a23d97182d36ca3c71e43c804fa91e46a03 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 May 2025 17:32:20 -0700 Subject: [PATCH 238/239] net: export a helper for adding up queue stats Older drivers and drivers with lower queue counts often have a static array of queues, rather than allocating structs for each queue on demand. Add a helper for adding up qstats from a queue range. Expectation is that driver will pass a queue range [netdev->real_num_*x_queues, MAX). It was tempting to always use num_*x_queues as the end, but virtio seems to clamp its queue count after allocating the netdev. And this way we can trivaly reuse the helper for [0, real_..). Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250507003221.823267-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/net/netdev_queues.h | 6 ++++ net/core/netdev-genl.c | 69 +++++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 825141d675e5..d01c82983b4d 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -103,6 +103,12 @@ struct netdev_stat_ops { struct netdev_queue_stats_tx *tx); }; +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum); + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 230743bdbb14..dae9f0d432fb 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -708,25 +708,66 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, return 0; } +/** + * netdev_stat_queue_sum() - add up queue stats from range of queues + * @netdev: net_device + * @rx_start: index of the first Rx queue to query + * @rx_end: index after the last Rx queue (first *not* to query) + * @rx_sum: output Rx stats, should be already initialized + * @tx_start: index of the first Tx queue to query + * @tx_end: index after the last Tx queue (first *not* to query) + * @tx_sum: output Tx stats, should be already initialized + * + * Add stats from [start, end) range of queue IDs to *x_sum structs. + * The sum structs must be already initialized. Usually this + * helper is invoked from the .get_base_stats callbacks of drivers + * to account for stats of disabled queues. In that case the ranges + * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues). + */ +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum) +{ + const struct netdev_stat_ops *ops; + struct netdev_queue_stats_rx rx; + struct netdev_queue_stats_tx tx; + int i; + + ops = netdev->stat_ops; + + for (i = rx_start; i < rx_end; i++) { + memset(&rx, 0xff, sizeof(rx)); + if (ops->get_queue_stats_rx) + ops->get_queue_stats_rx(netdev, i, &rx); + netdev_nl_stats_add(rx_sum, &rx, sizeof(rx)); + } + for (i = tx_start; i < tx_end; i++) { + memset(&tx, 0xff, sizeof(tx)); + if (ops->get_queue_stats_tx) + ops->get_queue_stats_tx(netdev, i, &tx); + netdev_nl_stats_add(tx_sum, &tx, sizeof(tx)); + } +} +EXPORT_SYMBOL(netdev_stat_queue_sum); + static int netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { - struct netdev_queue_stats_rx rx_sum, rx; - struct netdev_queue_stats_tx tx_sum, tx; - const struct netdev_stat_ops *ops; + struct netdev_queue_stats_rx rx_sum; + struct netdev_queue_stats_tx tx_sum; void *hdr; - int i; - ops = netdev->stat_ops; /* Netdev can't guarantee any complete counters */ - if (!ops->get_base_stats) + if (!netdev->stat_ops->get_base_stats) return 0; memset(&rx_sum, 0xff, sizeof(rx_sum)); memset(&tx_sum, 0xff, sizeof(tx_sum)); - ops->get_base_stats(netdev, &rx_sum, &tx_sum); + netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum); /* The op was there, but nothing reported, don't bother */ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && @@ -739,18 +780,8 @@ netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) goto nla_put_failure; - for (i = 0; i < netdev->real_num_rx_queues; i++) { - memset(&rx, 0xff, sizeof(rx)); - if (ops->get_queue_stats_rx) - ops->get_queue_stats_rx(netdev, i, &rx); - netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx)); - } - for (i = 0; i < netdev->real_num_tx_queues; i++) { - memset(&tx, 0xff, sizeof(tx)); - if (ops->get_queue_stats_tx) - ops->get_queue_stats_tx(netdev, i, &tx); - netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx)); - } + netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum, + 0, netdev->real_num_tx_queues, &tx_sum); if (netdev_nl_stats_write_rx(rsp, &rx_sum) || netdev_nl_stats_write_tx(rsp, &tx_sum)) From 001160ec8c59115efc39e197d40829bdafd4d7f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 May 2025 17:32:21 -0700 Subject: [PATCH 239/239] virtio-net: fix total qstat values NIPA tests report that the interface statistics reported via qstat are lower than those reported via ip link. Looks like this is because some tests flip the queue count up and down, and we end up with some of the traffic accounted on disabled queues. Add up counters from disabled queues. Fixes: d888f04c09bb ("virtio-net: support queue stat") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250507003221.823267-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f9e3e628ec4d..e53ba600605a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5678,6 +5678,10 @@ static void virtnet_get_base_stats(struct net_device *dev, if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) tx->hw_drop_ratelimits = 0; + + netdev_stat_queue_sum(dev, + dev->real_num_rx_queues, vi->max_queue_pairs, rx, + dev->real_num_tx_queues, vi->max_queue_pairs, tx); } static const struct netdev_stat_ops virtnet_stat_ops = {