crypto: octeontx2 - Fix address alignment issue on ucode loading

The octeontx2 crypto driver allocates memory using kmalloc/kzalloc
and uses this memory for DMA (via dma_map_single()). It assumes
that kmalloc/kzalloc will return a 128-byte aligned address, but
kmalloc/kzalloc returns an 8-byte aligned address after the change below:
  "9382bc44b5f5 arm64: allow kmalloc() caches aligned to the
   smaller cache_line_size()"

The completion address should be 32-byte aligned when loading
microcode.

Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
Cc: <stable@vger.kernel.org> # v6.5+
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Bharat Bhushan
2025-05-22 15:36:25 +05:30
committed by Herbert Xu
parent 2157e50f65
commit b7b88b4939

View File

@@ -1491,12 +1491,13 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
union otx2_cpt_opcode opcode;
union otx2_cpt_res_s *result;
union otx2_cpt_inst_s inst;
dma_addr_t result_baddr;
dma_addr_t rptr_baddr;
struct pci_dev *pdev;
u32 len, compl_rlen;
int timeout = 10000;
void *base, *rptr;
int ret, etype;
void *rptr;
u32 len;
/*
* We don't get capabilities if it was already done
@@ -1519,22 +1520,28 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
if (ret)
goto delete_grps;
compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN);
len = compl_rlen + LOADFVC_RLEN;
/* Allocate extra memory for "rptr" and "result" pointer alignment */
len = LOADFVC_RLEN + ARCH_DMA_MINALIGN +
sizeof(union otx2_cpt_res_s) + OTX2_CPT_RES_ADDR_ALIGN;
result = kzalloc(len, GFP_KERNEL);
if (!result) {
base = kzalloc(len, GFP_KERNEL);
if (!base) {
ret = -ENOMEM;
goto lf_cleanup;
}
rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len,
DMA_BIDIRECTIONAL);
rptr = PTR_ALIGN(base, ARCH_DMA_MINALIGN);
rptr_baddr = dma_map_single(&pdev->dev, rptr, len, DMA_BIDIRECTIONAL);
if (dma_mapping_error(&pdev->dev, rptr_baddr)) {
dev_err(&pdev->dev, "DMA mapping failed\n");
ret = -EFAULT;
goto free_result;
goto free_rptr;
}
rptr = (u8 *)result + compl_rlen;
result = (union otx2_cpt_res_s *)PTR_ALIGN(rptr + LOADFVC_RLEN,
OTX2_CPT_RES_ADDR_ALIGN);
result_baddr = ALIGN(rptr_baddr + LOADFVC_RLEN,
OTX2_CPT_RES_ADDR_ALIGN);
/* Fill in the command */
opcode.s.major = LOADFVC_MAJOR_OP;
@@ -1546,14 +1553,14 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
/* 64-bit swap for microcode data reads, not needed for addresses */
cpu_to_be64s(&iq_cmd.cmd.u);
iq_cmd.dptr = 0;
iq_cmd.rptr = rptr_baddr + compl_rlen;
iq_cmd.rptr = rptr_baddr;
iq_cmd.cptr.u = 0;
for (etype = 1; etype < OTX2_CPT_MAX_ENG_TYPES; etype++) {
result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT;
iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps,
etype);
otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr);
otx2_cpt_fill_inst(&inst, &iq_cmd, result_baddr);
lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
timeout = 10000;
@@ -1576,8 +1583,8 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
error_no_response:
dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL);
free_result:
kfree(result);
free_rptr:
kfree(base);
lf_cleanup:
otx2_cptlf_shutdown(lfs);
delete_grps: