From 45c63c2f383b2d7b99e060754cdcca29996447c0 Mon Sep 17 00:00:00 2001 From: Prakash Gupta Date: Fri, 13 Mar 2026 15:53:53 +0530 Subject: [PATCH] FROMLIST: iommu/arm-smmu: Use pm_runtime in fault handlers Commit d4a44f0750bb ("iommu/arm-smmu: Invoke pm_runtime across the driver") enabled pm_runtime for the arm-smmu device. On systems where the SMMU sits in a power domain, all register accesses must be done while the device is runtime active to avoid unclocked register reads and potential NoC errors. So far, this has not been an issue for most SMMU clients because stall-on-fault is enabled by default. While a translation fault is being handled, the SMMU stalls further translations for that context bank, so the fault handler would not race with a powered-down SMMU. Adreno SMMU now disables stall-on-fault in the presence of fault storms to avoid saturating SMMU resources and hanging the GMU. With stall-on-fault disabled, the SMMU can generate faults while its power domain may no longer be enabled, which makes unclocked accesses to fault-status registers in the SMMU fault handlers possible. Guard the context and global fault handlers with pm_runtime_get_if_active() and pm_runtime_put_autosuspend() so that all SMMU fault register accesses are done with the SMMU powered. In case pm_runtime is not active we can safely ignore the fault as for pm runtime resume the smmu device is reset and fault registers are cleared. List: https://lore.kernel.org/all/20260313-smmu-rpm-v2-1-8c2236b402b0@oss.qualcomm.com/ Fixes: b13044092c1e ("drm/msm: Temporarily disable stall-on-fault after a page fault") Co-developed-by: Pratyush Brahma Signed-off-by: Pratyush Brahma Signed-off-by: Prakash Gupta --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 60 +++++++++++++++++---------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 4ced4b5bee4df..208e4676ab1b7 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -462,10 +462,20 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) int idx = smmu_domain->cfg.cbndx; int ret; + if (!pm_runtime_get_if_active(smmu->dev)) + return IRQ_NONE; + + if (smmu->impl && smmu->impl->context_fault) { + ret = smmu->impl->context_fault(irq, dev); + goto out_power_off; + } + arm_smmu_read_context_fault_info(smmu, idx, &cfi); - if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) - return IRQ_NONE; + if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) { + ret = IRQ_NONE; + goto out_power_off; + } ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); @@ -480,7 +490,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE); } - return IRQ_HANDLED; + ret = IRQ_HANDLED; + +out_power_off: + pm_runtime_put_autosuspend(smmu->dev); + + return ret; } static irqreturn_t arm_smmu_global_fault(int irq, void *dev) @@ -489,14 +504,25 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) struct arm_smmu_device *smmu = dev; static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); + int ret; + + if (!pm_runtime_get_if_active(smmu->dev)) + return IRQ_NONE; + + if (smmu->impl && smmu->impl->global_fault) { + ret = smmu->impl->global_fault(irq, dev); + goto out_power_off; + } gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR); gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0); gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1); gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2); - if (!gfsr) - return IRQ_NONE; + if (!gfsr) { + ret = IRQ_NONE; + goto out_power_off; + } if (__ratelimit(&rs)) { if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) && @@ -513,7 +539,11 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) } arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr); - return IRQ_HANDLED; + ret = IRQ_HANDLED; + +out_power_off: + pm_runtime_put_autosuspend(smmu->dev); + return ret; } static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, @@ -683,7 +713,6 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain, enum io_pgtable_fmt fmt; struct iommu_domain *domain = &smmu_domain->domain; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - irqreturn_t (*context_fault)(int irq, void *dev); mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) @@ -850,19 +879,14 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain, */ irq = smmu->irqs[cfg->irptndx]; - if (smmu->impl && smmu->impl->context_fault) - context_fault = smmu->impl->context_fault; - else - context_fault = arm_smmu_context_fault; - if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq) ret = devm_request_threaded_irq(smmu->dev, irq, NULL, - context_fault, + arm_smmu_context_fault, IRQF_ONESHOT | IRQF_SHARED, "arm-smmu-context-fault", smmu_domain); else - ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED, + ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault, IRQF_SHARED, "arm-smmu-context-fault", smmu_domain); if (ret < 0) { @@ -2122,7 +2146,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; int num_irqs, i, err; u32 global_irqs, pmu_irqs; - irqreturn_t (*global_fault)(int irq, void *dev); smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); if (!smmu) { @@ -2202,18 +2225,13 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->num_context_irqs = smmu->num_context_banks; } - if (smmu->impl && smmu->impl->global_fault) - global_fault = smmu->impl->global_fault; - else - global_fault = arm_smmu_global_fault; - for (i = 0; i < global_irqs; i++) { int irq = platform_get_irq(pdev, i); if (irq < 0) return irq; - err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED, + err = devm_request_irq(dev, irq, arm_smmu_global_fault, IRQF_SHARED, "arm-smmu global fault", smmu); if (err) return dev_err_probe(dev, err,