This patch series enables ECC edac support for Calxeda ECX-2000 (Midway). The ECX-2000 memory controller is similar to Highbank but has different register bases for error and interrupt registers.
Testing the driver uncovered a bug in the interrupt initialization. So there is also a fix for this for all the Highbank edac drivers. The interrupt was enabled too early which caused a crash if there are interrupts pending (ECC errors) during driver initialization.
Also some improvements and unification of edac log messages I found useful.
-Robert
Rob Herring (1): ARM: dts: calxeda: move memory-controller node out of ecx-common.dtsi
Robert Richter (4): edac, highbank: Fix interrupt setup of mem and l2 controller edac, highbank: Add Calxeda ECX-2000 support edac, highbank: Improve and unify naming edac: Unify reporting of device info for device, mc and pci
.../devicetree/bindings/arm/calxeda/mem-ctrlr.txt | 4 +- arch/arm/boot/dts/ecx-2000.dts | 6 + arch/arm/boot/dts/ecx-common.dtsi | 6 - arch/arm/boot/dts/highbank.dts | 6 + drivers/edac/edac_device.c | 9 +- drivers/edac/edac_mc.c | 6 +- drivers/edac/edac_pci.c | 8 +- drivers/edac/highbank_l2_edac.c | 33 +++--- drivers/edac/highbank_mc_edac.c | 128 +++++++++++++-------- 9 files changed, 127 insertions(+), 79 deletions(-)
From: Robert Richter robert.richter@linaro.org
Register and enable interrupts after the edac registration. Otherwise incomming ecc error interrupts lead to crashes during device setup.
Fixing this in drivers for mc and l2.
Signed-off-by: Robert Richter robert.richter@linaro.org Signed-off-by: Robert Richter rric@kernel.org --- drivers/edac/highbank_l2_edac.c | 18 ++++++++++-------- drivers/edac/highbank_mc_edac.c | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c index c2bd8c6..10d3d29 100644 --- a/drivers/edac/highbank_l2_edac.c +++ b/drivers/edac/highbank_l2_edac.c @@ -90,28 +90,30 @@ static int highbank_l2_err_probe(struct platform_device *pdev) goto err; }
+ dci->mod_name = dev_name(&pdev->dev); + dci->dev_name = dev_name(&pdev->dev); + + if (edac_device_add_device(dci)) + goto err; + drvdata->db_irq = platform_get_irq(pdev, 0); res = devm_request_irq(&pdev->dev, drvdata->db_irq, highbank_l2_err_handler, 0, dev_name(&pdev->dev), dci); if (res < 0) - goto err; + goto err2;
drvdata->sb_irq = platform_get_irq(pdev, 1); res = devm_request_irq(&pdev->dev, drvdata->sb_irq, highbank_l2_err_handler, 0, dev_name(&pdev->dev), dci); if (res < 0) - goto err; - - dci->mod_name = dev_name(&pdev->dev); - dci->dev_name = dev_name(&pdev->dev); - - if (edac_device_add_device(dci)) - goto err; + goto err2;
devres_close_group(&pdev->dev, NULL); return 0; +err2: + edac_device_del_device(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); edac_device_free_ctl_info(dci); diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 4695dd2..7a78307 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -189,14 +189,6 @@ static int highbank_mc_probe(struct platform_device *pdev) goto err; }
- irq = platform_get_irq(pdev, 0); - res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler, - 0, dev_name(&pdev->dev), mci); - if (res < 0) { - dev_err(&pdev->dev, "Unable to request irq %d\n", irq); - goto err; - } - mci->mtype_cap = MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; @@ -217,10 +209,20 @@ static int highbank_mc_probe(struct platform_device *pdev) if (res < 0) goto err;
+ irq = platform_get_irq(pdev, 0); + res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler, + 0, dev_name(&pdev->dev), mci); + if (res < 0) { + dev_err(&pdev->dev, "Unable to request irq %d\n", irq); + goto err2; + } + highbank_mc_create_debugfs_nodes(mci);
devres_close_group(&pdev->dev, NULL); return 0; +err2: + edac_mc_del_mc(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); edac_mc_free(mci);
From: Rob Herring rob.herring@calxeda.com
The DDR controller is slightly different in ECX-2000 and ECX-1000, so we need to have different nodes for each platform.
Signed-off-by: Rob Herring rob.herring@calxeda.com [Device Tree documentation updated.] Signed-off-by: Robert Richter rric@kernel.org --- Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt | 4 +++- arch/arm/boot/dts/ecx-2000.dts | 6 ++++++ arch/arm/boot/dts/ecx-common.dtsi | 6 ------ arch/arm/boot/dts/highbank.dts | 6 ++++++ 4 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt b/Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt index f770ac0..0496759 100644 --- a/Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt +++ b/Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt @@ -1,7 +1,9 @@ Calxeda DDR memory controller
Properties: -- compatible : Should be "calxeda,hb-ddr-ctrl" +- compatible : Should be: + - "calxeda,hb-ddr-ctrl" for ECX-1000 + - "calxeda,ecx-2000-ddr-ctrl" for ECX-2000 - reg : Address and size for DDR controller registers. - interrupts : Interrupt for DDR controller.
diff --git a/arch/arm/boot/dts/ecx-2000.dts b/arch/arm/boot/dts/ecx-2000.dts index 139b40c..2ccbb57f 100644 --- a/arch/arm/boot/dts/ecx-2000.dts +++ b/arch/arm/boot/dts/ecx-2000.dts @@ -85,6 +85,12 @@ <1 10 0xf08>; };
+ memory-controller@fff00000 { + compatible = "calxeda,ecx-2000-ddr-ctrl"; + reg = <0xfff00000 0x1000>; + interrupts = <0 91 4>; + }; + intc: interrupt-controller@fff11000 { compatible = "arm,cortex-a15-gic"; #interrupt-cells = <3>; diff --git a/arch/arm/boot/dts/ecx-common.dtsi b/arch/arm/boot/dts/ecx-common.dtsi index e8559b7..f95988f 100644 --- a/arch/arm/boot/dts/ecx-common.dtsi +++ b/arch/arm/boot/dts/ecx-common.dtsi @@ -45,12 +45,6 @@ status = "disabled"; };
- memory-controller@fff00000 { - compatible = "calxeda,hb-ddr-ctrl"; - reg = <0xfff00000 0x1000>; - interrupts = <0 91 4>; - }; - ipc@fff20000 { compatible = "arm,pl320", "arm,primecell"; reg = <0xfff20000 0x1000>; diff --git a/arch/arm/boot/dts/highbank.dts b/arch/arm/boot/dts/highbank.dts index 6aad34a..ed14aea 100644 --- a/arch/arm/boot/dts/highbank.dts +++ b/arch/arm/boot/dts/highbank.dts @@ -86,6 +86,12 @@ soc { ranges = <0x00000000 0x00000000 0xffffffff>;
+ memory-controller@fff00000 { + compatible = "calxeda,hb-ddr-ctrl"; + reg = <0xfff00000 0x1000>; + interrupts = <0 91 4>; + }; + timer@fff10600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xfff10600 0x20>;
From: Robert Richter robert.richter@linaro.org
Implement edac support for Calxeda ECX-2000.
The ECX-2000 memory controller is similar to Highbank but has different register bases for error and interrupt registers. There is an own device tree name "calxeda,ecx-2000-ddr-ctrl" for identification and initialization of the ECX-2000 and its base addresses.
Signed-off-by: Robert Richter robert.richter@linaro.org Signed-off-by: Robert Richter rric@kernel.org --- drivers/edac/highbank_mc_edac.c | 105 ++++++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 35 deletions(-)
diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 7a78307..79cb872 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -26,31 +26,40 @@ #include "edac_module.h"
/* DDR Ctrlr Error Registers */ -#define HB_DDR_ECC_OPT 0x128 -#define HB_DDR_ECC_U_ERR_ADDR 0x130 -#define HB_DDR_ECC_U_ERR_STAT 0x134 -#define HB_DDR_ECC_U_ERR_DATAL 0x138 -#define HB_DDR_ECC_U_ERR_DATAH 0x13c -#define HB_DDR_ECC_C_ERR_ADDR 0x140 -#define HB_DDR_ECC_C_ERR_STAT 0x144 -#define HB_DDR_ECC_C_ERR_DATAL 0x148 -#define HB_DDR_ECC_C_ERR_DATAH 0x14c -#define HB_DDR_ECC_INT_STATUS 0x180 -#define HB_DDR_ECC_INT_ACK 0x184 -#define HB_DDR_ECC_U_ERR_ID 0x424 -#define HB_DDR_ECC_C_ERR_ID 0x428
-#define HB_DDR_ECC_INT_STAT_CE 0x8 -#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10 -#define HB_DDR_ECC_INT_STAT_UE 0x20 -#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40 +#define HB_DDR_ECC_ERR_BASE 0x128 +#define MW_DDR_ECC_ERR_BASE 0x1b4 + +#define HB_DDR_ECC_OPT 0x00 +#define HB_DDR_ECC_U_ERR_ADDR 0x08 +#define HB_DDR_ECC_U_ERR_STAT 0x0c +#define HB_DDR_ECC_U_ERR_DATAL 0x10 +#define HB_DDR_ECC_U_ERR_DATAH 0x14 +#define HB_DDR_ECC_C_ERR_ADDR 0x18 +#define HB_DDR_ECC_C_ERR_STAT 0x1c +#define HB_DDR_ECC_C_ERR_DATAL 0x20 +#define HB_DDR_ECC_C_ERR_DATAH 0x24
#define HB_DDR_ECC_OPT_MODE_MASK 0x3 #define HB_DDR_ECC_OPT_FWC 0x100 #define HB_DDR_ECC_OPT_XOR_SHIFT 16
+/* DDR Ctrlr Interrupt Registers */ + +#define HB_DDR_ECC_INT_BASE 0x180 +#define MW_DDR_ECC_INT_BASE 0x218 + +#define HB_DDR_ECC_INT_STATUS 0x00 +#define HB_DDR_ECC_INT_ACK 0x04 + +#define HB_DDR_ECC_INT_STAT_CE 0x8 +#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10 +#define HB_DDR_ECC_INT_STAT_UE 0x20 +#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40 + struct hb_mc_drvdata { - void __iomem *mc_vbase; + void __iomem *mc_err_base; + void __iomem *mc_int_base; };
static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) @@ -60,10 +69,10 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) u32 status, err_addr;
/* Read the interrupt status register */ - status = readl(drvdata->mc_vbase + HB_DDR_ECC_INT_STATUS); + status = readl(drvdata->mc_int_base + HB_DDR_ECC_INT_STATUS);
if (status & HB_DDR_ECC_INT_STAT_UE) { - err_addr = readl(drvdata->mc_vbase + HB_DDR_ECC_U_ERR_ADDR); + err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_U_ERR_ADDR); edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK, 0, @@ -71,9 +80,9 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) mci->ctl_name, ""); } if (status & HB_DDR_ECC_INT_STAT_CE) { - u32 syndrome = readl(drvdata->mc_vbase + HB_DDR_ECC_C_ERR_STAT); + u32 syndrome = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_STAT); syndrome = (syndrome >> 8) & 0xff; - err_addr = readl(drvdata->mc_vbase + HB_DDR_ECC_C_ERR_ADDR); + err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_ADDR); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK, syndrome, @@ -82,7 +91,7 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) }
/* clear the error, clears the interrupt */ - writel(status, drvdata->mc_vbase + HB_DDR_ECC_INT_ACK); + writel(status, drvdata->mc_int_base + HB_DDR_ECC_INT_ACK); return IRQ_HANDLED; }
@@ -104,10 +113,10 @@ static ssize_t highbank_mc_err_inject_write(struct file *file, buf[buf_size] = 0;
if (!kstrtou8(buf, 16, &synd)) { - reg = readl(pdata->mc_vbase + HB_DDR_ECC_OPT); + reg = readl(pdata->mc_err_base + HB_DDR_ECC_OPT); reg &= HB_DDR_ECC_OPT_MODE_MASK; reg |= (synd << HB_DDR_ECC_OPT_XOR_SHIFT) | HB_DDR_ECC_OPT_FWC; - writel(reg, pdata->mc_vbase + HB_DDR_ECC_OPT); + writel(reg, pdata->mc_err_base + HB_DDR_ECC_OPT); }
return count; @@ -131,17 +140,46 @@ static void highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) {} #endif
+struct hb_mc_settings { + int err_offset; + int int_offset; +}; + +static struct hb_mc_settings hb_settings = { + .err_offset = HB_DDR_ECC_ERR_BASE, + .int_offset = HB_DDR_ECC_INT_BASE, +}; + +static struct hb_mc_settings mw_settings = { + .err_offset = MW_DDR_ECC_ERR_BASE, + .int_offset = MW_DDR_ECC_INT_BASE, +}; + +static struct of_device_id hb_ddr_ctrl_of_match[] = { + { .compatible = "calxeda,hb-ddr-ctrl", .data = &hb_settings }, + { .compatible = "calxeda,ecx-2000-ddr-ctrl", .data = &mw_settings }, + {}, +}; +MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match); + static int highbank_mc_probe(struct platform_device *pdev) { + const struct of_device_id *id; + const struct hb_mc_settings *settings; struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; struct hb_mc_drvdata *drvdata; struct dimm_info *dimm; struct resource *r; + void __iomem *base; u32 control; int irq; int res = 0;
+ id = of_match_device(hb_ddr_ctrl_of_match, &pdev->dev); + if (!id) + return -ENODEV; + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; layers[0].size = 1; layers[0].is_virt_csrow = true; @@ -174,15 +212,18 @@ static int highbank_mc_probe(struct platform_device *pdev) goto err; }
- drvdata->mc_vbase = devm_ioremap(&pdev->dev, - r->start, resource_size(r)); - if (!drvdata->mc_vbase) { + base = devm_ioremap(&pdev->dev, r->start, resource_size(r)); + if (!base) { dev_err(&pdev->dev, "Unable to map regs\n"); res = -ENOMEM; goto err; }
- control = readl(drvdata->mc_vbase + HB_DDR_ECC_OPT) & 0x3; + settings = id->data; + drvdata->mc_err_base = base + settings->err_offset; + drvdata->mc_int_base = base + settings->int_offset; + + control = readl(drvdata->mc_err_base + HB_DDR_ECC_OPT) & 0x3; if (!control || (control == 0x2)) { dev_err(&pdev->dev, "No ECC present, or ECC disabled\n"); res = -ENODEV; @@ -238,12 +279,6 @@ static int highbank_mc_remove(struct platform_device *pdev) return 0; }
-static const struct of_device_id hb_ddr_ctrl_of_match[] = { - { .compatible = "calxeda,hb-ddr-ctrl", }, - {}, -}; -MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match); - static struct platform_driver highbank_mc_edac_driver = { .probe = highbank_mc_probe, .remove = highbank_mc_remove,
From: Robert Richter robert.richter@linaro.org
Assinging correct names of the 'hb_mc_edac' and 'hb_l2_edac' edac modules for module, controller and device. Reported values for Highbank in dmesg are now:
EDAC MC0: Giving out device to module hb_mc_edac controller calxeda,hb-ddr-ctrl: DEV fff00000.memory-controller (INTERRUPT)
EDAC DEVICE0: Giving out device to module hb_l2_edac controller calxeda,hb-sregs-l2-ecc: DEV fff3c200.sregs (INTERRUPT)
Signed-off-by: Robert Richter robert.richter@linaro.org Signed-off-by: Robert Richter rric@kernel.org --- drivers/edac/highbank_l2_edac.c | 17 ++++++++++------- drivers/edac/highbank_mc_edac.c | 5 +++-- 2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c index 10d3d29..2f19366 100644 --- a/drivers/edac/highbank_l2_edac.c +++ b/drivers/edac/highbank_l2_edac.c @@ -50,8 +50,15 @@ static irqreturn_t highbank_l2_err_handler(int irq, void *dev_id) return IRQ_HANDLED; }
+static const struct of_device_id hb_l2_err_of_match[] = { + { .compatible = "calxeda,hb-sregs-l2-ecc", }, + {}, +}; +MODULE_DEVICE_TABLE(of, hb_l2_err_of_match); + static int highbank_l2_err_probe(struct platform_device *pdev) { + const struct of_device_id *id; struct edac_device_ctl_info *dci; struct hb_l2_drvdata *drvdata; struct resource *r; @@ -90,7 +97,9 @@ static int highbank_l2_err_probe(struct platform_device *pdev) goto err; }
- dci->mod_name = dev_name(&pdev->dev); + id = of_match_device(hb_l2_err_of_match, &pdev->dev); + dci->mod_name = pdev->dev.driver->name; + dci->ctl_name = id ? id->compatible : "unknown"; dci->dev_name = dev_name(&pdev->dev);
if (edac_device_add_device(dci)) @@ -129,12 +138,6 @@ static int highbank_l2_err_remove(struct platform_device *pdev) return 0; }
-static const struct of_device_id hb_l2_err_of_match[] = { - { .compatible = "calxeda,hb-sregs-l2-ecc", }, - {}, -}; -MODULE_DEVICE_TABLE(of, hb_l2_err_of_match); - static struct platform_driver highbank_l2_edac_driver = { .probe = highbank_l2_err_probe, .remove = highbank_l2_err_remove, diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 79cb872..8221191 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -233,9 +233,10 @@ static int highbank_mc_probe(struct platform_device *pdev) mci->mtype_cap = MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = dev_name(&pdev->dev); + mci->mod_name = pdev->dev.driver->name; mci->mod_ver = "1"; - mci->ctl_name = dev_name(&pdev->dev); + mci->ctl_name = id->compatible; + mci->dev_name = dev_name(&pdev->dev); mci->scrub_mode = SCRUB_SW_SRC;
/* Only a single 4GB DIMM is supported */
From: Robert Richter robert.richter@linaro.org
Log messages slightly differ between edac subsystems. Unifying it.
Signed-off-by: Robert Richter robert.richter@linaro.org Signed-off-by: Robert Richter rric@kernel.org --- drivers/edac/edac_device.c | 9 +++------ drivers/edac/edac_mc.c | 6 ++++-- drivers/edac/edac_pci.c | 8 +++----- 3 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 211021d..1026743 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -530,12 +530,9 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
/* Report action taken */ edac_device_printk(edac_dev, KERN_INFO, - "Giving out device to module '%s' controller " - "'%s': DEV '%s' (%s)\n", - edac_dev->mod_name, - edac_dev->ctl_name, - edac_dev_name(edac_dev), - edac_op_state_to_string(edac_dev->op_state)); + "Giving out device to module %s controller %s: DEV %s (%s)\n", + edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name, + edac_op_state_to_string(edac_dev->op_state));
mutex_unlock(&device_ctls_mutex); return 0; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 89e1090..e8c9ef0 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -788,8 +788,10 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) }
/* Report action taken */ - edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" - " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_printk(mci, KERN_INFO, + "Giving out device to module %s controller %s: DEV %s (%s)\n", + mci->mod_name, mci->ctl_name, mci->dev_name, + edac_op_state_to_string(mci->op_state));
edac_mc_owner = mci->mod_name;
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index dd370f9..2cf44b4d 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -358,11 +358,9 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) }
edac_pci_printk(pci, KERN_INFO, - "Giving out device to module '%s' controller '%s':" - " DEV '%s' (%s)\n", - pci->mod_name, - pci->ctl_name, - edac_dev_name(pci), edac_op_state_to_string(pci->op_state)); + "Giving out device to module %s controller %s: DEV %s (%s)\n", + pci->mod_name, pci->ctl_name, pci->dev_name, + edac_op_state_to_string(pci->op_state));
mutex_unlock(&edac_pci_ctls_mutex); return 0;
linaro-kernel@lists.linaro.org