Quirk handling relies on the idea of a simple static array which contains quirk entries. Each entry consists of identification information (IDs from the standard header of the MCFG table) along with a custom pci_ecam_ops structure and a configuration space resource structure. This way it is possible to find corresponding quirk entries and override pci_ecam_ops and PCI configuration space regions.
As an example, the last 3 patches present quirk handling mechanism usage for ThunderX.
v5 -> v6 - rebase against v4.8-rc5 - drop patch 1 from previous series - keep pci_acpi_setup_ecam_mapping() in ARM64 arch directory - move quirk code to pci_mcfg.c - restrict quirk to override pci_ecam_ops and CFG resource structure only, no init call any more - split ThunderX quirks into the smaller chunks - add ThunderX pass1.x silicon revision support
v4 -> v5 - rebase against v4.8-rc1 - rework to exact MCFG OEM ID, TABLE ID, rev match - use memcmp instead of strncmp - no substring match - fix typos and dmesg message
Tomasz Nowicki (5): PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform specific ECAM quirks PCI: thunder-pem: Allow to probe PEM-specific register range for ACPI case PCI: thunder: Enable ACPI PCI controller for ThunderX pass2.x silicon version PCI: thunder: Enable ACPI PCI controller for ThunderX pass1.x silicon version
arch/arm64/kernel/pci.c | 17 ++-- drivers/acpi/pci_mcfg.c | 168 +++++++++++++++++++++++++++++++++++- drivers/pci/host/pci-thunder-ecam.c | 2 +- drivers/pci/host/pci-thunder-pem.c | 63 +++++++++++--- include/linux/pci-acpi.h | 4 +- include/linux/pci-ecam.h | 7 ++ 6 files changed, 230 insertions(+), 31 deletions(-)
In preparation for adding MCFG platform-specific quirk handling, move CFG resource calculation and ECAM ops assignment to pci_mcfg_lookup(). It becomes the gate for further ops and CFG resource manipulation in arch-agnostic code (drivers/acpi/pci_mcfg.c).
No functionality changes in this patch.
Signed-off-by: Tomasz Nowicki tn@semihalf.com --- arch/arm64/kernel/pci.c | 17 +++++------------ drivers/acpi/pci_mcfg.c | 28 +++++++++++++++++++++++++--- include/linux/pci-acpi.h | 4 +++- 3 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index acf3872..fb439c7 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -125,24 +125,17 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root) u16 seg = root->segment; struct pci_config_window *cfg; struct resource cfgres; - unsigned int bsz; + struct pci_ecam_ops *ecam_ops; + int ret;
- /* Use address from _CBA if present, otherwise lookup MCFG */ - if (!root->mcfg_addr) - root->mcfg_addr = pci_mcfg_lookup(seg, bus_res); - - if (!root->mcfg_addr) { + ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops); + if (ret) { dev_err(&root->device->dev, "%04x:%pR ECAM region not found\n", seg, bus_res); return NULL; }
- bsz = 1 << pci_generic_ecam_ops.bus_shift; - cfgres.start = root->mcfg_addr + bus_res->start * bsz; - cfgres.end = cfgres.start + resource_size(bus_res) * bsz - 1; - cfgres.flags = IORESOURCE_MEM; - cfg = pci_ecam_create(&root->device->dev, &cfgres, bus_res, - &pci_generic_ecam_ops); + cfg = pci_ecam_create(&root->device->dev, &cfgres, bus_res, ecam_ops); if (IS_ERR(cfg)) { dev_err(&root->device->dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(cfg)); diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index b5b376e..ffcc651 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -22,6 +22,7 @@ #include <linux/kernel.h> #include <linux/pci.h> #include <linux/pci-acpi.h> +#include <linux/pci-ecam.h>
/* Structure to hold entries from the MCFG table */ struct mcfg_entry { @@ -35,9 +36,18 @@ struct mcfg_entry { /* List to save MCFG entries */ static LIST_HEAD(pci_mcfg_list);
-phys_addr_t pci_mcfg_lookup(u16 seg, struct resource *bus_res) +int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres, + struct pci_ecam_ops **ecam_ops) { + struct pci_ecam_ops *ops = &pci_generic_ecam_ops; + struct resource *bus_res = &root->secondary; + u16 seg = root->segment; struct mcfg_entry *e; + struct resource res; + + /* Use address from _CBA if present, otherwise lookup MCFG */ + if (root->mcfg_addr) + goto skip_lookup;
/* * We expect exact match, unless MCFG entry end bus covers more than @@ -45,10 +55,22 @@ phys_addr_t pci_mcfg_lookup(u16 seg, struct resource *bus_res) */ list_for_each_entry(e, &pci_mcfg_list, list) { if (e->segment == seg && e->bus_start == bus_res->start && - e->bus_end >= bus_res->end) - return e->addr; + e->bus_end >= bus_res->end) { + root->mcfg_addr = e->addr; + } + }
+ if (!root->mcfg_addr) + return -ENXIO; + +skip_lookup: + memset(&res, 0, sizeof(res)); + res.start = root->mcfg_addr + (bus_res->start << 20); + res.end = res.start + (resource_size(bus_res) << 20) - 1; + res.flags = IORESOURCE_MEM; + *cfgres = res; + *ecam_ops = ops; return 0; }
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 7d63a66..7a4e83a 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -24,7 +24,9 @@ static inline acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev) } extern phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle);
-extern phys_addr_t pci_mcfg_lookup(u16 domain, struct resource *bus_res); +struct pci_ecam_ops; +extern int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres, + struct pci_ecam_ops **ecam_ops);
static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) {
Some platforms may not be fully compliant with the generic set of PCI config accessors. For these cases we implement a way to override the CFG accessor set and the configuration space range.
First, pci_mcfg_parse() saves the machine's IDs and revision number (these come from the MCFG header) in order to match against known quirk entries. Then the algorithm traverses the available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding a new quirk there are two possibilities: 1. Override default pci_generic_ecam_ops ops but CFG resource comes from MCFG { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed to get the CFG resource from the quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org --- drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup { + char oem_id[ACPI_OEM_ID_SIZE + 1]; + char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; + u32 oem_revision; + u16 seg; + struct resource bus_range; + struct pci_ecam_ops *ops; + struct resource cfgres; +}; + +#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \ + ((end) - (start) + 1), \ + NULL, IORESOURCE_BUS) +#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0) + +static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +}; + +static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision; + +static void pci_mcfg_match_quirks(struct acpi_pci_root *root, + struct resource *cfgres, + struct pci_ecam_ops **ecam_ops) +{ + struct mcfg_fixup *f; + int i; + + /* + * First match against PCI topology domain:bus then use OEM ID, OEM + * table ID, and OEM revision from MCFG table standard header. + */ + for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) { + if (f->seg == root->segment && + resource_contains(&f->bus_range, &root->secondary) && + !memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) && + !memcmp(f->oem_table_id, mcfg_oem_table_id, + ACPI_OEM_TABLE_ID_SIZE) && + f->oem_revision == mcfg_oem_revision) { + if (f->cfgres.start) + *cfgres = f->cfgres; + if (f->ops) + *ecam_ops = f->ops; + dev_info(&root->device->dev, "Applying PCI MCFG quirks for %s %s rev: %d\n", + f->oem_id, f->oem_table_id, f->oem_revision); + return; + } + } +}
/* List to save MCFG entries */ static LIST_HEAD(pci_mcfg_list); @@ -61,14 +114,24 @@ int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
}
- if (!root->mcfg_addr) - return -ENXIO; - skip_lookup: memset(&res, 0, sizeof(res)); - res.start = root->mcfg_addr + (bus_res->start << 20); - res.end = res.start + (resource_size(bus_res) << 20) - 1; - res.flags = IORESOURCE_MEM; + if (root->mcfg_addr) { + res.start = root->mcfg_addr + (bus_res->start << 20); + res.end = res.start + (resource_size(bus_res) << 20) - 1; + res.flags = IORESOURCE_MEM; + } + + /* + * Let to override default ECAM ops and CFG resource range. + * Also, this might even retrieve CFG resource range in case MCFG + * does not have it. Invalid CFG start address means MCFG firmware bug + * or we need another quirk in array. + */ + pci_mcfg_match_quirks(root, &res, &ops); + if (!res.start) + return -ENXIO; + *cfgres = res; *ecam_ops = ops; return 0; @@ -101,6 +164,11 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header) list_add(&e->list, &pci_mcfg_list); }
+ /* Save MCFG IDs and revision for quirks matching */ + memcpy(mcfg_oem_id, header->oem_id, ACPI_OEM_ID_SIZE); + memcpy(mcfg_oem_table_id, header->oem_table_id, ACPI_OEM_TABLE_ID_SIZE); + mcfg_oem_revision = header->revision; + pr_info("MCFG table detected, %d entries\n", n); return 0; }
On Fri, Sep 9, 2016 at 12:24 PM, Tomasz Nowicki tn@semihalf.com wrote:
Some platforms may not be fully compliant with generic set of PCI config accessors. For these cases we implement the way to overwrite CFG accessors set and configuration space range.
In first place pci_mcfg_parse() saves machine's IDs and revision number (these come from MCFG header) in order to match against known quirk entries. Then the algorithm traverses available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding new quirk there are two possibilities:
- Override default pci_generic_ecam_ops ops but CFG resource comes from MCFG
{ "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed get CFG resource from quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org
drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup {
char oem_id[ACPI_OEM_ID_SIZE + 1];
char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
u32 oem_revision;
u16 seg;
struct resource bus_range;
struct pci_ecam_ops *ops;
struct resource cfgres;
+};
+#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
+#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0)
+static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +};
+static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision;
+static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
struct resource *cfgres,
struct pci_ecam_ops **ecam_ops)
+{
struct mcfg_fixup *f;
int i;
/*
* First match against PCI topology <domain:bus> then use OEM ID, OEM
* table ID, and OEM revision from MCFG table standard header.
*/
for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) {
if (f->seg == root->segment &&
Is dropping the comparison with MCFG_DOM_ANY intended? It is useful if all the controllers (segs) can use the same quirk (X-Gene case). If you decide to drop it, then we can remove MCFG_DOM_ANY definition as well.
resource_contains(&f->bus_range, &root->secondary) &&
!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) &&
!memcmp(f->oem_table_id, mcfg_oem_table_id,
ACPI_OEM_TABLE_ID_SIZE) &&
f->oem_revision == mcfg_oem_revision) {
if (f->cfgres.start)
*cfgres = f->cfgres;
if (f->ops)
*ecam_ops = f->ops;
dev_info(&root->device->dev, "Applying PCI MCFG quirks for %s %s rev: %d\n",
f->oem_id, f->oem_table_id, f->oem_revision);
return;
}
}
+}
/* List to save MCFG entries */ static LIST_HEAD(pci_mcfg_list); @@ -61,14 +114,24 @@ int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
}
if (!root->mcfg_addr)
return -ENXIO;
skip_lookup: memset(&res, 0, sizeof(res));
res.start = root->mcfg_addr + (bus_res->start << 20);
res.end = res.start + (resource_size(bus_res) << 20) - 1;
res.flags = IORESOURCE_MEM;
if (root->mcfg_addr) {
res.start = root->mcfg_addr + (bus_res->start << 20);
res.end = res.start + (resource_size(bus_res) << 20) - 1;
res.flags = IORESOURCE_MEM;
}
/*
* Let to override default ECAM ops and CFG resource range.
* Also, this might even retrieve CFG resource range in case MCFG
* does not have it. Invalid CFG start address means MCFG firmware bug
* or we need another quirk in array.
*/
pci_mcfg_match_quirks(root, &res, &ops);
if (!res.start)
return -ENXIO;
*cfgres = res; *ecam_ops = ops; return 0;
@@ -101,6 +164,11 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header) list_add(&e->list, &pci_mcfg_list); }
/* Save MCFG IDs and revision for quirks matching */
memcpy(mcfg_oem_id, header->oem_id, ACPI_OEM_ID_SIZE);
memcpy(mcfg_oem_table_id, header->oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
mcfg_oem_revision = header->revision;
pr_info("MCFG table detected, %d entries\n", n); return 0;
}
1.9.1
Regards, Duc Dang.
On Mon, Sep 12, 2016 at 3:24 PM, Duc Dang dhdang@apm.com wrote:
On Fri, Sep 9, 2016 at 12:24 PM, Tomasz Nowicki tn@semihalf.com wrote:
Some platforms may not be fully compliant with generic set of PCI config accessors. For these cases we implement the way to overwrite CFG accessors set and configuration space range.
In first place pci_mcfg_parse() saves machine's IDs and revision number (these come from MCFG header) in order to match against known quirk entries. Then the algorithm traverses available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding new quirk there are two possibilities:
- Override default pci_generic_ecam_ops ops but CFG resource comes from MCFG
{ "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed get CFG resource from quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org
drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup {
char oem_id[ACPI_OEM_ID_SIZE + 1];
char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
u32 oem_revision;
u16 seg;
struct resource bus_range;
struct pci_ecam_ops *ops;
struct resource cfgres;
+};
+#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
+#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0)
+static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +};
+static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision;
+static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
struct resource *cfgres,
struct pci_ecam_ops **ecam_ops)
+{
struct mcfg_fixup *f;
int i;
/*
* First match against PCI topology <domain:bus> then use OEM ID, OEM
* table ID, and OEM revision from MCFG table standard header.
*/
for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) {
if (f->seg == root->segment &&
Is dropping the comparison with MCFG_DOM_ANY intended? It is useful if all the controllers (segs) can use the same quirk (X-Gene case). If you decide to drop it, then we can remove MCFG_DOM_ANY definition as well.
resource_contains(&f->bus_range, &root->secondary) &&
!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) &&
!memcmp(f->oem_table_id, mcfg_oem_table_id,
ACPI_OEM_TABLE_ID_SIZE) &&
f->oem_revision == mcfg_oem_revision) {
if (f->cfgres.start)
*cfgres = f->cfgres;
if (f->ops)
*ecam_ops = f->ops;
dev_info(&root->device->dev, "Applying PCI MCFG quirks for %s %s rev: %d\n",
f->oem_id, f->oem_table_id, f->oem_revision);
return;
}
}
+}
/* List to save MCFG entries */ static LIST_HEAD(pci_mcfg_list); @@ -61,14 +114,24 @@ int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
}
if (!root->mcfg_addr)
return -ENXIO;
skip_lookup: memset(&res, 0, sizeof(res));
res.start = root->mcfg_addr + (bus_res->start << 20);
res.end = res.start + (resource_size(bus_res) << 20) - 1;
res.flags = IORESOURCE_MEM;
if (root->mcfg_addr) {
res.start = root->mcfg_addr + (bus_res->start << 20);
res.end = res.start + (resource_size(bus_res) << 20) - 1;
res.flags = IORESOURCE_MEM;
}
/*
* Let to override default ECAM ops and CFG resource range.
* Also, this might even retrieve CFG resource range in case MCFG
* does not have it. Invalid CFG start address means MCFG firmware bug
* or we need another quirk in array.
*/
pci_mcfg_match_quirks(root, &res, &ops);
if (!res.start)
return -ENXIO;
*cfgres = res; *ecam_ops = ops; return 0;
@@ -101,6 +164,11 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header) list_add(&e->list, &pci_mcfg_list); }
/* Save MCFG IDs and revision for quirks matching */
memcpy(mcfg_oem_id, header->oem_id, ACPI_OEM_ID_SIZE);
memcpy(mcfg_oem_table_id, header->oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
mcfg_oem_revision = header->revision;
I think this is a typo, it should be: mcfg_oem_revision = header->oem_revision;
pr_info("MCFG table detected, %d entries\n", n); return 0;
}
1.9.1
Regards, Duc Dang.
Regards, Duc Dang.
On 13.09.2016 00:47, Duc Dang wrote:
On Mon, Sep 12, 2016 at 3:24 PM, Duc Dang dhdang@apm.com wrote:
On Fri, Sep 9, 2016 at 12:24 PM, Tomasz Nowicki tn@semihalf.com wrote:
Some platforms may not be fully compliant with generic set of PCI config accessors. For these cases we implement the way to overwrite CFG accessors set and configuration space range.
In first place pci_mcfg_parse() saves machine's IDs and revision number (these come from MCFG header) in order to match against known quirk entries. Then the algorithm traverses available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding new quirk there are two possibilities:
- Override default pci_generic_ecam_ops ops but CFG resource comes from MCFG
{ "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed get CFG resource from quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org
drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup {
char oem_id[ACPI_OEM_ID_SIZE + 1];
char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
u32 oem_revision;
u16 seg;
struct resource bus_range;
struct pci_ecam_ops *ops;
struct resource cfgres;
+};
+#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
+#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0)
+static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +};
+static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision;
+static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
struct resource *cfgres,
struct pci_ecam_ops **ecam_ops)
+{
struct mcfg_fixup *f;
int i;
/*
* First match against PCI topology <domain:bus> then use OEM ID, OEM
* table ID, and OEM revision from MCFG table standard header.
*/
for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) {
if (f->seg == root->segment &&
Is dropping the comparison with MCFG_DOM_ANY intended? It is useful if all the controllers (segs) can use the same quirk (X-Gene case). If you decide to drop it, then we can remove MCFG_DOM_ANY definition as well.
resource_contains(&f->bus_range, &root->secondary) &&
!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) &&
!memcmp(f->oem_table_id, mcfg_oem_table_id,
ACPI_OEM_TABLE_ID_SIZE) &&
f->oem_revision == mcfg_oem_revision) {
if (f->cfgres.start)
*cfgres = f->cfgres;
if (f->ops)
*ecam_ops = f->ops;
dev_info(&root->device->dev, "Applying PCI MCFG quirks for %s %s rev: %d\n",
f->oem_id, f->oem_table_id, f->oem_revision);
return;
}
}
+}
/* List to save MCFG entries */ static LIST_HEAD(pci_mcfg_list); @@ -61,14 +114,24 @@ int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
}
if (!root->mcfg_addr)
return -ENXIO;
skip_lookup: memset(&res, 0, sizeof(res));
res.start = root->mcfg_addr + (bus_res->start << 20);
res.end = res.start + (resource_size(bus_res) << 20) - 1;
res.flags = IORESOURCE_MEM;
if (root->mcfg_addr) {
res.start = root->mcfg_addr + (bus_res->start << 20);
res.end = res.start + (resource_size(bus_res) << 20) - 1;
res.flags = IORESOURCE_MEM;
}
/*
* Let to override default ECAM ops and CFG resource range.
* Also, this might even retrieve CFG resource range in case MCFG
* does not have it. Invalid CFG start address means MCFG firmware bug
* or we need another quirk in array.
*/
pci_mcfg_match_quirks(root, &res, &ops);
if (!res.start)
return -ENXIO;
*cfgres = res; *ecam_ops = ops; return 0;
@@ -101,6 +164,11 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header) list_add(&e->list, &pci_mcfg_list); }
/* Save MCFG IDs and revision for quirks matching */
memcpy(mcfg_oem_id, header->oem_id, ACPI_OEM_ID_SIZE);
memcpy(mcfg_oem_table_id, header->oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
mcfg_oem_revision = header->revision;
I think this is a typo, it should be: mcfg_oem_revision = header->oem_revision;
You are right, header->oem_revision should be assigned.
Thanks! Tomasz
On 13.09.2016 00:24, Duc Dang wrote:
On Fri, Sep 9, 2016 at 12:24 PM, Tomasz Nowicki tn@semihalf.com wrote:
Some platforms may not be fully compliant with generic set of PCI config accessors. For these cases we implement the way to overwrite CFG accessors set and configuration space range.
In first place pci_mcfg_parse() saves machine's IDs and revision number (these come from MCFG header) in order to match against known quirk entries. Then the algorithm traverses available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding new quirk there are two possibilities:
- Override default pci_generic_ecam_ops ops but CFG resource comes from MCFG
{ "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed get CFG resource from quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org
drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup {
char oem_id[ACPI_OEM_ID_SIZE + 1];
char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
u32 oem_revision;
u16 seg;
struct resource bus_range;
struct pci_ecam_ops *ops;
struct resource cfgres;
+};
+#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
+#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0)
+static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +};
+static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision;
+static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
struct resource *cfgres,
struct pci_ecam_ops **ecam_ops)
+{
struct mcfg_fixup *f;
int i;
/*
* First match against PCI topology <domain:bus> then use OEM ID, OEM
* table ID, and OEM revision from MCFG table standard header.
*/
for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) {
if (f->seg == root->segment &&
Is dropping the comparison with MCFG_DOM_ANY intended? It is useful if all the controllers (segs) can use the same quirk (X-Gene case).
MCFG_DOM_ANY makes sense only if we want to use the same ops for segment range, but not for CFG range manipulation. So we can add MCFG_DOM_ANY back here in the hope it will be used properly.
Thanks, Tomasz
Hi Tomasz
在 2016/9/10 3:24, Tomasz Nowicki 写道:
Some platforms may not be fully compliant with generic set of PCI config accessors. For these cases we implement the way to overwrite CFG accessors set and configuration space range.
In first place pci_mcfg_parse() saves machine's IDs and revision number (these come from MCFG header) in order to match against known quirk entries. Then the algorithm traverses available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding new quirk there are two possibilities:
- Override default pci_generic_ecam_ops ops but CFG resource comes from MCFG
{ "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed get CFG resource from quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org
drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup {
- char oem_id[ACPI_OEM_ID_SIZE + 1];
- char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
- u32 oem_revision;
- u16 seg;
- struct resource bus_range;
- struct pci_ecam_ops *ops;
- struct resource cfgres;
+};
+#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
+#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0)
+static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +};
+static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision;
+static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
struct resource *cfgres,
struct pci_ecam_ops **ecam_ops)
+{
- struct mcfg_fixup *f;
- int i;
- /*
* First match against PCI topology <domain:bus> then use OEM ID, OEM
* table ID, and OEM revision from MCFG table standard header.
*/
- for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) {
if (f->seg == root->segment &&
Why not use MCFG_DOM_RANGE? I think MCFG_DOM_RANGE is better. If we drop MCFG_DOM_RANGE, mcfg_quirks[] will be more complex.
static struct mcfg_fixup mcfg_quirks[] = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ #ifdef CONFIG_PCI_HOST_THUNDER_ECAM /* SoC pass1.x */ { "CAVIUM", "THUNDERX", 2, 0, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 1, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 2, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 3, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 10, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 11, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 12, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 13, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, #endif ..... };
As PATCH v5 we only need define mcfg_quirks as below, It looks better. static struct pci_cfg_fixup mcfg_quirks[] __initconst = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, pci_ops, init_hook }, */ #ifdef CONFIG_PCI_HOST_THUNDER_PEM /* Pass2.0 */ { "CAVIUM", "THUNDERX", 1, MCFG_DOM_RANGE(4, 9), MCFG_BUS_ANY, NULL, thunder_pem_cfg_init }, { "CAVIUM", "THUNDERX", 1, MCFG_DOM_RANGE(14, 19), MCFG_BUS_ANY, NULL, thunder_pem_cfg_init }, #endif #ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip05_init}, { "HISI ", "HIP06 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip06_init}, { "HISI ", "HIP07 ", 0, MCFG_DOM_RANGE(0, 15), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip07_init}, #endif };
resource_contains(&f->bus_range, &root->secondary) &&
!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) &&
!memcmp(f->oem_table_id, mcfg_oem_table_id,
ACPI_OEM_TABLE_ID_SIZE) &&
f->oem_revision == mcfg_oem_revision) {
if (f->cfgres.start)
*cfgres = f->cfgres;
if (f->ops)
*ecam_ops = f->ops;
dev_info(&root->device->dev, "Applying PCI MCFG quirks for %s %s rev: %d\n",
f->oem_id, f->oem_table_id, f->oem_revision);
return;
}
- }
+}
/* List to save MCFG entries */ static LIST_HEAD(pci_mcfg_list); @@ -61,14 +114,24 @@ int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
}
- if (!root->mcfg_addr)
return -ENXIO;
- skip_lookup: memset(&res, 0, sizeof(res));
- res.start = root->mcfg_addr + (bus_res->start << 20);
- res.end = res.start + (resource_size(bus_res) << 20) - 1;
- res.flags = IORESOURCE_MEM;
- if (root->mcfg_addr) {
res.start = root->mcfg_addr + (bus_res->start << 20);
res.end = res.start + (resource_size(bus_res) << 20) - 1;
res.flags = IORESOURCE_MEM;
- }
- /*
* Let to override default ECAM ops and CFG resource range.
* Also, this might even retrieve CFG resource range in case MCFG
* does not have it. Invalid CFG start address means MCFG firmware bug
* or we need another quirk in array.
*/
- pci_mcfg_match_quirks(root, &res, &ops);
- if (!res.start)
return -ENXIO;
- *cfgres = res; *ecam_ops = ops; return 0;
@@ -101,6 +164,11 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header) list_add(&e->list, &pci_mcfg_list); }
- /* Save MCFG IDs and revision for quirks matching */
- memcpy(mcfg_oem_id, header->oem_id, ACPI_OEM_ID_SIZE);
- memcpy(mcfg_oem_table_id, header->oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
- mcfg_oem_revision = header->revision;
- pr_info("MCFG table detected, %d entries\n", n); return 0; }
Hi Liu,
On 13.09.2016 04:36, Dongdong Liu wrote:
Hi Tomasz
在 2016/9/10 3:24, Tomasz Nowicki 写道:
Some platforms may not be fully compliant with generic set of PCI config accessors. For these cases we implement the way to overwrite CFG accessors set and configuration space range.
In first place pci_mcfg_parse() saves machine's IDs and revision number (these come from MCFG header) in order to match against known quirk entries. Then the algorithm traverses available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding new quirk there are two possibilities:
- Override default pci_generic_ecam_ops ops but CFG resource comes
from MCFG { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed get CFG resource from quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org
drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup {
- char oem_id[ACPI_OEM_ID_SIZE + 1];
- char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
- u32 oem_revision;
- u16 seg;
- struct resource bus_range;
- struct pci_ecam_ops *ops;
- struct resource cfgres;
+};
+#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
+#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0)
+static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +};
+static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision;
+static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
struct resource *cfgres,
struct pci_ecam_ops **ecam_ops)
+{
- struct mcfg_fixup *f;
- int i;
- /*
* First match against PCI topology <domain:bus> then use OEM ID,
OEM
* table ID, and OEM revision from MCFG table standard header.
*/
- for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++,
f++) {
if (f->seg == root->segment &&
why not use MCFG_DOM_RANGE, I think MCFG_DOM_RANGE is better. if drop MCFG_DOM_RANGE, mcfg_quirks[] will be more complex.
static struct mcfg_fixup mcfg_quirks[] = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ #ifdef CONFIG_PCI_HOST_THUNDER_ECAM /* SoC pass1.x */ { "CAVIUM", "THUNDERX", 2, 0, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 1, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 2, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 3, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 10, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 11, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 12, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 13, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, #endif ..... };
As PATCH v5 we only need define mcfg_quirks as below, It looks better. static struct pci_cfg_fixup mcfg_quirks[] __initconst = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, pci_ops, init_hook }, */ #ifdef CONFIG_PCI_HOST_THUNDER_PEM /* Pass2.0 */ { "CAVIUM", "THUNDERX", 1, MCFG_DOM_RANGE(4, 9), MCFG_BUS_ANY, NULL, thunder_pem_cfg_init }, { "CAVIUM", "THUNDERX", 1, MCFG_DOM_RANGE(14, 19), MCFG_BUS_ANY, NULL, thunder_pem_cfg_init }, #endif #ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip05_init}, { "HISI ", "HIP06 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip06_init}, { "HISI ", "HIP07 ", 0, MCFG_DOM_RANGE(0, 15), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip07_init}, #endif };
Note that this series disallows the hisi_pcie_acpi_hip07_init() call. Following Bjorn's suggestion, I reworked the quirk code to override ops and CFG resources only. Given that, I do not see a way to use the MCFG_DOM_RANGE macro. For HISI you would need to provide the CFG range for each possible case:
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start0, size0)}, { "HISI ", "HIP05 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start1, size1)}, { "HISI ", "HIP05 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start2, size2)}, { "HISI ", "HIP05 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start3, size3)}, [...] #endif
Indeed there are more entries here but you do not have to define the same resource array in driver.
Thanks, Tomasz
Hi Tomasz
在 2016/9/13 14:32, Tomasz Nowicki 写道:
Hi Liu,
On 13.09.2016 04:36, Dongdong Liu wrote:
Hi Tomasz
在 2016/9/10 3:24, Tomasz Nowicki 写道:
Some platforms may not be fully compliant with generic set of PCI config accessors. For these cases we implement the way to overwrite CFG accessors set and configuration space range.
In first place pci_mcfg_parse() saves machine's IDs and revision number (these come from MCFG header) in order to match against known quirk entries. Then the algorithm traverses available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding new quirk there are two possibilities:
- Override default pci_generic_ecam_ops ops but CFG resource comes
from MCFG { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed get CFG resource from quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org
drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup {
- char oem_id[ACPI_OEM_ID_SIZE + 1];
- char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
- u32 oem_revision;
- u16 seg;
- struct resource bus_range;
- struct pci_ecam_ops *ops;
- struct resource cfgres;
+};
+#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
+#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0)
+static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +};
+static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision;
+static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
struct resource *cfgres,
struct pci_ecam_ops **ecam_ops)
+{
- struct mcfg_fixup *f;
- int i;
- /*
* First match against PCI topology <domain:bus> then use OEM ID,
OEM
* table ID, and OEM revision from MCFG table standard header.
*/
- for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++,
f++) {
if (f->seg == root->segment &&
why not use MCFG_DOM_RANGE, I think MCFG_DOM_RANGE is better. if drop MCFG_DOM_RANGE, mcfg_quirks[] will be more complex.
static struct mcfg_fixup mcfg_quirks[] = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ #ifdef CONFIG_PCI_HOST_THUNDER_ECAM /* SoC pass1.x */ { "CAVIUM", "THUNDERX", 2, 0, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 1, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 2, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 3, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 10, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 11, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 12, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 13, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, #endif ..... };
As PATCH v5 we only need define mcfg_quirks as below, It looks better. static struct pci_cfg_fixup mcfg_quirks[] __initconst = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, pci_ops, init_hook }, */ #ifdef CONFIG_PCI_HOST_THUNDER_PEM /* Pass2.0 */ { "CAVIUM", "THUNDERX", 1, MCFG_DOM_RANGE(4, 9), MCFG_BUS_ANY, NULL, thunder_pem_cfg_init }, { "CAVIUM", "THUNDERX", 1, MCFG_DOM_RANGE(14, 19), MCFG_BUS_ANY, NULL, thunder_pem_cfg_init }, #endif #ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip05_init}, { "HISI ", "HIP06 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip06_init}, { "HISI ", "HIP07 ", 0, MCFG_DOM_RANGE(0, 15), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip07_init}, #endif };
Note this series disallow hisi_pcie_acpi_hip07_init() call. According to the Bjorn suggestion I rework quirk code to override ops and CFG resources only. Giving that I do not see the way to use MCFG_DOM_RANGE macro. For HISI you would need to get CFG range for each possible case:
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start0, size0)}, { "HISI ", "HIP05 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start1, size1)}, { "HISI ", "HIP05 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start2, size2)}, { "HISI ", "HIP05 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start3, size3)}, [...] #endif
Indeed there are more entries here but you do not have to define the same resource array in driver.
Our host bridge is non ECAM only for the RC bus config space; for any other bus underneath the root bus we support ECAM access.
The RC config resource is hardcoded as DEFINE_RES_MEM(0xb0070000, SZ_4K), while the EP config resource is obtained from the MCFG table. So we need to override the ops, but the only config resource we need to hardcode is the RC config resource.
Our host controller ACPI support patch can be found: https://lkml.org/lkml/2016/8/31/340 This patch is based on RFC V5 quirk mechanism.
Based on V6 quirk mechanism, we have to change it as below:
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, .... { "HISI ", "HIP07 ", 0, 15, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY},
#endif
struct pci_ecam_ops hisi_pci_hip05_ops = { .bus_shift = 20, .init = hisi_pci_hip05_init, .pci_ops = { .map_bus = pci_ecam_map_bus, .read = hisi_pcie_acpi_rd_conf, .write = hisi_pcie_acpi_wr_conf, } };
struct pci_ecam_ops hisi_pci_hip06_ops = { .bus_shift = 20, .init = hisi_pci_hip06_init, .pci_ops = { .map_bus = pci_ecam_map_bus, .read = hisi_pcie_acpi_rd_conf, .write = hisi_pcie_acpi_wr_conf, } };
The hisi_pci_hipxx_init function is used to obtain the hardcoded RC config resource. .....
So I hope we can use MCFG_DOM_RANGE; then I can change it as below.
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, MCFG_DOM_RANGE(0, 15), MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, #endif
Thanks Dongdong
Thanks, Tomasz
.
On Tue, Sep 13, 2016 at 07:38:39PM +0800, Dongdong Liu wrote:
Hi Tomasz
在 2016/9/13 14:32, Tomasz Nowicki 写道:
Hi Liu,
On 13.09.2016 04:36, Dongdong Liu wrote:
Hi Tomasz
在 2016/9/10 3:24, Tomasz Nowicki 写道:
Some platforms may not be fully compliant with generic set of PCI config accessors. For these cases we implement the way to overwrite CFG accessors set and configuration space range.
In first place pci_mcfg_parse() saves machine's IDs and revision number (these come from MCFG header) in order to match against known quirk entries. Then the algorithm traverses available quirk list (static array), matches against <oem_id, oem_table_id, rev, domain, bus number range> and returns custom PCI config ops and/or CFG resource structure.
When adding new quirk there are two possibilities:
- Override default pci_generic_ecam_ops ops but CFG resource comes
from MCFG { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &foo_ops, MCFG_RES_EMPTY }, 2. Override default pci_generic_ecam_ops ops and CFG resource. For this case it is also allowed get CFG resource from quirk entry w/o having it in MCFG. { "OEM_ID", "OEM_TABLE_ID", <REV>, <DOMAIN>, <BUS_NR>, &boo_ops, DEFINE_RES_MEM(START, SIZE) },
pci_generic_ecam_ops and MCFG entries will be used for platforms free from quirks.
Signed-off-by: Tomasz Nowicki tn@semihalf.com Signed-off-by: Dongdong Liu liudongdong3@huawei.com Signed-off-by: Christopher Covington cov@codeaurora.org
drivers/acpi/pci_mcfg.c | 80 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index ffcc651..2b8acc7 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -32,6 +32,59 @@ struct mcfg_entry { u8 bus_start; u8 bus_end; }; +struct mcfg_fixup {
- char oem_id[ACPI_OEM_ID_SIZE + 1];
- char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
- u32 oem_revision;
- u16 seg;
- struct resource bus_range;
- struct pci_ecam_ops *ops;
- struct resource cfgres;
+};
+#define MCFG_DOM_ANY (-1) +#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
+#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) +#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0)
+static struct mcfg_fixup mcfg_quirks[] = { +/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +};
+static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; +static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; +static u32 mcfg_oem_revision;
+static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
struct resource *cfgres,
struct pci_ecam_ops **ecam_ops)
+{
- struct mcfg_fixup *f;
- int i;
- /*
* First match against PCI topology <domain:bus> then use OEM ID,
OEM
* table ID, and OEM revision from MCFG table standard header.
*/
- for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++,
f++) {
if (f->seg == root->segment &&
why not use MCFG_DOM_RANGE, I think MCFG_DOM_RANGE is better. if drop MCFG_DOM_RANGE, mcfg_quirks[] will be more complex.
static struct mcfg_fixup mcfg_quirks[] = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ #ifdef CONFIG_PCI_HOST_THUNDER_ECAM /* SoC pass1.x */ { "CAVIUM", "THUNDERX", 2, 0, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 1, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 2, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 3, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 10, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 11, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 12, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, { "CAVIUM", "THUNDERX", 2, 13, MCFG_BUS_ANY, &pci_thunder_ecam_ops, MCFG_RES_EMPTY}, #endif ..... };
As PATCH v5 we only need define mcfg_quirks as below, It looks better. static struct pci_cfg_fixup mcfg_quirks[] __initconst = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, pci_ops, init_hook }, */ #ifdef CONFIG_PCI_HOST_THUNDER_PEM /* Pass2.0 */ { "CAVIUM", "THUNDERX", 1, MCFG_DOM_RANGE(4, 9), MCFG_BUS_ANY, NULL, thunder_pem_cfg_init }, { "CAVIUM", "THUNDERX", 1, MCFG_DOM_RANGE(14, 19), MCFG_BUS_ANY, NULL, thunder_pem_cfg_init }, #endif #ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip05_init}, { "HISI ", "HIP06 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip06_init}, { "HISI ", "HIP07 ", 0, MCFG_DOM_RANGE(0, 15), MCFG_BUS_ANY, NULL, hisi_pcie_acpi_hip07_init}, #endif };
Note this series disallow hisi_pcie_acpi_hip07_init() call. According to the Bjorn suggestion I rework quirk code to override ops and CFG resources only. Giving that I do not see the way to use MCFG_DOM_RANGE macro. For HISI you would need to get CFG range for each possible case:
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start0, size0)}, { "HISI ", "HIP05 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start1, size1)}, { "HISI ", "HIP05 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start2, size2)}, { "HISI ", "HIP05 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_ops, DEFINE_RES_MEM(start3, size3)}, [...] #endif
Indeed there are more entries here but you do not have to define the same resource array in driver.
Our host bridge is non ECAM only for the RC bus config space; for any other bus underneath the root bus we support ECAM access.
RC config resource with hardcode as DEFINE_RES_MEM(0xb0070000, SZ_4K), EP config resource we get it from MCFG table. So we need to override ops, but config resource we only need to hardcode with RC config resource.
Our host controller ACPI support patch can be found: https://lkml.org/lkml/2016/8/31/340 This patch is based on RFC V5 quirk mechanism.
Based on V6 quirk mechanism, we have to change it as below:
That's because you are hacking around the quirk mechanism to define resources that have nothing to do with PCI config regions (that you ioremap and use to check the PCI link status) and on top of that you are *still* using the MCFG to describe config regions that are NOT ECAM compliant, which is the exact opposite of what this patchset is meant to achieve.
If a config region is not ECAM compliant having it defined in the MCFG is a firmware bug and the way you are using this patchset is not what we designed it for, please correct me if I am wrong.
Thanks, Lorenzo
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, .... { "HISI ", "HIP07 ", 0, 15, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY},
#endif
struct pci_ecam_ops hisi_pci_hip05_ops = { .bus_shift = 20, .init = hisi_pci_hip05_init, .pci_ops = { .map_bus = pci_ecam_map_bus, .read = hisi_pcie_acpi_rd_conf, .write = hisi_pcie_acpi_wr_conf, } };
struct pci_ecam_ops hisi_pci_hip06_ops = { .bus_shift = 20, .init = hisi_pci_hip06_init, .pci_ops = { .map_bus = pci_ecam_map_bus, .read = hisi_pcie_acpi_rd_conf, .write = hisi_pcie_acpi_wr_conf, } };
hisi_pci_hipxx_init function is used to get RC config resource with hardcode. .....
So I hope we can use MCFG_DOM_RANGE, Then I can change it as below.
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, MCFG_DOM_RANGE(0, 15), MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, #endif
Thanks Dongdong
Thanks, Tomasz
.
On Tue, Sep 13, 2016 at 07:38:39PM +0800, Dongdong Liu wrote:
[...]
Our host bridge is non ECAM only for the RC bus config space; for any other bus underneath the root bus we support ECAM access.
RC config resource with hardcode as DEFINE_RES_MEM(0xb0070000, SZ_4K), EP config resource we get it from MCFG table. So we need to override ops, but config resource we only need to hardcode with RC config resource.
Our host controller ACPI support patch can be found: https://lkml.org/lkml/2016/8/31/340
Sorry I misread your code. IIUC you create an array of resources that represent non-ECAM config space (and incidentally contain debug registers to check the link status - that you need to check for every given config access (?)), but you still need to have an MCFG entry that covers the bus number subject to quirk to make sure this mechanism works. Correct ?
This also means that, with the MCFG tables you have and current mainline kernel you are able to probe a root bridge (because the MCFG table covers the bus number that is not ECAM), with enumeration going haywire because it is trying to carry out ECAM accesses on non-ECAM space.
Is my reading correct ?
Anyway, that's not strictly related to this discussion; it is time we converge on this patchset, and we can add a domain range if that simplifies things.
Thanks, Lorenzo
This patch is based on RFC V5 quirk mechanism.
Based on V6 quirk mechanism, we have to change it as below:
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, .... { "HISI ", "HIP07 ", 0, 15, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY},
#endif
struct pci_ecam_ops hisi_pci_hip05_ops = { .bus_shift = 20, .init = hisi_pci_hip05_init, .pci_ops = { .map_bus = pci_ecam_map_bus, .read = hisi_pcie_acpi_rd_conf, .write = hisi_pcie_acpi_wr_conf, } };
struct pci_ecam_ops hisi_pci_hip06_ops = { .bus_shift = 20, .init = hisi_pci_hip06_init, .pci_ops = { .map_bus = pci_ecam_map_bus, .read = hisi_pcie_acpi_rd_conf, .write = hisi_pcie_acpi_wr_conf, } };
hisi_pci_hipxx_init function is used to get RC config resource with hardcode. .....
So I hope we can use MCFG_DOM_RANGE, Then I can change it as below.
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, MCFG_DOM_RANGE(0, 15), MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, #endif
Thanks Dongdong
Thanks, Tomasz
.
Hi Lorenzo and Tomasz
Many Thanks for looking at this
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 15 September 2016 11:59 To: liudongdong (C) Cc: Tomasz Nowicki; helgaas@kernel.org; will.deacon@arm.com; catalin.marinas@arm.com; rafael@kernel.org; arnd@arndb.de; hanjun.guo@linaro.org; okaya@codeaurora.org; jchandra@broadcom.com; cov@codeaurora.org; dhdang@apm.com; ard.biesheuvel@linaro.org; robert.richter@caviumnetworks.com; mw@semihalf.com; Liviu.Dudau@arm.com; ddaney@caviumnetworks.com; Wangyijing; msalter@redhat.com; linux-pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; linaro-acpi@lists.linaro.org; jcm@redhat.com; andrea.gallo@linaro.org; jeremy.linton@arm.com; Gabriele Paoloni; jhugo@codeaurora.org; linux-acpi@vger.kernel.org; linux-kernel@vger.kernel.org Subject: Re: [PATCH V6 2/5] PCI/ACPI: Check platform specific ECAM quirks
On Tue, Sep 13, 2016 at 07:38:39PM +0800, Dongdong Liu wrote:
[...]
Our host bridge is non ECAM only for the RC bus config space; for any other bus underneath the root bus we support ECAM access.
RC config resource with hardcode as DEFINE_RES_MEM(0xb0070000,
SZ_4K),
EP config resource we get it from MCFG table. So we need to override ops, but config resource we only need to
hardcode with RC config resource.
Our host controller ACPI support patch can be found: https://lkml.org/lkml/2016/8/31/340
Sorry I misread your code. IIUC you create an array of resources that represent non-ECAM config space (and incidentally contain debug registers to check the link status - that you need to check for every given config access (?)), but you still need to have an MCFG entry that
The link status check is inherited from the designware framework (see http://lxr.free-electrons.com/source/drivers/pci/host/pcie-designware.c#L678)
However I think that in this case we can just check the link status in our init function and return an error if the link is down
covers the bus number subject to quirk to make sure this mechanism works. Correct ?
Well, we need the quirks for the root bus numbers, but if I read this v6 quirk mechanism correctly, even if we do not specify an MCFG entry for bus 0, pci_mcfg_match_quirks() is called anyway and we can set our special configuration space addresses for the root buses (i.e. I think we can have a clean MCFG table with entries only for those bus ranges that are really ECAM).
This also means that, with the MCFG tables you have and current mainline kernel you are able to probe a root bridge (because the MCFG table covers the bus number that is not ECAM), with enumeration going haywire because it is trying to carry out ECAM accesses on non-ECAM space.
Yes correct, we cannot access the host controller configuration space with our current MCFG table and current Linux mainline
Is my reading correct ?
Anyway, that's not strictly related to this discussion, it is time we converge on this patchset, we can add a domain range if that simplifies things.
IMO it would be better to have the domain range to avoid a very large and repetitive static quirk array
Thanks
Gab
Thanks, Lorenzo
This patch is based on RFC V5 quirk mechanism.
Based on V6 quirk mechanism, we have to change it as below:
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP05 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 2, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, 3, MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, 0, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, 1, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, ....
{ "HISI ", "HIP07 ", 0, 15, MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY},
#endif
struct pci_ecam_ops hisi_pcie_hip05_ops = { .bus_shift = 20, .init = hisi_pci_hip05_init, .pci_ops = { .map_bus = pci_ecam_map_bus, .read = hisi_pcie_acpi_rd_conf, .write = hisi_pcie_acpi_wr_conf, } };
struct pci_ecam_ops hisi_pcie_hip06_ops = { .bus_shift = 20, .init = hisi_pci_hip06_init, .pci_ops = { .map_bus = pci_ecam_map_bus, .read = hisi_pcie_acpi_rd_conf, .write = hisi_pcie_acpi_wr_conf, } };
The hisi_pci_hipxx_init function is used to obtain the hardcoded RC config
resource.
.....
So I hope we can use MCFG_DOM_RANGE, Then I can change it as below.
#ifdef CONFIG_PCI_HISI_ACPI { "HISI ", "HIP05 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, &hisi_pcie_hip05_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP06 ", 0, MCFG_DOM_RANGE(0, 3), MCFG_BUS_ANY, &hisi_pcie_hip06_ops, MCFG_RES_EMPTY}, { "HISI ", "HIP07 ", 0, MCFG_DOM_RANGE(0, 15), MCFG_BUS_ANY, &hisi_pcie_hip07_ops, MCFG_RES_EMPTY}, #endif
Thanks Dongdong
Thanks, Tomasz
.
On 09/16/2016 05:02 AM, Gabriele Paoloni wrote:
Hi Lorenzo and Tomasz
Many Thanks for looking at this
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 15 September 2016 11:59 To: liudongdong (C) Cc: Tomasz Nowicki; helgaas@kernel.org; will.deacon@arm.com; catalin.marinas@arm.com; rafael@kernel.org; arnd@arndb.de; hanjun.guo@linaro.org; okaya@codeaurora.org; jchandra@broadcom.com; cov@codeaurora.org; dhdang@apm.com; ard.biesheuvel@linaro.org; robert.richter@caviumnetworks.com; mw@semihalf.com; Liviu.Dudau@arm.com; ddaney@caviumnetworks.com; Wangyijing; msalter@redhat.com; linux-pci@vger.kernel.org; linux-arm- kernel@lists.infradead.org; linaro-acpi@lists.linaro.org; jcm@redhat.com; andrea.gallo@linaro.org; jeremy.linton@arm.com; Gabriele Paoloni; jhugo@codeaurora.org; linux-acpi@vger.kernel.org; linux-kernel@vger.kernel.org Subject: Re: [PATCH V6 2/5] PCI/ACPI: Check platform specific ECAM quirks
On Tue, Sep 13, 2016 at 07:38:39PM +0800, Dongdong Liu wrote:
[...]
Our host bridge is non ECAM only for the RC bus config space; for any other bus underneath the root bus we support ECAM access.
RC config resource with hardcode as DEFINE_RES_MEM(0xb0070000,
SZ_4K),
EP config resource we get it from MCFG table. So we need to override ops, but config resource we only need to
hardcode with RC config resource.
Our host controller ACPI support patch can be found: https://lkml.org/lkml/2016/8/31/340
Sorry I misread your code. IIUC you create an array of resources that represent non-ECAM config space (and incidentally contain debug registers to check the link status - that you need to check for every given config access (?)), but you still need to have an MCFG entry that
The link status check is inherited from the designware framework (see http://lxr.free-electrons.com/source/drivers/pci/host/pcie-designware.c#L678)
However I think that in this case we can just check the link status in our init function and return an error if the link is down
covers the bus number subject to quirk to make sure this mechanism works. Correct ?
Well we need the quirks for the root bus numbers but if I read this v6 quirk mechanism correctly, even if we do not specify an mcfg entry for bus 0, pci_mcfg_match_quirks() is called anyway and we can set our special configuration space addresses for the root buses (i.e. I think we can have a clean MCFG table with entries only for those bus ranges that are really ECAM)
This also means that, with the MCFG tables you have and current mainline kernel you are able to probe a root bridge (because the MCFG table covers the bus number that is not ECAM), with enumeration going haywire because it is trying to carry out ECAM accesses on non-ECAM space.
Yes correct, we cannot access the host controller configuration space with our current MCFG table and current Linux mainline
Is my reading correct ?
Anyway, that's not strictly related to this discussion, it is time we converge on this patchset, we can add a domain range if that simplifies things.
IMO it would be better to have the domain range to avoid a very large and repetitive static quirk array
The v6 framework requires what, 21 additional lines of quirk data? What if you were to define some preprocessor macros to slim it down? I think something like the following would bring it down to roughly 7 additional lines.
#define PCI_ACPI_QUIRK_QUAD_DOM(rev, dom, ops) \ { "HISI ", rev, 0, dom+0, MCFG_BUS_ANY, ops, MCFG_RES_EMPTY}, \ { "HISI ", rev, 0, dom+1, MCFG_BUS_ANY, ops, MCFG_RES_EMPTY}, \ { "HISI ", rev, 0, dom+2, MCFG_BUS_ANY, ops, MCFG_RES_EMPTY}, \ { "HISI ", rev, 0, dom+3, MCFG_BUS_ANY, ops, MCFG_RES_EMPTY},
PCI_ACPI_QUIRK_QUAD_DOM("HIP05 ", 0, &hisi_pcie_hip05_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP06 ", 0, &hisi_pcie_hip06_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP07 ", 0, &hisi_pcie_hip07_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP07 ", 4, &hisi_pcie_hip07_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP07 ", 8, &hisi_pcie_hip07_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP07 ", 12, &hisi_pcie_hip07_ops)
Cov
Hi Cov
-----Original Message----- From: Christopher Covington [mailto:cov@codeaurora.org] Sent: 16 September 2016 13:28 To: Gabriele Paoloni; Lorenzo Pieralisi; liudongdong (C) Cc: Tomasz Nowicki; helgaas@kernel.org; will.deacon@arm.com; catalin.marinas@arm.com; rafael@kernel.org; arnd@arndb.de; hanjun.guo@linaro.org; okaya@codeaurora.org; jchandra@broadcom.com; dhdang@apm.com; ard.biesheuvel@linaro.org; robert.richter@caviumnetworks.com; mw@semihalf.com; Liviu.Dudau@arm.com; ddaney@caviumnetworks.com; Wangyijing; msalter@redhat.com; linux-pci@vger.kernel.org; linux-arm- kernel@lists.infradead.org; linaro-acpi@lists.linaro.org; jcm@redhat.com; andrea.gallo@linaro.org; jeremy.linton@arm.com; jhugo@codeaurora.org; linux-acpi@vger.kernel.org; linux- kernel@vger.kernel.org Subject: Re: [PATCH V6 2/5] PCI/ACPI: Check platform specific ECAM quirks
On 09/16/2016 05:02 AM, Gabriele Paoloni wrote:
Hi Lorenzo and Tomasz
Many Thanks for looking at this
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 15 September 2016 11:59 To: liudongdong (C) Cc: Tomasz Nowicki; helgaas@kernel.org; will.deacon@arm.com; catalin.marinas@arm.com; rafael@kernel.org; arnd@arndb.de; hanjun.guo@linaro.org; okaya@codeaurora.org; jchandra@broadcom.com; cov@codeaurora.org; dhdang@apm.com; ard.biesheuvel@linaro.org; robert.richter@caviumnetworks.com; mw@semihalf.com; Liviu.Dudau@arm.com; ddaney@caviumnetworks.com; Wangyijing; msalter@redhat.com; linux-pci@vger.kernel.org; linux-arm- kernel@lists.infradead.org; linaro-acpi@lists.linaro.org; jcm@redhat.com; andrea.gallo@linaro.org; jeremy.linton@arm.com; Gabriele Paoloni; jhugo@codeaurora.org; linux-acpi@vger.kernel.org; linux-kernel@vger.kernel.org Subject: Re: [PATCH V6 2/5] PCI/ACPI: Check platform specific ECAM quirks
On Tue, Sep 13, 2016 at 07:38:39PM +0800, Dongdong Liu wrote:
[...]
Our host bridge is non ECAM only for the RC bus config space; for any other bus underneath the root bus we support ECAM access.
RC config resource with hardcode as DEFINE_RES_MEM(0xb0070000,
SZ_4K),
EP config resource we get it from MCFG table. So we need to override ops, but config resource we only need to
hardcode with RC config resource.
Our host controller ACPI support patch can be found: https://lkml.org/lkml/2016/8/31/340
Sorry I misread your code. IIUC you create an array of resources
that
represent non-ECAM config space (and incidentally contain debug registers to check the link status - that you need to check for
every
given config access (?)), but you still need to have an MCFG entry
that
The link status check is inherited from the designware framework (see http://lxr.free-electrons.com/source/drivers/pci/host/pcie-
designware.c#L678)
However I think that in this case we can just check the link status in our init function and return an error if the link is down
covers the bus number subject to quirk to make sure this mechanism works. Correct ?
Well we need the quirks for the root bus numbers but if I read this v6 quirk mechanism correctly, even if we do not specify an mcfg entry for bus 0, pci_mcfg_match_quirks() is called anyway and we can set our special configuration space addresses for the root buses (i.e. I
think
we can have a clean MCFG table with entries only for those bus ranges that are really ECAM)
This also means that, with the MCFG tables you have and current mainline kernel you are able to probe a root bridge (because the
MCFG
table covers the bus number that is not ECAM), with enumeration going haywire because it is trying to carry out ECAM accesses on non-ECAM space.
Yes correct, we cannot access the host controller configuration space with our current MCFG table and current Linux mainline
Is my reading correct ?
Anyway, that's not strictly related to this discussion, it is time we converge on this patchset, we can add a domain range if that simplifies things.
IMO it would be better to have the domain range to avoid a very large and repetitive static quirk array
The v6 framework requires what, 21 additional lines of quirk data? What if you were to define some preprocessor macros to slim it down? I think something like the following would bring it down to roughly 7 additional lines.
Thanks, yes this would work as well. To be honest this is not a big issue. So either we go this way or we introduce domain range...
Cheers
Gab
#define PCI_ACPI_QUIRK_QUAD_DOM(rev, dom, ops) \ { "HISI ", rev, 0, dom+0, MCFG_BUS_ANY, ops, MCFG_RES_EMPTY}, \ { "HISI ", rev, 0, dom+1, MCFG_BUS_ANY, ops, MCFG_RES_EMPTY}, \ { "HISI ", rev, 0, dom+2, MCFG_BUS_ANY, ops, MCFG_RES_EMPTY}, \ { "HISI ", rev, 0, dom+3, MCFG_BUS_ANY, ops, MCFG_RES_EMPTY},
PCI_ACPI_QUIRK_QUAD_DOM("HIP05 ", 0, &hisi_pcie_hip05_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP06 ", 0, &hisi_pcie_hip06_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP07 ", 0, &hisi_pcie_hip07_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP07 ", 4, &hisi_pcie_hip07_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP07 ", 8, &hisi_pcie_hip07_ops) PCI_ACPI_QUIRK_QUAD_DOM("HIP07 ", 12, &hisi_pcie_hip07_ops)
Cov
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
The thunder-pem driver is intended to support the ACPI-based PCI host controller. However, there is no standard way to describe its PEM-specific register ranges in ACPI tables. Thus we add a thunder_pem_init() ACPI extension to obtain hardcoded addresses from a static resource array. Although it is not pretty, it avoids creating a standard mechanism for handling similar cases in the future.
Signed-off-by: Tomasz Nowicki tn@semihalf.com --- drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 6abaf80..b048761 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-acpi.h> #include <linux/pci-ecam.h> #include <linux/platform_device.h>
@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); }
+#ifdef CONFIG_ACPI +static struct resource thunder_pem_reg_res[] = { + [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M), + [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M), + [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M), + [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M), + [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M), + [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M), + [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M), + [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M), + [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M), + [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M), + [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M), + [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M), +}; + +static struct resource *thunder_pem_acpi_res(struct pci_config_window *cfg) +{ + struct acpi_device *adev = to_acpi_device(cfg->parent); + struct acpi_pci_root *root = acpi_driver_data(adev); + + if ((root->segment >= 4 && root->segment <= 9) || + (root->segment >= 14 && root->segment <= 19)) + return &thunder_pem_reg_res[root->segment]; + + return NULL; +} +#else +static struct resource *thunder_pem_acpi_res(struct pci_config_window *cfg) +{ + return NULL; +} +#endif + static int thunder_pem_init(struct pci_config_window *cfg) { struct device *dev = cfg->parent; @@ -292,24 +327,24 @@ static int thunder_pem_init(struct pci_config_window *cfg) struct thunder_pem_pci *pem_pci; struct platform_device *pdev;
- /* Only OF support for now */ - if (!dev->of_node) - return -EINVAL; - pem_pci = devm_kzalloc(dev, sizeof(*pem_pci), GFP_KERNEL); if (!pem_pci) return -ENOMEM;
- pdev = to_platform_device(dev); - - /* - * The second register range is the PEM bridge to the PCIe - * bus. It has a different config access method than those - * devices behind the bridge. - */ - res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (acpi_disabled) { + pdev = to_platform_device(dev); + + /* + * The second register range is the PEM bridge to the PCIe + * bus. It has a different config access method than those + * devices behind the bridge. + */ + res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1); + } else { + res_pem = thunder_pem_acpi_res(cfg); + } if (!res_pem) { - dev_err(dev, "missing \"reg[1]\"property\n"); + dev_err(dev, "missing configuration region\n"); return -EINVAL; }
On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
The thunder-pem driver is intended to support the ACPI-based PCI host controller. However, there is no standard way to describe its PEM-specific register ranges in ACPI tables. Thus we add a thunder_pem_init() ACPI extension to obtain hardcoded addresses from a static resource array. Although it is not pretty, it avoids creating a standard mechanism for handling similar cases in the future.
Signed-off-by: Tomasz Nowicki tn@semihalf.com
drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 6abaf80..b048761 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-acpi.h> #include <linux/pci-ecam.h> #include <linux/platform_device.h> @@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); } +#ifdef CONFIG_ACPI +static struct resource thunder_pem_reg_res[] = {
- [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
- [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
- [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
- [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
- [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
- [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
- [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
- [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
- [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
- [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
- [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
- [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
1) The "correct" way to discover the resources consumed by an ACPI device is to use the _CRS method. I know there are some issues there for bridges (not the fault of ThunderX!) because there's not a good way to distinguish windows from resources consumed directly by the bridge.
But we should either do this correctly, or include a comment about why we're doing it wrong, so we don't give the impression that this is the right way to do it.
I seem to recall some discussion about why we're doing it this way, but I don't remember the details. It'd be nice to include a summary here.
2) This is a little weird because here we define the resource size as 16MB, in the OF case we get the resource size from OF, in either case we ioremap 64K of it, and then as far as I can tell, we only ever access PEM_CFG_WR and PEM_CFG_RD, at offsets 0x28 and 0x30 into the space.
If the hardware actually decodes the entire 16MB, we should ioremap the whole 16MB. (Strictly speaking, drivers only need to ioremap the parts they're using, but in this case nobody claims the entire resource because of deficiencies in the ACPI and OF cores, so the driver should ioremap the entire thing to help prevent conflicts with other devices.)
It'd be nice if we didn't have the 64KB magic number. I think using devm_ioremap_resource() would be nice.
+};
+static struct resource *thunder_pem_acpi_res(struct pci_config_window *cfg) +{
- struct acpi_device *adev = to_acpi_device(cfg->parent);
- struct acpi_pci_root *root = acpi_driver_data(adev);
- if ((root->segment >= 4 && root->segment <= 9) ||
(root->segment >= 14 && root->segment <= 19))
return &thunder_pem_reg_res[root->segment];
- return NULL;
+} +#else +static struct resource *thunder_pem_acpi_res(struct pci_config_window *cfg) +{
- return NULL;
+} +#endif
static int thunder_pem_init(struct pci_config_window *cfg) { struct device *dev = cfg->parent; @@ -292,24 +327,24 @@ static int thunder_pem_init(struct pci_config_window *cfg) struct thunder_pem_pci *pem_pci; struct platform_device *pdev;
- /* Only OF support for now */
- if (!dev->of_node)
return -EINVAL;
- pem_pci = devm_kzalloc(dev, sizeof(*pem_pci), GFP_KERNEL); if (!pem_pci) return -ENOMEM;
- pdev = to_platform_device(dev);
- /*
* The second register range is the PEM bridge to the PCIe
* bus. It has a different config access method than those
* devices behind the bridge.
*/
- res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- if (acpi_disabled) {
pdev = to_platform_device(dev);
/*
* The second register range is the PEM bridge to the PCIe
* bus. It has a different config access method than those
* devices behind the bridge.
*/
res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- } else {
res_pem = thunder_pem_acpi_res(cfg);
- } if (!res_pem) {
- dev_err(dev, "missing \"reg[1]\"property\n");
+ dev_err(dev, "missing configuration region\n");
return -EINVAL; }
1.9.1
On 19.09.2016 20:09, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
The thunder-pem driver is intended to support the ACPI-based PCI host controller. However, there is no standard way to describe its PEM-specific register ranges in ACPI tables. Thus we add a thunder_pem_init() ACPI extension to obtain hardcoded addresses from a static resource array. Although it is not pretty, it avoids creating a standard mechanism for handling similar cases in the future.
Signed-off-by: Tomasz Nowicki tn@semihalf.com
drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 6abaf80..b048761 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-acpi.h> #include <linux/pci-ecam.h> #include <linux/platform_device.h>
@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); }
+#ifdef CONFIG_ACPI +static struct resource thunder_pem_reg_res[] = {
- [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
- [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
- [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
- [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
- [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
- [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
- [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
- [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
- [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
- [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
- [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
- [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
The "correct" way to discover the resources consumed by an ACPI device is to use the _CRS method. I know there are some issues there for bridges (not the fault of ThunderX!) because there's not a good way to distinguish windows from resources consumed directly by the bridge.
But we should either do this correctly, or include a comment about why we're doing it wrong, so we don't give the impression that this is the right way to do it.
I seem to recall some discussion about why we're doing it this way, but I don't remember the details. It'd be nice to include a summary here.
OK I will. The reason why we cannot use _CRS for this case is that the CONSUMER flag was not used consistently for the bridge so far.
This is a little weird because here we define the resource size as 16MB, in the OF case we get the resource size from OF, in either case we ioremap 64K of it, and then as far as I can tell, we only ever access PEM_CFG_WR and PEM_CFG_RD, at offsets 0x28 and 0x30 into the space.
If the hardware actually decodes the entire 16MB, we should ioremap the whole 16MB. (Strictly speaking, drivers only need to ioremap the parts they're using, but in this case nobody claims the entire resource because of deficiencies in the ACPI and OF cores, so the driver should ioremap the entire thing to help prevent conflicts with other devices.)
It'd be nice if we didn't have the 64KB magic number. I think using devm_ioremap_resource() would be nice.
I agree.
David, is there anything which prevents us from using devm_ioremap_resource() here with SZ_16M size?
Tomasz
[+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org), Duc]
On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
On 19.09.2016 20:09, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
The thunder-pem driver is intended to support the ACPI-based PCI host controller. However, there is no standard way to describe its PEM-specific register ranges in ACPI tables. Thus we add a thunder_pem_init() ACPI extension to obtain hardcoded addresses from a static resource array. Although it is not pretty, it avoids creating a standard mechanism for handling similar cases in the future.
Signed-off-by: Tomasz Nowicki tn@semihalf.com
drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 6abaf80..b048761 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-acpi.h> #include <linux/pci-ecam.h> #include <linux/platform_device.h>
@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); }
+#ifdef CONFIG_ACPI +static struct resource thunder_pem_reg_res[] = {
- [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
- [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
- [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
- [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
- [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
- [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
- [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
- [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
- [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
- [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
- [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
- [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
- The "correct" way to discover the resources consumed by an ACPI
device is to use the _CRS method. I know there are some issues there for bridges (not the fault of ThunderX!) because there's not a good way to distinguish windows from resources consumed directly by the bridge.
But we should either do this correctly, or include a comment about why we're doing it wrong, so we don't give the impression that this is the right way to do it.
I seem to recall some discussion about why we're doing it this way, but I don't remember the details. It'd be nice to include a summary here.
OK I will. The reason why we cannot use _CRS for this case is that the CONSUMER flag was not used consistently for the bridge so far.
Yes, I'm aware of that problem, but hard-coding resources into drivers is just a disaster. The PCI and ACPI cores need generic ways to learn what resources are consumed by devices. For PCI devices, that's done with BARs. For ACPI devices, it's done with _CRS. Without generic resource discovery, we can't manage resources reliably at the system level [1].
You have a PNP0A03/PNP0A08 device for the PCI host bridge. Because of the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS is a window and not consumed by the bridge itself. What if you added a companion ACPI device with a _CRS that contained the bridge resources? Then you'd have some driver ugliness to find that device, but at least the ACPI core could tell what resources were in use.
Maybe Rafael has a better idea?
Bjorn
[1] I know the ACPI core currently doesn't actually *do* anything with _CRS. But I think it *should*, and someday it might, so I want to preserve the principle of using _CRS to document all the resources.
On 20 September 2016 at 14:33, Bjorn Helgaas helgaas@kernel.org wrote:
[+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org), Duc]
On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
On 19.09.2016 20:09, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
The thunder-pem driver is intended to support the ACPI-based PCI host controller. However, there is no standard way to describe its PEM-specific register ranges in ACPI tables. Thus we add a thunder_pem_init() ACPI extension to obtain hardcoded addresses from a static resource array. Although it is not pretty, it avoids creating a standard mechanism for handling similar cases in the future.
Signed-off-by: Tomasz Nowicki tn@semihalf.com
drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 6abaf80..b048761 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-acpi.h> #include <linux/pci-ecam.h> #include <linux/platform_device.h>
@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); }
+#ifdef CONFIG_ACPI +static struct resource thunder_pem_reg_res[] = {
- [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
- [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
- [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
- [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
- [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
- [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
- [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
- [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
- [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
- [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
- [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
- [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
- The "correct" way to discover the resources consumed by an ACPI
device is to use the _CRS method. I know there are some issues there for bridges (not the fault of ThunderX!) because there's not a good way to distinguish windows from resources consumed directly by the bridge.
But we should either do this correctly, or include a comment about why we're doing it wrong, so we don't give the impression that this is the right way to do it.
I seem to recall some discussion about why we're doing it this way, but I don't remember the details. It'd be nice to include a summary here.
OK I will. The reason why we cannot use _CRS for this case is that the CONSUMER flag was not used consistently for the bridge so far.
Yes, I'm aware of that problem, but hard-coding resources into drivers is just a disaster. The PCI and ACPI cores need generic ways to learn what resources are consumed by devices. For PCI devices, that's done with BARs. For ACPI devices, it's done with _CRS. Without generic resource discovery, we can't manage resources reliably at the system level [1].
You have a PNP0A03/PNP0A08 device for the PCI host bridge. Because of the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS is a window and not consumed by the bridge itself. What if you added a companion ACPI device with a _CRS that contained the bridge resources? Then you'd have some driver ugliness to find that device, but at least the ACPI core could tell what resources were in use.
Maybe Rafael has a better idea?
In the discussions leading up to this, we tried very hard to make this arm64/acpi quirks mechanism just as flexible as we need it to be to cover the current crop of incompatible hardware, but not more so. Going forward, we intend to require all arm64/acpi hardware to be spec compliant, and so any parametrization beyond what is required for the currently known broken hardware is only going to make it easier for others to ship with tweaked ACPI descriptions so that an existing quirk is triggered for hardware that it was not intended for. It also implies that we have to deal with the ACPI descriptions as they were shipped with the current hardware.
That does not mean, of course, that we should use bare constants rather than symbolic ones, but anything beyond that exceeds the desired scope of quirks handling.
Hi Ard,
On Tue, Sep 20, 2016 at 02:40:13PM +0100, Ard Biesheuvel wrote:
On 20 September 2016 at 14:33, Bjorn Helgaas helgaas@kernel.org wrote:
[+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org), Duc]
On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
On 19.09.2016 20:09, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
The thunder-pem driver is intended to support the ACPI-based PCI host controller. However, there is no standard way to describe its PEM-specific register ranges in ACPI tables. Thus we add a thunder_pem_init() ACPI extension to obtain hardcoded addresses from a static resource array. Although it is not pretty, it avoids creating a standard mechanism for handling similar cases in the future.
Signed-off-by: Tomasz Nowicki tn@semihalf.com
drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 6abaf80..b048761 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-acpi.h> #include <linux/pci-ecam.h> #include <linux/platform_device.h>
@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); }
+#ifdef CONFIG_ACPI +static struct resource thunder_pem_reg_res[] = {
- [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
- [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
- [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
- [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
- [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
- [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
- [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
- [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
- [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
- [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
- [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
- [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
- The "correct" way to discover the resources consumed by an ACPI
device is to use the _CRS method. I know there are some issues there for bridges (not the fault of ThunderX!) because there's not a good way to distinguish windows from resources consumed directly by the bridge.
But we should either do this correctly, or include a comment about why we're doing it wrong, so we don't give the impression that this is the right way to do it.
I seem to recall some discussion about why we're doing it this way, but I don't remember the details. It'd be nice to include a summary here.
OK, I will. The reason why we cannot use _CRS for this case is that the CONSUMER flag was not used consistently for the bridge so far.
Yes, I'm aware of that problem, but hard-coding resources into drivers is just a disaster. The PCI and ACPI cores need generic ways to learn what resources are consumed by devices. For PCI devices, that's done with BARs. For ACPI devices, it's done with _CRS. Without generic resource discovery, we can't manage resources reliably at the system level [1].
You have a PNP0A03/PNP0A08 device for the PCI host bridge. Because of the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS is a window and not consumed by the bridge itself. What if you added a companion ACPI device with a _CRS that contained the bridge resources? Then you'd have some driver ugliness to find that device, but at least the ACPI core could tell what resources were in use.
Maybe Rafael has a better idea?
In the discussions leading up to this, we tried very hard to make this arm64/acpi quirks mechanism just as flexible as we need it to be to cover the current crop of incompatible hardware, but not more so. Going forward, we intend to require all arm64/acpi hardware to be spec compliant, and so any parametrization beyond what is required for the currently known broken hardware is only going to make it easier for others to ship with tweaked ACPI descriptions so that an existing quirk is triggered for hardware that it was not intended for. It also implies that we have to deal with the ACPI descriptions as they were shipped with the current hardware.
That does not mean, of course, that we should use bare constants rather than symbolic ones, but anything beyond that exceeds the desired scope of quirks handling.
Symbolic vs bare constants is the least of my worries. I'm pretty happy with the current quirk implementation. It's pretty simple and straightforward.
Apparently you shipped broken firmware that doesn't accurately describe system resource usage. Presumably that firmware could be updated, but maybe it's worthwhile to work around it in the kernel, depending on where it got shipped.
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g, /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
Bjorn
On 20 September 2016 at 15:05, Bjorn Helgaas helgaas@kernel.org wrote:
Hi Ard,
On Tue, Sep 20, 2016 at 02:40:13PM +0100, Ard Biesheuvel wrote:
On 20 September 2016 at 14:33, Bjorn Helgaas helgaas@kernel.org wrote:
[+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org), Duc]
On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
On 19.09.2016 20:09, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
thunder-pem driver stands for being ACPI based PCI host controller. However, there is no standard way to describe its PEM-specific register ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension to obtain hardcoded addresses from static resource array. Although it is not pretty, it avoids creating a standard mechanism to handle similar cases in the future.
Signed-off-by: Tomasz Nowicki tn@semihalf.com
drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 6abaf80..b048761 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/of_pci.h> +#include <linux/pci-acpi.h> #include <linux/pci-ecam.h> #include <linux/platform_device.h>
@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); }
+#ifdef CONFIG_ACPI +static struct resource thunder_pem_reg_res[] = {
- [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
- [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
- [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
- [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
- [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
- [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
- [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
- [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
- [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
- [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
- [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
- [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
- The "correct" way to discover the resources consumed by an ACPI
device is to use the _CRS method. I know there are some issues there for bridges (not the fault of ThunderX!) because there's not a good way to distinguish windows from resources consumed directly by the bridge.
But we should either do this correctly, or include a comment about why we're doing it wrong, so we don't give the impression that this is the right way to do it.
I seem to recall some discussion about why we're doing it this way, but I don't remember the details. It'd be nice to include a summary here.
OK, I will. The reason why we cannot use _CRS for this case is that the CONSUMER flag was not used consistently for the bridge so far.
Yes, I'm aware of that problem, but hard-coding resources into drivers is just a disaster. The PCI and ACPI cores need generic ways to learn what resources are consumed by devices. For PCI devices, that's done with BARs. For ACPI devices, it's done with _CRS. Without generic resource discovery, we can't manage resources reliably at the system level [1].
You have a PNP0A03/PNP0A08 device for the PCI host bridge. Because of the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS is a window and not consumed by the bridge itself. What if you added a companion ACPI device with a _CRS that contained the bridge resources? Then you'd have some driver ugliness to find that device, but at least the ACPI core could tell what resources were in use.
Maybe Rafael has a better idea?
In the discussions leading up to this, we tried very hard to make this arm64/acpi quirks mechanism just as flexible as we need it to be to cover the current crop of incompatible hardware, but not more so. Going forward, we intend to require all arm64/acpi hardware to be spec compliant, and so any parametrization beyond what is required for the currently known broken hardware is only going to make it easier for others to ship with tweaked ACPI descriptions so that an existing quirk is triggered for hardware that it was not intended for. It also implies that we have to deal with the ACPI descriptions as they were shipped with the current hardware.
That does not mean, of course, that we should use bare constants rather than symbolic ones, but anything beyond that exceeds the desired scope of quirks handling.
Symbolic vs bare constants is the least of my worries. I'm pretty happy with the current quirk implementation. It's pretty simple and straightforward.
OK, good to know that we are on the right track here.
Apparently you shipped broken firmware that doesn't accurately describe system resource usage. Presumably that firmware could be updated, but maybe it's worthwhile to work around it in the kernel, depending on where it got shipped.
None of these platforms can be fixed entirely in software, and given that we will not be adding quirks for new broken hardware, we should ask ourselves whether having two versions of a quirk, i.e., one for broken hardware + currently shipping firmware, and one for the same broken hardware with fixed firmware is really an improvement over what has been proposed here.
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g, /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do not rely on quirks', and what I am arguing here is that the more we polish up this code and make it clean and reusable, the more likely it is that it will end up getting abused by new broken hardware that we set out to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's make sure that this occurs in the cleanest and most compliant way possible. But any factoring/reuse concerns other than for the current crop of broken hardware should be avoided imo.
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
On 20 September 2016 at 15:05, Bjorn Helgaas helgaas@kernel.org wrote:
Hi Ard,
On Tue, Sep 20, 2016 at 02:40:13PM +0100, Ard Biesheuvel wrote:
On 20 September 2016 at 14:33, Bjorn Helgaas helgaas@kernel.org wrote:
[+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org), Duc]
On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
On 19.09.2016 20:09, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote: >thunder-pem driver stands for being ACPI based PCI host controller. >However, there is no standard way to describe its PEM-specific register >ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension >to obtain hardcoded addresses from static resource array. >Although it is not pretty, it prevents from creating standard mechanism to >handle similar cases in future. > >Signed-off-by: Tomasz Nowicki tn@semihalf.com >--- > drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++-------- > 1 file changed, 48 insertions(+), 13 deletions(-) > >diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c >index 6abaf80..b048761 100644 >--- a/drivers/pci/host/pci-thunder-pem.c >+++ b/drivers/pci/host/pci-thunder-pem.c >@@ -18,6 +18,7 @@ > #include <linux/init.h> > #include <linux/of_address.h> > #include <linux/of_pci.h> >+#include <linux/pci-acpi.h> > #include <linux/pci-ecam.h> > #include <linux/platform_device.h> > >@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn, > return pci_generic_config_write(bus, devfn, where, size, val); > } > >+#ifdef CONFIG_ACPI >+static struct resource thunder_pem_reg_res[] = { >+ [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M), >+ [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M), >+ [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M), >+ [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M), >+ [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M), >+ [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M), >+ [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M), >+ [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M), >+ [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M), >+ [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M), >+ [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M), >+ [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
- The "correct" way to discover the resources consumed by an ACPI
device is to use the _CRS method. I know there are some issues there for bridges (not the fault of ThunderX!) because there's not a good way to distinguish windows from resources consumed directly by the bridge.
But we should either do this correctly, or include a comment about why we're doing it wrong, so we don't give the impression that this is the right way to do it.
I seem to recall some discussion about why we're doing it this way, but I don't remember the details. It'd be nice to include a summary here.
OK, I will. The reason why we cannot use _CRS for this case is that the CONSUMER flag was not used consistently for the bridge so far.
Yes, I'm aware of that problem, but hard-coding resources into drivers is just a disaster. The PCI and ACPI cores need generic ways to learn what resources are consumed by devices. For PCI devices, that's done with BARs. For ACPI devices, it's done with _CRS. Without generic resource discovery, we can't manage resources reliably at the system level [1].
You have a PNP0A03/PNP0A08 device for the PCI host bridge. Because of the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS is a window and not consumed by the bridge itself. What if you added a companion ACPI device with a _CRS that contained the bridge resources? Then you'd have some driver ugliness to find that device, but at least the ACPI core could tell what resources were in use.
Maybe Rafael has a better idea?
In the discussions leading up to this, we tried very hard to make this arm64/acpi quirks mechanism just as flexible as we need it to be to cover the current crop of incompatible hardware, but not more so. Going forward, we intend to require all arm64/acpi hardware to be spec compliant, and so any parametrization beyond what is required for the currently known broken hardware is only going to make it easier for others to ship with tweaked ACPI descriptions so that an existing quirk is triggered for hardware that it was not intended for. It also implies that we have to deal with the ACPI descriptions as they were shipped with the current hardware.
That does not mean, of course, that we should use bare constants rather than symbolic ones, but anything beyond that exceeds the desired scope of quirks handling.
Symbolic vs bare constants is the least of my worries. I'm pretty happy with the current quirk implementation. It's pretty simple and straightforward.
OK, good to know that we are on the right track here.
Apparently you shipped broken firmware that doesn't accurately describe system resource usage. Presumably that firmware could be updated, but maybe it's worthwhile to work around it in the kernel, depending on where it got shipped.
None of these platforms can be fixed entirely in software, and given that we will not be adding quirks for new broken hardware, we should ask ourselves whether having two versions of a quirk, i.e., one for broken hardware + currently shipping firmware, and one for the same broken hardware with fixed firmware is really an improvement over what has been proposed here.
We're talking about two completely different types of quirks:
1) MCFG quirks to use memory-mapped config space that doesn't quite conform to the ECAM model in the PCIe spec, and
2) Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation for 1). The third patch (ThunderX-specific) is one possibility for 2), but I don't like it because there's no way for generic software like the ACPI core to discover these resources.
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g, /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do not rely on quirks', and what I am arguing here is that the more we polish up this code and make it clean and reusable, the more likely it is that it will end up getting abused by new broken hardware that we set out to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's make sure that this occurs in the cleanest and most compliant way possible. But any factoring/reuse concerns other than for the current crop of broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
If we need to work around firmware in the field that doesn't do that, one possibility is a PNP quirk along the lines of quirk_amd_mmconfig_area().
Bjorn
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
None of these platforms can be fixed entirely in software, and given that we will not be adding quirks for new broken hardware, we should ask ourselves whether having two versions of a quirk, i.e., one for broken hardware + currently shipping firmware, and one for the same broken hardware with fixed firmware is really an improvement over what has been proposed here.
We're talking about two completely different types of quirks:
1) MCFG quirks to use memory-mapped config space that doesn't quite conform to the ECAM model in the PCIe spec, and
2) Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation for 1). The third patch (ThunderX-specific) is one possibility for 2), but I don't like it because there's no way for generic software like the ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g, /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do not rely on quirks', and what I am arguing here is that the more we polish up this code and make it clean and reusable, the more likely it is that it will end up getting abused by new broken hardware that we set out to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's make sure that this occurs in the cleanest and most compliant way possible. But any factoring/reuse concerns other than for the current crop of broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
Yes.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
If we need to work around firmware in the field that doesn't do that, one possibility is a PNP quirk along the lines of quirk_amd_mmconfig_area().
You mean matching PNP0C01/PNP0c02 and creating a resource (that has to be hardcoded in a static array in the kernel anyway, there is no way to retrieve it otherwise) in the corresponding PNP quirk handler ?
And it is not a given we can match against PNP0c01/PNP0c02.
So it looks like the only solution is allocating an _HID for each host bridge that is not ECAM compliant to add resources to its _CRS (unless the MCFG quirk does not need any additional data/resource, e.g. "use a different set of PCI accessors, 32-bit vs byte-access").
For FW that is immutable I really do not see what we can do apart from hardcoding the non-config resources (consumed by a bridge), somehow.
Thanks, Lorenzo
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
None of these platforms can be fixed entirely in software, and given that we will not be adding quirks for new broken hardware, we should ask ourselves whether having two versions of a quirk, i.e., one for broken hardware + currently shipping firmware, and one for the same broken hardware with fixed firmware is really an improvement over what has been proposed here.
We're talking about two completely different types of quirks:
1) MCFG quirks to use memory-mapped config space that doesn't quite conform to the ECAM model in the PCIe spec, and
2) Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation for 1). The third patch (ThunderX-specific) is one possibility for 2), but I don't like it because there's no way for generic software like the ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g, /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do not rely on quirks', and what I am arguing here is that the more we polish up this code and make it clean and reusable, the more likely it is that it will end up getting abused by new broken hardware that we set out to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's make sure that this occurs in the cleanest and most compliant way possible. But any factoring/reuse concerns other than for the current crop of broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
Yes.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) {
    Name (_HID, "HISI0080") // PCI Express Root Bridge
    Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
    Method (_CRS, 0, Serialized) {
        // Root complex resources (windows)
    }
    Device (RES0) {
        Name (_HID, "HISI0081") // HiSi PCIe RC config base address
        Name (_CID, "PNP0C02") // Motherboard reserved resource
        Name (_CRS, ResourceTemplate () { ... }
    }
}
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it with a bridge, although I could imagine something like this:
Device (RES1) {
    Name (_HID, "HISI0081") // HiSi PCIe RC config base address
    Name (_CID, "PNP0C02") // Motherboard reserved resource
    Name (_CRS, ResourceTemplate () { ... }
    Method (BRDG) { "PCI1" } // hand-wavy ASL
}
Device (PCI1) {
    Name (_HID, "HISI0080") // PCI Express Root Bridge
    Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
    Method (_CRS, 0, Serialized) {
        // Root complex resources (windows)
    }
}
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
If we need to work around firmware in the field that doesn't do that, one possibility is a PNP quirk along the lines of quirk_amd_mmconfig_area().
You mean matching PNP0C01/PNP0c02 and creating a resource (that has to be hardcoded in a static array in the kernel anyway, there is no way to retrieve it otherwise) in the corresponding PNP quirk handler ?
Right. On some hardware we can read the resource out of a device-specific register, as we do in quirk_intel_mch(). But if that's not possible, it would have to be hard-coded.
And it is not a given we can match against PNP0c01/PNP0c02.
So it looks like the only solution is allocating an _HID for each host bridge that is not ECAM compliant to add resources to its _CRS (unless the MCFG quirk does not need any additional data/resource, e.g. "use a different set of PCI accessors, 32-bit vs byte-access").
It doesn't matter whether it's ECAM-compliant or not. Any memory-mapped config space should be reported via some device's _CRS.
The existing x86 practice is to use PNP0C02 devices for this purpose, and I think we should just follow that practice.
For FW that is immutable I really do not see what we can do apart from hardcoding the non-config resources (consumed by a bridge), somehow.
Right. Well, I assume you mean we should hard-code "non-window resources consumed directly by a bridge". If firmware in the field is broken, we should work around it, and that may mean hard-coding some resources.
My point is that the hard-coding should not be buried in a driver where it's invisible to the rest of the kernel. If we hard-code it in a quirk that adds _CRS entries, then the kernel will work just like it would if the firmware had been correct in the first place. The resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem, and if we ever used _SRS to assign or move ACPI devices, we would know to avoid the bridge resource.
Bjorn
On Wed, Sep 21, 2016 at 11:04 AM, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
None of these platforms can be fixed entirely in software, and given that we will not be adding quirks for new broken hardware, we should ask ourselves whether having two versions of a quirk, i.e., one for broken hardware + currently shipping firmware, and one for the same broken hardware with fixed firmware is really an improvement over what has been proposed here.
We're talking about two completely different types of quirks:
1) MCFG quirks to use memory-mapped config space that doesn't quite conform to the ECAM model in the PCIe spec, and
2) Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation for 1). The third patch (ThunderX-specific) is one possibility for 2), but I don't like it because there's no way for generic software like the ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g, /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do not rely on quirks', and what I am arguing here is that the more we polish up this code and make it clean and reusable, the more likely it is that it will end up getting abused by new broken hardware that we set out to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's make sure that this occurs in the cleanest and most compliant way possible. But any factoring/reuse concerns other than for the current crop of broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
Yes.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources (windows) } Device (RES0) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } } }
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it with a bridge, although I could imagine something like this:
Device (RES1) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } Method (BRDG) { "PCI1" } // hand-wavy ASL } Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources (windows) } }
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
If we need to work around firmware in the field that doesn't do that, one possibility is a PNP quirk along the lines of quirk_amd_mmconfig_area().
You mean matching PNP0C01/PNP0c02 and creating a resource (that has to be hardcoded in a static array in the kernel anyway, there is no way to retrieve it otherwise) in the corresponding PNP quirk handler ?
Right. On some hardware we can read the resource out of a device-specific register, as we do in quirk_intel_mch(). But if that's not possible, it would have to be hard-coded.
And it is not a given we can match against PNP0c01/PNP0c02.
So it looks like the only solution is allocating an _HID for each host bridge that is not ECAM compliant to add resources to its _CRS (unless the MCFG quirk does not need any additional data/resource, e.g. "use a different set of PCI accessors, i.e. 32-bit vs byte-access").
It doesn't matter whether it's ECAM-compliant or not. Any memory-mapped config space should be reported via some device's _CRS.
The existing x86 practice is to use PNP0C02 devices for this purpose, and I think we should just follow that practice.
For FW that is immutable I really do not see what we can do apart from hardcoding the non-config resources (consumed by a bridge), somehow.
Right. Well, I assume you mean we should hard-code "non-window resources consumed directly by a bridge". If firmware in the field is broken, we should work around it, and that may mean hard-coding some resources.
My point is that the hard-coding should not be buried in a driver where it's invisible to the rest of the kernel. If we hard-code it in a quirk that adds _CRS entries, then the kernel will work just like it would if the firmware had been correct in the first place. The resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem, and if we ever used _SRS to assign or move ACPI devices, we would know to avoid the bridge resource.
Hi Bjorn,
Are you suggesting to add code similar to functions in linux/drivers/pnp/quirks.c to declare/attach the additional resource that the host need to have when the resource is not in MCFG table?
Bjorn
Regards, Duc Dang.
On Wed, Sep 21, 2016 at 11:58:22AM -0700, Duc Dang wrote:
On Wed, Sep 21, 2016 at 11:04 AM, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
The existing x86 practice is to use PNP0C02 devices for this purpose, and I think we should just follow that practice.
...
My point is that the hard-coding should not be buried in a driver where it's invisible to the rest of the kernel. If we hard-code it in a quirk that adds _CRS entries, then the kernel will work just like it would if the firmware had been correct in the first place. The resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem, and if we ever used _SRS to assign or move ACPI devices, we would know to avoid the bridge resource.
Are you suggesting to add code similar to functions in linux/drivers/pnp/quirks.c to declare/attach the additional resource that the host need to have when the resource is not in MCFG table?
Yes, but what I'm suggesting is actually a little stronger. This has nothing to do with whether a resource is in the MCFG table or not.
I'm suggesting ACPI firmware should always describe the resource. If the firmware is defective and doesn't describe it, we should add a quirk in pnp/quirks.c to add a resource for it.
Bjorn
Hi Bjorn,
On 21.09.2016 21:18, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 11:58:22AM -0700, Duc Dang wrote:
On Wed, Sep 21, 2016 at 11:04 AM, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
The existing x86 practice is to use PNP0C02 devices for this purpose, and I think we should just follow that practice.
...
My point is that the hard-coding should not be buried in a driver where it's invisible to the rest of the kernel. If we hard-code it in a quirk that adds _CRS entries, then the kernel will work just like it would if the firmware had been correct in the first place. The resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem, and if we ever used _SRS to assign or move ACPI devices, we would know to avoid the bridge resource.
Are you suggesting to add code similar to functions in linux/drivers/pnp/quirks.c to declare/attach the additional resource that the host need to have when the resource is not in MCFG table?
Yes, but what I'm suggesting is actually a little stronger. This has nothing to do with whether a resource is in the MCFG table or not.
I'm suggesting ACPI firmware should always describe the resource. If the firmware is defective and doesn't describe it, we should add a quirk in pnp/quirks.c to add a resource for it.
Thanks for pointers Bjorn.
ThunderX is a case where we cannot change the firmware, and it also has no PNP0c02 device in its tables. So in order to use pnp/quirks.c we would have to fabricate a PNP0c02 device in the kernel and then add a quirk entry. I am looking for the best place to put such emulation code, but it does not seem trivial.
Thanks, Tomasz
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
None of these platforms can be fixed entirely in software, and given that we will not be adding quirks for new broken hardware, we should ask ourselves whether having two versions of a quirk, i.e., one for broken hardware + currently shipping firmware, and one for the same broken hardware with fixed firmware is really an improvement over what has been proposed here.
We're talking about two completely different types of quirks:
MCFG quirks to use memory-mapped config space that doesn't quite conform to the ECAM model in the PCIe spec, and
Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation for 1). The third patch (ThunderX-specific) is one possibility for 2), but I don't like it because there's no way for generic software like the ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
I think the PNP quirk approach + PNP0c02 resource put forward by Gab is enough.
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g, /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do not rely on quirks', and what I am arguing here is that the more we polish up this code and make it clean and reusable, the more likely it is that it will end up getting abused by new broken hardware that we set out to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's make sure that this occurs in the cleanest and most compliant way possible. But any factoring/reuse concerns other than for the current crop of broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
Yes.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
Ok, that's agreed. It goes without saying that since you are quoting the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device _CRS I will consider that a FW bug.
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources (windows) } Device (RES0) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } } }
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it with a bridge, although I could imagine something like this:
Device (RES1) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } Method (BRDG) { "PCI1" } // hand-wavy ASL } Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources (windows) } }
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
Ok, I am fine with both and I think we are converging, but the way to solve this problem has to be uniform for all ARM partners (and not only ARM). Two points here:
1) Adding a device/subdevice allows people to add a _CRS reporting the non-window bridge resources. Fine. It also allows people to chuck in there all sorts of _DSD properties to describe their PCI host bridge as it is done with DT properties (those _DSD can contain eg clocks etc.), this may be tempting (so that they can reuse the same DT driver and do not have to update their firmware) but I want to be clear here: that must not happen. So, a subdevice with a _CRS to report resources, yes, but it will stop there. 2) It is unclear to me how to formalize the above. People should not write FW by reading the PCI mailing list, so these guidelines have to be written, somehow. I do not want to standardize quirks, I want to prevent random ACPI table content, which is different. Should I report this to the ACPI spec working group ? If we do not do that everyone will go solve this problem as they deem fit.
[...]
For FW that is immutable I really do not see what we can do apart from hardcoding the non-config resources (consumed by a bridge), somehow.
Right. Well, I assume you mean we should hard-code "non-window resources consumed directly by a bridge". If firmware in the field is broken, we should work around it, and that may mean hard-coding some resources.
My point is that the hard-coding should not be buried in a driver where it's invisible to the rest of the kernel. If we hard-code it in a quirk that adds _CRS entries, then the kernel will work just like it would if the firmware had been correct in the first place. The resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem, and if we ever used _SRS to assign or move ACPI devices, we would know to avoid the bridge resource.
We are in complete agreement here.
Thanks, Lorenzo
Hi Lorenzo, Bjorn
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 22 September 2016 10:50 To: Bjorn Helgaas Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux- pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux- kernel@vger.kernel.org; Rafael J. Wysocki Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM- specific register range for ACPI case
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
None of these platforms can be fixed entirely in software, and
given
that we will not be adding quirks for new broken hardware, we
should
ask ourselves whether having two versions of a quirk, i.e., one
for
broken hardware + currently shipping firmware, and one for the
same
broken hardware with fixed firmware is really an improvement
over what
has been proposed here.
We're talking about two completely different types of quirks:
- MCFG quirks to use memory-mapped config space that doesn't
quite
conform to the ECAM model in the PCIe spec, and
- Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation
for 1).
The third patch (ThunderX-specific) is one possibility for 2),
but I
don't like it because there's no way for generic software like
the
ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
I think the PNP quirk approach + PNP0c02 resource put forward by Gab is enough.
Great, thanks. As we take a final decision, I will ask Dongdong to submit another RFC based on this approach.
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
I'd like to step back and come up with some understanding of
how
non-broken firmware *should* deal with this issue. Then, if
we *do*
work around this particular broken firmware in the kernel, it
would be
nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred
solution, an
ACPI quirk could fabricate a device with the required
resources. That
would address the problem closer to the source and make it
more likely
that the rest of the system will work correctly: /proc/iomem
could
make sense, things that look at _CRS generically would work
(e.g,
/sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't
provide
any guidance for future platforms and makes it likely that
the hack
will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do
not
rely on quirks', and what I am arguing here is that the more we
polish
up this code and make it clean and reusable, the more likely it
is
that it will end up getting abused by new broken hardware that we
set out
to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's
make
sure that this occurs in the cleanest and most compliant way
possible.
But any factoring/reuse concerns other than for the current
crop of
broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need
any
more MCFG quirks, that would be great.
Yes.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is
done
on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
Ok, that's agreed. It goes without saying that since you are quoting the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device _CRS I will consider that a FW bug.
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
Device (RES0) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } }
}
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it
with
a bridge, although I could imagine something like this:
Device (RES1) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } Method (BRDG) { "PCI1" } // hand-wavy ASL } Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
}
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
Ok, I am fine with both and I think we are converging, but the way to solve this problem has to be uniform for all ARM partners (and not only ARM). Two points here:
- Adding a device/subdevice allows people to add a _CRS reporting the non-window bridge resources. Fine. It also allows people to chuck in there all sorts of _DSD properties to describe their PCI host bridge as it is done with DT properties (those _DSD can contain eg clocks etc.), this may be tempting (so that they can reuse the same DT driver and do not have to update their firmware) but I want to be clear here: that must not happen. So, a subdevice with a _CRS to report resources, yes, but it will stop there.
- It is unclear to me how to formalize the above. People should not write FW by reading the PCI mailing list, so these guidelines have
to be written, somehow. I do not want to standardize quirks, I want to prevent random ACPI table content, which is different. Should I report this to the ACPI spec working group ? If we do not do that everyone will go solve this problem as they deem fit.
Do we really need to formalize this?
As we discussed in the Linaro call at the moment we have few vendors that need quirks and we want to avoid promoting/accepting quirks for the future.
At the time of the call I think we decided to informally accept a set of quirks for the current platforms and reject any other quirk coming after a certain date/kernel version (this to be decided).
I am not sure if there is a way to document/formalize a temporary exception from the rule...
Thanks
Gab
[...]
For FW that is immutable I really do not see what we can do apart from hardcoding the non-config resources (consumed by a bridge), somehow.
Right. Well, I assume you mean we should hard-code "non-window resources consumed directly by a bridge". If firmware in the field
is
broken, we should work around it, and that may mean hard-coding some resources.
My point is that the hard-coding should not be buried in a driver where it's invisible to the rest of the kernel. If we hard-code it
in
a quirk that adds _CRS entries, then the kernel will work just like
it
would if the firmware had been correct in the first place. The resource will appear in /sys/devices/pnp*/*/resources and
/proc/iomem,
and if we ever used _SRS to assign or move ACPI devices, we would
know
to avoid the bridge resource.
We are in complete agreement here.
Thanks, Lorenzo
On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
Hi Lorenzo, Bjorn
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 22 September 2016 10:50 To: Bjorn Helgaas Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux- pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux- kernel@vger.kernel.org; Rafael J. Wysocki Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM- specific register range for ACPI case
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
None of these platforms can be fixed entirely in software, and
given
that we will not be adding quirks for new broken hardware, we
should
ask ourselves whether having two versions of a quirk, i.e., one
for
broken hardware + currently shipping firmware, and one for the
same
broken hardware with fixed firmware is really an improvement
over what
has been proposed here.
We're talking about two completely different types of quirks:
- MCFG quirks to use memory-mapped config space that doesn't
quite
conform to the ECAM model in the PCIe spec, and
- Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation
for 1).
The third patch (ThunderX-specific) is one possibility for 2),
but I
don't like it because there's no way for generic software like
the
ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
I think the PNP quirk approach + PNP0c02 resource put forward by Gab is enough.
Great, thanks. As we take a final decision, I will ask Dongdong to submit another RFC based on this approach.
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g., /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do
not
rely on quirks', and what I am arguing here is that the more we
polish
up this code and make it clean and reusable, the more likely it
is
that it will end up getting abused by new broken hardware that we
set out
to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's
make
sure that this occurs in the cleanest and most compliant way
possible.
But any factoring/reuse concerns other than for the current
crop of
broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need
any
more MCFG quirks, that would be great.
Yes.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is
done
on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
Ok, that's agreed. It goes without saying that since you are quoting the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device _CRS I will consider that a FW bug.
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
Device (RES0) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } }
}
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it
with
a bridge, although I could imagine something like this:
Device (RES1) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } Method (BRDG) { "PCI1" } // hand-wavy ASL } Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
}
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
Ok, I am fine with both and I think we are converging, but the way to solve this problem has to be uniform for all ARM partners (and not only ARM). Two points here:
- Adding a device/subdevice allows people to add a _CRS reporting the non-window bridge resources. Fine. It also allows people to chuck in there all sorts of _DSD properties to describe their PCI host bridge as it is done with DT properties (those _DSD can contain eg clocks etc.), this may be tempting (so that they can reuse the same DT driver and do not have to update their firmware) but I want to be clear here: that must not happen. So, a subdevice with a _CRS to report resources, yes, but it will stop there.
- It is unclear to me how to formalize the above. People should not write FW by reading the PCI mailing list, so these guidelines have to be written, somehow. I do not want to standardize quirks, I want to prevent random ACPI table content, which is different. Should I report this to the ACPI spec working group ? If we do not do that everyone will go solve this problem as they deem fit.
Do we really need to formalize this?
As we discussed in the Linaro call at the moment we have few vendors that need quirks and we want to avoid promoting/accepting quirks for the future.
At the time of the call I think we decided to informally accept a set of quirks for the current platforms and reject any other quirk coming after a certain date/kernel version (this to be decided).
I am not sure if there is a way to document/formalize a temporary exception from the rule...
- (1) will be enforced. - We do not know whether PNP0c02 can be used in non-root devices _CRS - Are we sure (given that we are implementing this to make sure we are able to validate resources) that it is valid to have a subdevice with a _CRS whose resources are not contained in its parent _CRS address space (because that's exactly the case for these quirks) ?
That's what I mean by formalizing, I want to know how PNP0c02 should be used. We all want platforms with quirks to be enabled asap but only if we stick to the ACPI specifications. On top of that, with the bindings above, the kernel would end up creating a platform device for the "fake" device with a _CRS approach, which is questionable.
Lorenzo
Thanks
Gab
[...]
For FW that is immutable I really do not see what we can do apart from hardcoding the non-config resources (consumed by a bridge), somehow.
Right. Well, I assume you mean we should hard-code "non-window resources consumed directly by a bridge". If firmware in the field is broken, we should work around it, and that may mean hard-coding some resources.
My point is that the hard-coding should not be buried in a driver where it's invisible to the rest of the kernel. If we hard-code it in a quirk that adds _CRS entries, then the kernel will work just like it would if the firmware had been correct in the first place. The resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem, and if we ever used _SRS to assign or move ACPI devices, we would know to avoid the bridge resource.
We are in complete agreement here.
Thanks, Lorenzo
On Thu, Sep 22, 2016 at 01:44:46PM +0100, Lorenzo Pieralisi wrote:
On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
Hi Lorenzo, Bjorn
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 22 September 2016 10:50 To: Bjorn Helgaas Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux- pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux- kernel@vger.kernel.org; Rafael J. Wysocki Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM- specific register range for ACPI case
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
> None of these platforms can be fixed entirely in software, and given that we will not be adding quirks for new broken hardware, we should ask ourselves whether having two versions of a quirk, i.e., one for broken hardware + currently shipping firmware, and one for the same broken hardware with fixed firmware is really an improvement over what has been proposed here.
We're talking about two completely different types of quirks:
1) MCFG quirks to use memory-mapped config space that doesn't quite conform to the ECAM model in the PCIe spec, and
2) Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation for 1). The third patch (ThunderX-specific) is one possibility for 2), but I don't like it because there's no way for generic software like the ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
I think the PNP quirk approach + PNP0c02 resource put forward by Gab is enough.
Great, thanks; as we take a final decision I will ask Dongdong to submit another RFC based on this approach.
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
> > I'd like to step back and come up with some understanding of
how
> > non-broken firmware *should* deal with this issue. Then, if
we *do*
> > work around this particular broken firmware in the kernel, it
would be
> > nice to do it in a way that fits in with that understanding. > > > > For example, if a companion ACPI device is the preferred
solution, an
> > ACPI quirk could fabricate a device with the required
resources. That
> > would address the problem closer to the source and make it
more likely
> > that the rest of the system will work correctly: /proc/iomem
could
> > make sense, things that look at _CRS generically would work
(e.g,
> > /sys/, an admittedly hypothetical "lsacpi", etc.) > > > > Hard-coding stuff in drivers is a point solution that doesn't
provide
> > any guidance for future platforms and makes it likely that
the hack
> > will get copied into even more drivers. > > > > OK, I see. But the guidance for future platforms should be 'do
not
> rely on quirks', and what I am arguing here is that the more we
polish
> up this code and make it clean and reusable, the more likely it
is
> that will end up getting abused by new broken hardware that we
set out
> to reject entirely in the first place. > > So of course, if the quirk involves claiming resources, let's
make
> sure that this occurs in the cleanest and most compliant way
possible.
> But any factoring/reuse concerns other than for the current
crop of
> broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
Yes.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
Ok, that's agreed. It goes without saying that since you are quoting the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device _CRS I will consider that a FW bug.
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
Device (RES0) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } }
}
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it with a bridge, although I could imagine something like this:
Device (RES1) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } Method (BRDG) { "PCI1" } // hand-wavy ASL } Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
}
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
Ok, I am fine with both and I think we are converging, but the way to solve this problem has to be uniform for all ARM partners (and not only ARM). Two points here:
- Adding a device/subdevice allows people to add a _CRS reporting the non-window bridge resources. Fine. It also allows people to chuck in there all sorts of _DSD properties to describe their PCI host bridge as it is done with DT properties (those _DSD can contain eg clocks etc.), this may be tempting (so that they can reuse the same DT driver and do not have to update their firmware) but I want to be clear here: that must not happen. So, a subdevice with a _CRS to report resources, yes, but it will stop there.
- It is unclear to me how to formalize the above. People should not write FW by reading the PCI mailing list, so these guidelines have to be written, somehow. I do not want to standardize quirks, I want to prevent random ACPI table content, which is different. Should I report this to the ACPI spec working group ? If we do not do that everyone will go solve this problem as they deem fit.
Do we really need to formalize this?
As we discussed in the Linaro call at the moment we have few vendors that need quirks and we want to avoid promoting/accepting quirks for the future.
At the time of the call I think we decided to informally accept a set of quirks for the current platforms and reject any other quirk coming after a certain date/kernel version (this to be decided).
I am not sure if there is a way to document/formalize a temporary exception from the rule...
- (1) will be enforced.
I'm not sure it's necessary or possible to enforce a "no future quirks" rule. For one thing, there's already a pretty strong incentive to avoid quirks: if your hardware doesn't require quirks, it works with OSes already in the field.
MCFG quirks allow us to use the generic ACPI pci_root.c driver even if the hardware doesn't support ECAM quite according to the spec.
PNP0C02 usage is a workaround for the failure of the Consumer/Producer bit. PNP0C02 quirks compensate for firmware that doesn't describe resource usage accurately. It's possible the ACPI spec folks could come up with a better Consumer/Producer workaround, if that's needed. Apparently x86 hasn't needed it yet.
If people add _DSD methods for clocks or whatnot, the hardware won't work with the generic pci_root.c driver, so there's already an incentive for avoiding them. x86 has managed without such methods; arm64 should be able to do the same.
- We do not know whether PNP0c02 can be used in non-root devices _CRS
- Are we sure (given that we are implementing this to make sure we are able to validate resources) that it is valid to have a subdevice with a _CRS whose resources are not contained in its parent _CRS address space (because that's exactly the case for these quirks) ?
That's what I mean by formalizing, I want to know how PNP0c02 should be used. We all want platforms with quirks to be enabled asap but only if we stick to the ACPI specifications. On top of that, with the bindings above, the kernel would end up creating a platform device for the "fake" device with a _CRS approach, which is questionable.
[...]
For FW that is immutable I really do not see what we can do apart from hardcoding the non-config resources (consumed by a bridge), somehow.
Right. Well, I assume you mean we should hard-code "non-window resources consumed directly by a bridge". If firmware in the field is broken, we should work around it, and that may mean hard-coding some resources.
My point is that the hard-coding should not be buried in a driver where it's invisible to the rest of the kernel. If we hard-code it in a quirk that adds _CRS entries, then the kernel will work just like it would if the firmware had been correct in the first place. The resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem, and if we ever used _SRS to assign or move ACPI devices, we would know to avoid the bridge resource.
We are in complete agreement here.
Thanks, Lorenzo
On Thu, Sep 22, 2016 at 01:31:01PM -0500, Bjorn Helgaas wrote:
On Thu, Sep 22, 2016 at 01:44:46PM +0100, Lorenzo Pieralisi wrote:
On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
Hi Lorenzo, Bjorn
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 22 September 2016 10:50 To: Bjorn Helgaas Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux- pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux- kernel@vger.kernel.org; Rafael J. Wysocki Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM- specific register range for ACPI case
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote: > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
> > None of these platforms can be fixed entirely in software, and
given
> > that we will not be adding quirks for new broken hardware, we
should
> > ask ourselves whether having two versions of a quirk, i.e., one
for
> > broken hardware + currently shipping firmware, and one for the
same
> > broken hardware with fixed firmware is really an improvement
over what
> > has been proposed here. > > We're talking about two completely different types of quirks: > > 1) MCFG quirks to use memory-mapped config space that doesn't
quite
> conform to the ECAM model in the PCIe spec, and > > 2) Some yet-to-be-determined method to describe address space > consumed by a bridge. > > The first two patches of this series are a nice implementation
for 1).
> The third patch (ThunderX-specific) is one possibility for 2),
but I
> don't like it because there's no way for generic software like
the
> ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
I think the PNP quirk approach + PNP0c02 resource put forward by Gab is enough.
Great, thanks; as we take a final decision I will ask Dongdong to submit another RFC based on this approach.
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
> > > I'd like to step back and come up with some understanding of
how
> > > non-broken firmware *should* deal with this issue. Then, if
we *do*
> > > work around this particular broken firmware in the kernel, it
would be
> > > nice to do it in a way that fits in with that understanding. > > > > > > For example, if a companion ACPI device is the preferred
solution, an
> > > ACPI quirk could fabricate a device with the required
resources. That
> > > would address the problem closer to the source and make it
more likely
> > > that the rest of the system will work correctly: /proc/iomem
could
> > > make sense, things that look at _CRS generically would work
(e.g,
> > > /sys/, an admittedly hypothetical "lsacpi", etc.) > > > > > > Hard-coding stuff in drivers is a point solution that doesn't
provide
> > > any guidance for future platforms and makes it likely that
the hack
> > > will get copied into even more drivers. > > > > > > > OK, I see. But the guidance for future platforms should be 'do
not
> > rely on quirks', and what I am arguing here is that the more we
polish
> > up this code and make it clean and reusable, the more likely it
is
> > that will end up getting abused by new broken hardware that we
set out
> > to reject entirely in the first place. > > > > So of course, if the quirk involves claiming resources, let's
make
> > sure that this occurs in the cleanest and most compliant way
possible.
> > But any factoring/reuse concerns other than for the current
crop of
> > broken hardware should be avoided imo. > > If future hardware is completely ECAM-compliant and we don't need
any
> more MCFG quirks, that would be great.
Yes.
> But we'll still need to describe that memory-mapped config space > somewhere. If that's done with PNP0C02 or similar devices (as is
done
> on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
Ok, that's agreed. It goes without saying that since you are quoting the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device _CRS I will consider that a FW bug.
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
Device (RES0) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } }
}
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it with a bridge, although I could imagine something like this:
Device (RES1) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } Method (BRDG) { "PCI1" } // hand-wavy ASL } Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
}
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
Ok, I am fine with both and I think we are converging, but the way to solve this problem has to be uniform for all ARM partners (and not only ARM). Two points here:
- Adding a device/subdevice allows people to add a _CRS reporting the non-window bridge resources. Fine. It also allows people to chuck in there all sorts of _DSD properties to describe their PCI host bridge as it is done with DT properties (those _DSD can contain eg clocks etc.), this may be tempting (so that they can reuse the same DT driver and do not have to update their firmware) but I want to be clear here: that must not happen. So, a subdevice with a _CRS to report resources, yes, but it will stop there.
- It is unclear to me how to formalize the above. People should not write FW by reading the PCI mailing list, so these guidelines have to be written, somehow. I do not want to standardize quirks, I want to prevent random ACPI table content, which is different. Should I report this to the ACPI spec working group ? If we do not do that everyone will go solve this problem as they deem fit.
Do we really need to formalize this?
As we discussed in the Linaro call at the moment we have few vendors that need quirks and we want to avoid promoting/accepting quirks for the future.
At the time of the call I think we decided to informally accept a set of quirks for the current platforms and reject any other quirk coming after a certain date/kernel version (this to be decided).
I am not sure if there is a way to document/formalize a temporary exception from the rule...
- (1) will be enforced.
I'm not sure it's necessary or possible to enforce a "no future quirks" rule. For one thing, there's already a pretty strong incentive to avoid quirks: if your hardware doesn't require quirks, it works with OSes already in the field.
MCFG quirks allow us to use the generic ACPI pci_root.c driver even if the hardware doesn't support ECAM quite according to the spec.
PNP0C02 usage is a workaround for the failure of the Consumer/Producer bit. PNP0C02 quirks compensate for firmware that doesn't describe resource usage accurately. It's possible the ACPI spec folks could come up with a better Consumer/Producer workaround, if that's needed. Apparently x86 hasn't needed it yet.
If people add _DSD methods for clocks or whatnot, the hardware won't work with the generic pci_root.c driver, so there's already an incentive for avoiding them. x86 has managed without such methods; arm64 should be able to do the same.
Re-reading this, I'm afraid my response sounds a little dismissive, and I feel like I'm missing some important information. So I apologize if I missed your whole point, Lorenzo.
Bjorn
[+ Zhang Rui]
On Thu, Sep 22, 2016 at 05:10:42PM -0500, Bjorn Helgaas wrote:
On Thu, Sep 22, 2016 at 01:31:01PM -0500, Bjorn Helgaas wrote:
On Thu, Sep 22, 2016 at 01:44:46PM +0100, Lorenzo Pieralisi wrote:
On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
Hi Lorenzo, Bjorn
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 22 September 2016 10:50 To: Bjorn Helgaas Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux- pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux- kernel@vger.kernel.org; Rafael J. Wysocki Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM- specific register range for ACPI case
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote: > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote: > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote: > > [...] > > > > None of these platforms can be fixed entirely in software, and
given
> > > that we will not be adding quirks for new broken hardware, we
should
> > > ask ourselves whether having two versions of a quirk, i.e., one
for
> > > broken hardware + currently shipping firmware, and one for the
same
> > > broken hardware with fixed firmware is really an improvement
over what
> > > has been proposed here. > > > > We're talking about two completely different types of quirks: > > > > 1) MCFG quirks to use memory-mapped config space that doesn't
quite
> > conform to the ECAM model in the PCIe spec, and > > > > 2) Some yet-to-be-determined method to describe address space > > consumed by a bridge. > > > > The first two patches of this series are a nice implementation
for 1).
> > The third patch (ThunderX-specific) is one possibility for 2),
but I
> > don't like it because there's no way for generic software like
the
> > ACPI core to discover these resources. > > Ok, so basically this means that to implement (2) we need to assign > some sort of _HID to these quirky PCI bridges (so that we know what > device they represent and we can retrieve their _CRS). I take from > this discussion that the goal is to make sure that all non-config > resources have to be declared through _CRS device objects, which is > fine but that requires a FW update (unless we can fabricate ACPI > devices and corresponding _CRS in the kernel whenever we match a > given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
I think the PNP quirk approach + PNP0c02 resource put forward by Gab is enough.
Great, thanks; as we take a final decision I will ask Dongdong to submit another RFC based on this approach.
> We discussed this already and I think we should make a decision: > > http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
> > > > > I'd like to step back and come up with some understanding of
how
> > > > non-broken firmware *should* deal with this issue. Then, if
we *do*
> > > > work around this particular broken firmware in the kernel, it
would be
> > > > nice to do it in a way that fits in with that understanding. > > > > > > > > For example, if a companion ACPI device is the preferred
solution, an
> > > > ACPI quirk could fabricate a device with the required
resources. That
> > > > would address the problem closer to the source and make it
more likely
> > > > that the rest of the system will work correctly: /proc/iomem
could
> > > > make sense, things that look at _CRS generically would work
(e.g,
> > > > /sys/, an admittedly hypothetical "lsacpi", etc.) > > > > > > > > Hard-coding stuff in drivers is a point solution that doesn't
provide
> > > > any guidance for future platforms and makes it likely that
the hack
> > > > will get copied into even more drivers. > > > > > > > > > > OK, I see. But the guidance for future platforms should be 'do
not
> > > rely on quirks', and what I am arguing here is that the more we
polish
> > > up this code and make it clean and reusable, the more likely it
is
> > > that will end up getting abused by new broken hardware that we
set out
> > > to reject entirely in the first place. > > > > > > So of course, if the quirk involves claiming resources, let's
make
> > > sure that this occurs in the cleanest and most compliant way
possible.
> > > But any factoring/reuse concerns other than for the current
crop of
> > > broken hardware should be avoided imo. > > > > If future hardware is completely ECAM-compliant and we don't need
any
> > more MCFG quirks, that would be great. > > Yes. > > > But we'll still need to describe that memory-mapped config space > > somewhere. If that's done with PNP0C02 or similar devices (as is
done
> > on my x86 laptop), we'd be all set. > > I am not sure I understand what you mean here. Are you referring > to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
Ok, that's agreed. It goes without saying that since you are quoting the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device _CRS I will consider that a FW bug.
> IIUC PNP0C02 is a reservation mechanism, but it does not help us > associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
Device (RES0) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } }
}
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it with a bridge, although I could imagine something like this:
Device (RES1) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } Method (BRDG) { "PCI1" } // hand-wavy ASL } Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources
(windows) }
}
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
Ok, I am fine with both and I think we are converging, but the way to solve this problem has to be uniform for all ARM partners (and not only ARM). Two points here:
- Adding a device/subdevice allows people to add a _CRS reporting the non-window bridge resources. Fine. It also allows people to chuck in there all sorts of _DSD properties to describe their PCI host bridge as it is done with DT properties (those _DSD can contain eg clocks etc.), this may be tempting (so that they can reuse the same DT driver and do not have to update their firmware) but I want to be clear here: that must not happen. So, a subdevice with a _CRS to report resources, yes, but it will stop there.
- It is unclear to me how to formalize the above. People should not write FW by reading the PCI mailing list, so these guidelines have to be written, somehow. I do not want to standardize quirks, I want to prevent random ACPI table content, which is different. Should I report this to the ACPI spec working group ? If we do not do that everyone will go solve this problem as they deem fit.
Do we really need to formalize this?
As we discussed in the Linaro call at the moment we have few vendors that need quirks and we want to avoid promoting/accepting quirks for the future.
At the time of the call I think we decided to informally accept a set of quirks for the current platforms and reject any other quirk coming after a certain date/kernel version (this to be decided).
I am not sure if there is a way to document/formalize a temporary exception from the rule...
- (1) will be enforced.
I'm not sure it's necessary or possible to enforce a "no future quirks" rule. For one thing, there's already a pretty strong incentive to avoid quirks: if your hardware doesn't require quirks, it works with OSes already in the field.
MCFG quirks allow us to use the generic ACPI pci_root.c driver even if the hardware doesn't support ECAM quite according to the spec.
PNP0C02 usage is a workaround for the failure of the Consumer/Producer bit. PNP0C02 quirks compensate for firmware that doesn't describe resource usage accurately. It's possible the ACPI spec folks could come up with a better Consumer/Producer workaround, if that's needed. Apparently x86 hasn't needed it yet.
If people add _DSD methods for clocks or whatnot, the hardware won't work with the generic pci_root.c driver, so there's already an incentive for avoiding them. x86 has managed without such methods; arm64 should be able to do the same.
Re-reading this, I'm afraid my response sounds a little dismissive, and I feel like I'm missing some important information. So I apologize if I missed your whole point, Lorenzo.
No you are spot on, I just wanted to emphasize, given that we are adding an _HID and a subdevice, that developer should not be tempted to use it to match against a PCI host driver to reuse the DT code, we should not use the quirk mechanism as a backdoor to re-using DT drivers in ACPI context.
Anyway, there is a review process to spot these possible misuses, mine was just a heads-up, quirks will happen, I just do not want to wreck the standard ACPI PCI firmware model to support them.
Given that there are already PNP0c02 bindings out there where the PNP0c02 is used as in Gab's example:
https://patchwork.kernel.org/patch/4757111/
I think the only pending question I have is whether we are allowed to define a PNP0A03 subdevice with a _CRS resource space that is not contained in its parent _CRS, if we answer this question I think we are done.
I will raise the PNP0c02 usage issue with the ASWG anyway.
Thanks ! Lorenzo
Hi Lorenzo
-----Original Message----- From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel- owner@vger.kernel.org] On Behalf Of Lorenzo Pieralisi Sent: 23 September 2016 11:12 To: Bjorn Helgaas Cc: Gabriele Paoloni; Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux- pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong (C); Jeff Hugo; linux-acpi@vger.kernel.org; linux- kernel@vger.kernel.org; Rafael J. Wysocki; rui.zhang@intel.com Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM- specific register range for ACPI case
[+ Zhang Rui]
On Thu, Sep 22, 2016 at 05:10:42PM -0500, Bjorn Helgaas wrote:
On Thu, Sep 22, 2016 at 01:31:01PM -0500, Bjorn Helgaas wrote:
On Thu, Sep 22, 2016 at 01:44:46PM +0100, Lorenzo Pieralisi wrote:
On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
Hi Lorenzo, Bjorn
-----Original Message----- From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com] Sent: 22 September 2016 10:50 To: Bjorn Helgaas Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon;
Catalin
Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan
Kaya;
Jayachandran C; Christopher Covington; Duc Dang; Robert
Richter; Marcin
Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux- pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
Linaro ACPI
Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton;
liudongdong
(C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org;
linux-
kernel@vger.kernel.org; Rafael J. Wysocki Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe
PEM-
specific register range for ACPI case
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas
wrote:
> On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi
wrote:
> > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas
wrote:
> > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard
Biesheuvel wrote:
> > > > [...] > > > > > > None of these platforms can be fixed entirely in
software, and
given > > > > that we will not be adding quirks for new broken
hardware, we
should > > > > ask ourselves whether having two versions of a quirk,
i.e., one
for > > > > broken hardware + currently shipping firmware, and
one for the
same > > > > broken hardware with fixed firmware is really an
improvement
over what > > > > has been proposed here. > > > > > > We're talking about two completely different types of
quirks:
> > > > > > 1) MCFG quirks to use memory-mapped config space that
doesn't
quite > > > conform to the ECAM model in the PCIe spec, and > > > > > > 2) Some yet-to-be-determined method to describe
address space
> > > consumed by a bridge. > > > > > > The first two patches of this series are a nice
implementation
for 1). > > > The third patch (ThunderX-specific) is one possibility
for 2),
but I > > > don't like it because there's no way for generic
software like
the > > > ACPI core to discover these resources. > > > > Ok, so basically this means that to implement (2) we need
to assign
> > some sort of _HID to these quirky PCI bridges (so that we
know what
> > device they represent and we can retrieve their _CRS). I
take from
> > this discussion that the goal is to make sure that all
non-config
> > resources have to be declared through _CRS device
objects, which is
> > fine but that requires a FW update (unless we can
fabricate ACPI
> > devices and corresponding _CRS in the kernel whenever we
match a
> > given MCFG table signature). > > All resources consumed by ACPI devices should be declared
through
> _CRS. If you want to fabricate ACPI devices or _CRS via
kernel
> quirks, that's fine with me. This could be triggered via
MCFG
> signature, DMI info, host bridge _HID, etc.
I think the PNP quirk approach + PNP0c02 resource put forward
by Gab
is enough.
Great thanks as we take a final decision I will ask Dongdong
to submit
another RFC based on this approach
> > We discussed this already and I think we should make a
decision:
kernel/2016-
March/414722.html > > > > > > > I'd like to step back and come up with some
understanding of
how > > > > > non-broken firmware *should* deal with this issue.
Then, if
we *do* > > > > > work around this particular broken firmware in the
kernel, it
would be > > > > > nice to do it in a way that fits in with that
understanding.
> > > > > > > > > > For example, if a companion ACPI device is the
preferred
solution, an > > > > > ACPI quirk could fabricate a device with the
required
resources. That > > > > > would address the problem closer to the source and
make it
more likely > > > > > that the rest of the system will work correctly:
/proc/iomem
could > > > > > make sense, things that look at _CRS generically
would work
(e.g, > > > > > /sys/, an admittedly hypothetical "lsacpi", etc.) > > > > > > > > > > Hard-coding stuff in drivers is a point solution
that doesn't
provide > > > > > any guidance for future platforms and makes it
likely that
the hack > > > > > will get copied into even more drivers. > > > > > > > > > > > > > OK, I see. But the guidance for future platforms
should be 'do
not > > > > rely on quirks', and what I am arguing here is that
the more we
polish > > > > up this code and make it clean and reusable, the more
likely it
is > > > > that will end up getting abused by new broken
hardware that we
set out > > > > to reject entirely in the first place. > > > > > > > > So of course, if the quirk involves claiming
resources, let's
make > > > > sure that this occurs in the cleanest and most
compliant way
possible. > > > > But any factoring/reuse concerns other than for the
current
crop of > > > > broken hardware should be avoided imo. > > > > > > If future hardware is completely ECAM-compliant and we
don't need
any > > > more MCFG quirks, that would be great. > > > > Yes. > > > > > But we'll still need to describe that memory-mapped
config space
> > > somewhere. If that's done with PNP0C02 or similar
devices (as is
done > > > on my x86 laptop), we'd be all set. > > > > I am not sure I understand what you mean here. Are you
referring
> > to MCFG regions reported as PNP0c02 resources through its
_CRS ?
> > Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says
address ranges
> reported via MCFG or _CBA should be reserved by _CRS of a
PNP0C02
> device.
Ok, that's agreed. It goes without saying that since you are
quoting
the PCI spec, if FW fails to report MCFG regions in a PNP0c02
device
_CRS I will consider that a FW bug.
> > IIUC PNP0C02 is a reservation mechanism, but it does not
help us
> > associate its _CRS to a specific PCI host bridge
instance, right ?
> > Gab proposed a hierarchy that *would* associate a PNP0C02
device with
> a PCI bridge: > > Device (PCI1) { > Name (_HID, "HISI0080") // PCI Express Root Bridge > Name (_CID, "PNP0A03") // Compatible PCI Root Bridge > Method (_CRS, 0, Serialized) { // Root complex
resources
(windows) } > Device (RES0) { > Name (_HID, "HISI0081") // HiSi PCIe RC config base
address
> Name (_CID, "PNP0C02") // Motherboard reserved
resource
> Name (_CRS, ResourceTemplate () { ... } > } > } > > That's a possibility. The PCI Firmware Spec suggests
putting RES0 at
> the root (under _SB), but I don't know why. > > Putting it at the root means we couldn't generically
associate it
with > a bridge, although I could imagine something like this: > > Device (RES1) { > Name (_HID, "HISI0081") // HiSi PCIe RC config base
address
> Name (_CID, "PNP0C02") // Motherboard reserved
resource
> Name (_CRS, ResourceTemplate () { ... } > Method (BRDG) { "PCI1" } // hand-wavy ASL > } > Device (PCI1) { > Name (_HID, "HISI0080") // PCI Express Root Bridge > Name (_CID, "PNP0A03") // Compatible PCI Root Bridge > Method (_CRS, 0, Serialized) { // Root complex
resources
(windows) } > } > > Where you could search PNP0C02 devices for a cookie that
matched the
> host bridge.
Ok, I am fine with both and I think we are converging, but
the way
to solve this problem has to be uniform for all ARM partners
(and
not only ARM). Two points here:
- Adding a device/subdevice allows people to add a _CRS
reporting the
non-window bridge resources. Fine. It also allows people
to chuck in
there all sorts of _DSD properties to describe their PCI
host bridge
as it is done with DT properties (those _DSD can contain
eg clocks
etc.), this may be tempting (so that they can reuse the
same DT
driver and do not have to update their firmware) but I
want to be
clear here: that must not happen. So, a subdevice with a
_CRS to
report resources, yes, but it will stop there. 2) It is unclear to me how to formalize the above. People
should not
write FW by reading the PCI mailing list, so these
guidelines have
to be written, somehow. I do not want to standardize quirks,
I want
to prevent random ACPI table content, which is different. Should I report this to the ACPI spec working group ? If
we do
not do that everyone will go solve this problem as they
deem fit.
Do we really need to formalize this?
As we discussed in the Linaro call at the moment we have few
vendors
that need quirks and we want to avoid promoting/accepting
quirks for
the future.
At the time of the call I think we decided to informally accept
a set
of quirks for the current platforms and reject any other quirk
coming
after a certain date/kernel version (this to be decided).
I am not sure if there is a way to document/formalize a
temporary
exception from the rule...
- (1) will be enforced.
I'm not sure it's necessary or possible to enforce a "no future quirks" rule. For one thing, there's already a pretty strong incentive to avoid quirks: if your hardware doesn't require quirks, it works with OSes already in the field.
MCFG quirks allow us to use the generic ACPI pci_root.c driver even
if
the hardware doesn't support ECAM quite according to the spec.
PNP0C02 usage is a workaround for the failure of the
Consumer/Producer
bit. PNP0C02 quirks compensate for firmware that doesn't describe resource usage accurately. It's possible the ACPI spec folks could come up with a better Consumer/Producer workaround, if that's
needed.
Apparently x86 hasn't needed it yet.
If people add _DSD methods for clocks or whatnot, the hardware
won't
work with the generic pci_root.c driver, so there's already an incentive for avoiding them. x86 has managed without such methods; arm64 should be able to do the same.
Re-reading this, I'm afraid my response sounds a little dismissive, and I feel like I'm missing some important information. So I apologize if I missed your whole point, Lorenzo.
No you are spot on, I just wanted to emphasize, given that we are adding an _HID and a subdevice, that developer should not be tempted to use it to match against a PCI host driver to reuse the DT code, we should not use the quirk mechanism as a backdoor to re-using DT drivers in ACPI context.
Anyway, there is a review process to spot these possible misuses, mine was just a heads-up, quirks will happen, I just do not want to wreck the standard ACPI PCI firmware model to support them.
Given that there are already PNP0c02 bindings out there where the PNP0c02 is used as in Gab's example:
https://patchwork.kernel.org/patch/4757111/
I think the only pending question I have is whether we are allowed to define a PNP0A03 subdevice with a _CRS resource space that is not contained in its parent _CRS, if we answer this question I think we are done.
FMU part of your question is answered in the PCI Firmware specs https://members.pcisig.com/wg/PCI-SIG/document/download/8232
Where from note 2 of 4.1.2 I quote: "For most systems, the motherboard resource would appear at the root of the ACPI namespace (under _SB) in a node with a _HID of EISAID (PNP0C02), and the resources in this case should not be claimed in the root PCI bus's _CRS"
My interpretation is that the resource claimed in the PNP0C02 node must never be in the PNP0A03 _CRS.
Now, about having the PNP0C02 node under _SB or as a sub-device: the note above points out that most systems have it under _SB, but I read it as a quite relaxed condition....
BTW this is just my interpretation...
Thanks
Gab
I will raise the PNP0c02 usage issue with the ASWG anyway.
Thanks ! Lorenzo
On 09/22/2016 05:49 AM, Lorenzo Pieralisi wrote:
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
[...]
None of these platforms can be fixed entirely in software, and given that we will not be adding quirks for new broken hardware, we should ask ourselves whether having two versions of a quirk, i.e., one for broken hardware + currently shipping firmware, and one for the same broken hardware with fixed firmware is really an improvement over what has been proposed here.
We're talking about two completely different types of quirks:
MCFG quirks to use memory-mapped config space that doesn't quite conform to the ECAM model in the PCIe spec, and
Some yet-to-be-determined method to describe address space consumed by a bridge.
The first two patches of this series are a nice implementation for 1). The third patch (ThunderX-specific) is one possibility for 2), but I don't like it because there's no way for generic software like the ACPI core to discover these resources.
Ok, so basically this means that to implement (2) we need to assign some sort of _HID to these quirky PCI bridges (so that we know what device they represent and we can retrieve their _CRS). I take from this discussion that the goal is to make sure that all non-config resources have to be declared through _CRS device objects, which is fine but that requires a FW update (unless we can fabricate ACPI devices and corresponding _CRS in the kernel whenever we match a given MCFG table signature).
All resources consumed by ACPI devices should be declared through _CRS. If you want to fabricate ACPI devices or _CRS via kernel quirks, that's fine with me. This could be triggered via MCFG signature, DMI info, host bridge _HID, etc.
I think the PNP quirk approach + PNP0c02 resource put forward by Gab is enough.
We discussed this already and I think we should make a decision:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
I'd like to step back and come up with some understanding of how non-broken firmware *should* deal with this issue. Then, if we *do* work around this particular broken firmware in the kernel, it would be nice to do it in a way that fits in with that understanding.
For example, if a companion ACPI device is the preferred solution, an ACPI quirk could fabricate a device with the required resources. That would address the problem closer to the source and make it more likely that the rest of the system will work correctly: /proc/iomem could make sense, things that look at _CRS generically would work (e.g, /sys/, an admittedly hypothetical "lsacpi", etc.)
Hard-coding stuff in drivers is a point solution that doesn't provide any guidance for future platforms and makes it likely that the hack will get copied into even more drivers.
OK, I see. But the guidance for future platforms should be 'do not rely on quirks', and what I am arguing here is that the more we polish up this code and make it clean and reusable, the more likely it is that will end up getting abused by new broken hardware that we set out to reject entirely in the first place.
So of course, if the quirk involves claiming resources, let's make sure that this occurs in the cleanest and most compliant way possible. But any factoring/reuse concerns other than for the current crop of broken hardware should be avoided imo.
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
Yes.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
I am not sure I understand what you mean here. Are you referring to MCFG regions reported as PNP0c02 resources through its _CRS ?
Yes. PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02 device.
Ok, that's agreed. It goes without saying that since you are quoting the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device _CRS I will consider that a FW bug.
IIUC PNP0C02 is a reservation mechanism, but it does not help us associate its _CRS to a specific PCI host bridge instance, right ?
Gab proposed a hierarchy that *would* associate a PNP0C02 device with a PCI bridge:
Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources (windows) } Device (RES0) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } } }
That's a possibility. The PCI Firmware Spec suggests putting RES0 at the root (under _SB), but I don't know why.
Putting it at the root means we couldn't generically associate it with a bridge, although I could imagine something like this:
Device (RES1) { Name (_HID, "HISI0081") // HiSi PCIe RC config base address Name (_CID, "PNP0C02") // Motherboard reserved resource Name (_CRS, ResourceTemplate () { ... } Method (BRDG) { "PCI1" } // hand-wavy ASL } Device (PCI1) { Name (_HID, "HISI0080") // PCI Express Root Bridge Name (_CID, "PNP0A03") // Compatible PCI Root Bridge Method (_CRS, 0, Serialized) { // Root complex resources (windows) } }
Where you could search PNP0C02 devices for a cookie that matched the host bridge.
Ok, I am fine with both and I think we are converging, but the way to solve this problem has to be uniform for all ARM partners (and not only ARM). Two points here:
- Adding a device/subdevice allows people to add a _CRS reporting the non-window bridge resources. Fine. It also allows people to chuck in there all sorts of _DSD properties to describe their PCI host bridge as it is done with DT properties (those _DSD can contain eg clocks etc.), this may be tempting (so that they can reuse the same DT driver and do not have to update their firmware) but I want to be clear here: that must not happen. So, a subdevice with a _CRS to report resources, yes, but it will stop there.
- It is unclear to me how to formalize the above. People should not write FW by reading the PCI mailing list, so these guidelines have to be written, somehow. I do not want to standardize quirks, I want to prevent random ACPI table content, which is different. Should I report this to the ACPI spec working group ? If we do not do that everyone will go solve this problem as they deem fit.
Could you add some checks to fwts?
Cov
Hi Bjorn
[...]
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
If we need to work around firmware in the field that doesn't do that, one possibility is a PNP quirk along the lines of quirk_amd_mmconfig_area().
So, if my understanding is correct, for platforms that have not been shipped yet you propose to use PNP0C02 in the ACPI table in order to declare a motherboard reserved resource whereas for shipped platforms you propose to have a quirk along pnp_fixups in order to track the resource usage even if values are hardcoded...correct?
Before Tomasz came up with this patchset we had a call between the vendors involved in this PCI quirks saga and other guys from Linaro and ARM.
Lorenzo summarized the outcome as in the following link http://lkml.iu.edu/hypermail/linux/kernel/1606.2/03344.html
Since this quirks mechanism has been discussed for quite a long time now IMHO it would be good to have a last call including also you (Bjorn) so that we can all agree on what to do and we avoid changing our drivers again and again...
What do you think?
Thanks
Gab
Bjorn
On Wed, Sep 21, 2016 at 02:10:55PM +0000, Gabriele Paoloni wrote:
Hi Bjorn
[...]
If future hardware is completely ECAM-compliant and we don't need any more MCFG quirks, that would be great.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is done on my x86 laptop), we'd be all set.
If we need to work around firmware in the field that doesn't do that, one possibility is a PNP quirk along the lines of quirk_amd_mmconfig_area().
So, if my understanding is correct, for platforms that have not been shipped yet you propose to use PNP0C02 in the ACPI table in order to declare a motherboard reserved resource whereas for shipped platforms you propose to have a quirk along pnp_fixups in order to track the resource usage even if values are hardcoded...correct?
Yes. I'm open to alternate proposals, but x86 uses PNP0C02, and following existing practice seems reasonable.
Before Tomasz came up with this patchset we had a call between the vendors involved in this PCI quirks saga and other guys from Linaro and ARM.
Lorenzo summarized the outcome as in the following link http://lkml.iu.edu/hypermail/linux/kernel/1606.2/03344.html
Since this quirks mechanism has been discussed for quite a long time now IMHO it would be good to have a last call including also you (Bjorn) so that we can all agree on what to do and we avoid changing our drivers again and again...
I think we're converging pretty fast. As far as I'm concerned, the v6 ECAM quirks implementation is perfect. The only remaining issue is reporting the ECAM resources, and I haven't seen objections to using PNP0C02 + PNP quirks for broken firmware.
There is the question of how or whether to associate a PNP0A03 PCI bridge with resources from a different PNP0C02 device, but that's not super important. If the hard-coded resources appear both in a quirk and in the PCI bridge driver, it's ugly but not the end of the world. We've still achieved the objective of avoiding landmines in the address space.
Bjorn
Hi Bjorn
-----Original Message----- From: Bjorn Helgaas [mailto:helgaas@kernel.org] Sent: 21 September 2016 19:59 To: Gabriele Paoloni Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin Marinas; Rafael Wysocki; Lorenzo Pieralisi; Arnd Bergmann; Hanjun Guo; Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux- pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong (C); Jeff Hugo; linux-acpi@vger.kernel.org; linux- kernel@vger.kernel.org; Rafael J. Wysocki Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM- specific register range for ACPI case
On Wed, Sep 21, 2016 at 02:10:55PM +0000, Gabriele Paoloni wrote:
Hi Bjorn
[...]
If future hardware is completely ECAM-compliant and we don't need
any
more MCFG quirks, that would be great.
But we'll still need to describe that memory-mapped config space somewhere. If that's done with PNP0C02 or similar devices (as is
done
on my x86 laptop), we'd be all set.
If we need to work around firmware in the field that doesn't do
that,
one possibility is a PNP quirk along the lines of quirk_amd_mmconfig_area().
So, if my understanding is correct, for platforms that have not been shipped yet you propose to use PNP0C02 in the ACPI table in order to declare a motherboard reserved resource whereas for shipped platforms you propose to have a quirk along pnp_fixups in order to track the resource usage even if values are hardcoded...correct?
Yes. I'm open to alternate proposals, but x86 uses PNP0C02, and following existing practice seems reasonable.
Before Tomasz came up with this patchset we had a call between the
vendors
involved in this PCI quirks saga and other guys from Linaro and ARM.
Lorenzo summarized the outcome as in the following link http://lkml.iu.edu/hypermail/linux/kernel/1606.2/03344.html
Since this quirks mechanism has been discussed for quite a long time
now
IMHO it would be good to have a last call including also you (Bjorn)
so
that we can all agree on what to do and we avoid changing our drivers
again
and again...
I think we're converging pretty fast. As far as I'm concerned, the v6 ECAM quirks implementation is perfect. The only remaining issue is reporting the ECAM resources, and I haven't seen objections to using PNP0C02 + PNP quirks for broken firmware.
There is the question of how or whether to associate a PNP0A03 PCI bridge with resources from a different PNP0C02 device, but that's not super important. If the hard-coded resources appear both in a quirk and in the PCI bridge driver, it's ugly but not the end of the world. We've still achieved the objective of avoiding landmines in the address space.
Ok got it many thanks
Gab
Bjorn
ThunderX PCIe controller to off-chip devices (so-called PEM) is not fully compliant with ECAM standard. It uses non-standard configuration space accessors (see pci_thunder_pem_ops) and custom configuration space granularity (see bus_shift = 24). In order to access configuration space and probe PEM as ACPI based PCI host controller we need to add MCFG quirk infrastructure. This involves: 1. Export PEM pci_thunder_pem_ops structure so it is visible to MCFG quirk code. 2. New quirk entries for each PEM segment. Each contains platform IDs, mentioned pci_thunder_pem_ops and CFG resources.
Quirk is considered for ThunderX silicon pass2.x only which is identified via MCFG revision 1.
Signed-off-by: Tomasz Nowicki tn@semihalf.com --- drivers/acpi/pci_mcfg.c | 27 +++++++++++++++++++++++++++ drivers/pci/host/pci-thunder-pem.c | 2 +- include/linux/pci-ecam.h | 4 ++++ 3 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 2b8acc7..1f73d7b 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -51,6 +51,33 @@ struct mcfg_fixup {
static struct mcfg_fixup mcfg_quirks[] = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +#ifdef CONFIG_PCI_HOST_THUNDER_PEM + /* SoC pass2.x */ + { "CAVIUM", "THUNDERX", 1, 4, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x88001f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 5, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x884057000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 6, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x88808f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 7, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x89001f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 8, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x894057000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 9, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x89808f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 14, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x98001f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 15, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x984057000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 16, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x98808f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 17, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x99001f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 18, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x994057000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 1, 19, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x99808f000000UL, 0x39 * SZ_16M) }, +#endif };
static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index b048761..d7c10cc 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -367,7 +367,7 @@ static int thunder_pem_init(struct pci_config_window *cfg) return 0; }
-static struct pci_ecam_ops pci_thunder_pem_ops = { +struct pci_ecam_ops pci_thunder_pem_ops = { .bus_shift = 24, .init = thunder_pem_init, .pci_ops = { diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 7adad20..65505ea 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -58,6 +58,10 @@ void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn, int where); /* default ECAM ops */ extern struct pci_ecam_ops pci_generic_ecam_ops; +/* ECAM ops for known quirks */ +#ifdef CONFIG_PCI_HOST_THUNDER_PEM +extern struct pci_ecam_ops pci_thunder_pem_ops; +#endif
#ifdef CONFIG_PCI_HOST_GENERIC /* for DT-based PCI controllers that support ECAM */
On Fri, Sep 09, 2016 at 09:24:06PM +0200, Tomasz Nowicki wrote:
ThunderX PCIe controller to off-chip devices (so-called PEM) is not fully compliant with ECAM standard. It uses non-standard configuration space accessors (see pci_thunder_pem_ops) and custom configuration space granulation (see bus_shift = 24). In order to access configuration space and probe PEM as ACPI based PCI host controller we need to add MCFG quirk infrastructure. This involves:
- Export PEM pci_thunder_pem_ops structure so it is visible to MCFG quirk code.
- New quirk entries for each PEM segment. Each contains platform IDs, mentioned pci_thunder_pem_ops and CFG resources.
Quirk is considered for ThunderX silicon pass2.x only which is identified via MCFG revision 1.
Is it really the case that silicon pass2.x has MCFG revision 1, and silicon pass1.x has MCFG revision 2? That just seems backwards.
Signed-off-by: Tomasz Nowicki tn@semihalf.com
drivers/acpi/pci_mcfg.c | 27 +++++++++++++++++++++++++++ drivers/pci/host/pci-thunder-pem.c | 2 +- include/linux/pci-ecam.h | 4 ++++ 3 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 2b8acc7..1f73d7b 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -51,6 +51,33 @@ struct mcfg_fixup { static struct mcfg_fixup mcfg_quirks[] = { /* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +#ifdef CONFIG_PCI_HOST_THUNDER_PEM
- /* SoC pass2.x */
- { "CAVIUM", "THUNDERX", 1, 4, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x88001f000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 5, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x884057000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 6, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x88808f000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 7, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x89001f000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 8, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x894057000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 9, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x89808f000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 14, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x98001f000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 15, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x984057000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 16, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x98808f000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 17, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x99001f000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 18, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x994057000000UL, 0x39 * SZ_16M) },
- { "CAVIUM", "THUNDERX", 1, 19, MCFG_BUS_ANY, &pci_thunder_pem_ops,
DEFINE_RES_MEM(0x99808f000000UL, 0x39 * SZ_16M) },
+#endif }; static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index b048761..d7c10cc 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -367,7 +367,7 @@ static int thunder_pem_init(struct pci_config_window *cfg) return 0; } -static struct pci_ecam_ops pci_thunder_pem_ops = { +struct pci_ecam_ops pci_thunder_pem_ops = { .bus_shift = 24, .init = thunder_pem_init, .pci_ops = { diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 7adad20..65505ea 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -58,6 +58,10 @@ void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn, int where); /* default ECAM ops */ extern struct pci_ecam_ops pci_generic_ecam_ops; +/* ECAM ops for known quirks */ +#ifdef CONFIG_PCI_HOST_THUNDER_PEM +extern struct pci_ecam_ops pci_thunder_pem_ops; +#endif #ifdef CONFIG_PCI_HOST_GENERIC /* for DT-based PCI controllers that support ECAM */ -- 1.9.1
On 19.09.2016 17:45, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:06PM +0200, Tomasz Nowicki wrote:
ThunderX PCIe controller to off-chip devices (so-called PEM) is not fully compliant with ECAM standard. It uses non-standard configuration space accessors (see pci_thunder_pem_ops) and custom configuration space granulation (see bus_shift = 24). In order to access configuration space and probe PEM as ACPI based PCI host controller we need to add MCFG quirk infrastructure. This involves:
- Export PEM pci_thunder_pem_ops structure so it is visible to MCFG quirk code.
- New quirk entries for each PEM segment. Each contains platform IDs, mentioned pci_thunder_pem_ops and CFG resources.
Quirk is considered for ThunderX silicon pass2.x only which is identified via MCFG revision 1.
Is it really the case that silicon pass2.x has MCFG revision 1, and silicon pass1.x has MCFG revision 2? That just seems backwards.
It is weird but silicon pass2.x is more common and it had MCFG revision 1 from the beginning. Unless it is allowed to use MCFG revision 0 ? Then we could use MCFG revision 0 for pass1.x
Tomasz
On Tue, Sep 20, 2016 at 09:06:23AM +0200, Tomasz Nowicki wrote:
On 19.09.2016 17:45, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:06PM +0200, Tomasz Nowicki wrote:
ThunderX PCIe controller to off-chip devices (so-called PEM) is not fully compliant with ECAM standard. It uses non-standard configuration space accessors (see pci_thunder_pem_ops) and custom configuration space granulation (see bus_shift = 24). In order to access configuration space and probe PEM as ACPI based PCI host controller we need to add MCFG quirk infrastructure. This involves:
- Export PEM pci_thunder_pem_ops structure so it is visible to MCFG quirk code.
- New quirk entries for each PEM segment. Each contains platform IDs, mentioned pci_thunder_pem_ops and CFG resources.
Quirk is considered for ThunderX silicon pass2.x only which is identified via MCFG revision 1.
Is it really the case that silicon pass2.x has MCFG revision 1, and silicon pass1.x has MCFG revision 2? That just seems backwards.
It is weird but silicon pass2.x is more common and it had MCFG revision 1 from the beginning. Unless it is allowed to use MCFG revision 0 ? Then we could use MCFG revision 0 for pass1.x
There's no reason to avoid revision 0. The question is really what firmware is already in the field. We need to accommodate that. We don't want a situation where kernel version X only works with firmware version Y, but kernel version X+1 only works with firmware version Y+1.
Bjorn
On 20.09.2016 15:08, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:06:23AM +0200, Tomasz Nowicki wrote:
On 19.09.2016 17:45, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:06PM +0200, Tomasz Nowicki wrote:
ThunderX PCIe controller to off-chip devices (so-called PEM) is not fully compliant with ECAM standard. It uses non-standard configuration space accessors (see pci_thunder_pem_ops) and custom configuration space granulation (see bus_shift = 24). In order to access configuration space and probe PEM as ACPI based PCI host controller we need to add MCFG quirk infrastructure. This involves:
- Export PEM pci_thunder_pem_ops structure so it is visible to MCFG quirk code.
- New quirk entries for each PEM segment. Each contains platform IDs, mentioned pci_thunder_pem_ops and CFG resources.
Quirk is considered for ThunderX silicon pass2.x only which is identified via MCFG revision 1.
Is it really the case that silicon pass2.x has MCFG revision 1, and silicon pass1.x has MCFG revision 2? That just seems backwards.
It is weird but silicon pass2.x is more common and it had MCFG revision 1 from the beginning. Unless it is allowed to use MCFG revision 0 ? Then we could use MCFG revision 0 for pass1.x
There's no reason to avoid revision 0. The question is really what firmware is already in the field. We need to accommodate that. We don't want a situation where kernel version X only works with firmware version Y, but kernel version X+1 only works with firmware version Y+1.
Yes I agree. We have already deployed the firmware where: pass2.x has MCFG revision 1 pass1.x has MCFG revision 2 so we need to stick to this.
Thanks, Tomasz
ThunderX pass1.x requires emulation of the EA headers for on-chip devices, hence it has to use custom pci_thunder_ecam_ops for accessing PCI config space (pci-thunder-ecam.c). Add new entries to the MCFG quirk array where they can be applied while probing an ACPI based PCI host controller.
ThunderX pass1.x accesses off-chip devices (so-called PEM) the same way as silicon pass2.x, so we need to add PEM quirk entries too.
Quirk is considered for ThunderX silicon pass1.x only which is identified via MCFG revision 2.
Signed-off-by: Tomasz Nowicki tn@semihalf.com --- drivers/acpi/pci_mcfg.c | 45 +++++++++++++++++++++++++++++++++++++ drivers/pci/host/pci-thunder-ecam.c | 2 +- include/linux/pci-ecam.h | 3 +++ 3 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 1f73d7b..eb14f74 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -77,6 +77,51 @@ static struct mcfg_fixup mcfg_quirks[] = { DEFINE_RES_MEM(0x994057000000UL, 0x39 * SZ_16M) }, { "CAVIUM", "THUNDERX", 1, 19, MCFG_BUS_ANY, &pci_thunder_pem_ops, DEFINE_RES_MEM(0x99808f000000UL, 0x39 * SZ_16M) }, + + /* SoC pass1.x */ + { "CAVIUM", "THUNDERX", 2, 4, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x88001f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 5, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x884057000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 6, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x88808f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 7, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x89001f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 8, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x894057000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 9, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x89808f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 14, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x98001f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 15, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x984057000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 16, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x98808f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 17, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x99001f000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 18, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x994057000000UL, 0x39 * SZ_16M) }, + { "CAVIUM", "THUNDERX", 2, 19, MCFG_BUS_ANY, &pci_thunder_pem_ops, + DEFINE_RES_MEM(0x99808f000000UL, 0x39 * SZ_16M) }, +#endif +#ifdef CONFIG_PCI_HOST_THUNDER_ECAM + /* SoC pass1.x */ + { "CAVIUM", "THUNDERX", 2, 0, MCFG_BUS_ANY, &pci_thunder_ecam_ops, + 
MCFG_RES_EMPTY}, + { "CAVIUM", "THUNDERX", 2, 1, MCFG_BUS_ANY, &pci_thunder_ecam_ops, + MCFG_RES_EMPTY}, + { "CAVIUM", "THUNDERX", 2, 2, MCFG_BUS_ANY, &pci_thunder_ecam_ops, + MCFG_RES_EMPTY}, + { "CAVIUM", "THUNDERX", 2, 3, MCFG_BUS_ANY, &pci_thunder_ecam_ops, + MCFG_RES_EMPTY}, + { "CAVIUM", "THUNDERX", 2, 10, MCFG_BUS_ANY, &pci_thunder_ecam_ops, + MCFG_RES_EMPTY}, + { "CAVIUM", "THUNDERX", 2, 11, MCFG_BUS_ANY, &pci_thunder_ecam_ops, + MCFG_RES_EMPTY}, + { "CAVIUM", "THUNDERX", 2, 12, MCFG_BUS_ANY, &pci_thunder_ecam_ops, + MCFG_RES_EMPTY}, + { "CAVIUM", "THUNDERX", 2, 13, MCFG_BUS_ANY, &pci_thunder_ecam_ops, + MCFG_RES_EMPTY}, #endif };
diff --git a/drivers/pci/host/pci-thunder-ecam.c b/drivers/pci/host/pci-thunder-ecam.c index d50a3dc..b6c17e2 100644 --- a/drivers/pci/host/pci-thunder-ecam.c +++ b/drivers/pci/host/pci-thunder-ecam.c @@ -346,7 +346,7 @@ static int thunder_ecam_config_write(struct pci_bus *bus, unsigned int devfn, return pci_generic_config_write(bus, devfn, where, size, val); }
-static struct pci_ecam_ops pci_thunder_ecam_ops = { +struct pci_ecam_ops pci_thunder_ecam_ops = { .bus_shift = 20, .pci_ops = { .map_bus = pci_ecam_map_bus, diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 65505ea..35f0e81 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -62,6 +62,9 @@ extern struct pci_ecam_ops pci_generic_ecam_ops; #ifdef CONFIG_PCI_HOST_THUNDER_PEM extern struct pci_ecam_ops pci_thunder_pem_ops; #endif +#ifdef CONFIG_PCI_HOST_THUNDER_ECAM +extern struct pci_ecam_ops pci_thunder_ecam_ops; +#endif
#ifdef CONFIG_PCI_HOST_GENERIC /* for DT-based PCI controllers that support ECAM */
On 09.09.2016 21:24, Tomasz Nowicki wrote:
Quirk handling relies on an idea of simple static array which contains quirk entries. Each entry consists of identification information (IDs from standard header of MCFG table) along with custom pci_ecam_ops structure and configuration space resource structure. This way it is possible to find corresponding quirk entries and override pci_ecam_ops and PCI configuration space regions.
As an example, the last 3 patches present quirk handling mechanism usage for ThunderX.
This series can be found here: git@github.com:semihalf-nowicki-tomasz/linux.git (branch: pci-acpi-quirk-v6)
Thanks, Tomasz
On Fri, Sep 09, 2016 at 09:24:02PM +0200, Tomasz Nowicki wrote:
Quirk handling relies on an idea of simple static array which contains quirk entries. Each entry consists of identification information (IDs from standard header of MCFG table) along with custom pci_ecam_ops structure and configuration space resource structure. This way it is possible to find corresponding quirk entries and override pci_ecam_ops and PCI configuration space regions.
As an example, the last 3 patches present quirk handling mechanism usage for ThunderX.
v5 -> v6
- rebase against v4.8-rc5
- drop patch 1 from the previous series
- keep pci_acpi_setup_ecam_mapping() in ARM64 arch directory
- move quirk code to pci_mcfg.c
- restrict quirk to override pci_ecam_ops and CFG resource structure only, no init call any more
- split ThunderX quirks into the smaller chunks
- add ThunderX pass1.x silicon revision support
v4 -> v5
- rebase against v4.8-rc1
- rework to exact MCFG OEM ID, TABLE ID, rev match
- use memcmp instead of strncmp
- no substring match
- fix typos and dmesg message
Tomasz Nowicki (5): PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform specific ECAM quirks PCI: thunder-pem: Allow to probe PEM-specific register range for ACPI case PCI: thunder: Enable ACPI PCI controller for ThunderX pass2.x silicon version PCI: thunder: Enable ACPI PCI controller for ThunderX pass1.x silicon version
arch/arm64/kernel/pci.c | 17 ++-- drivers/acpi/pci_mcfg.c | 168 +++++++++++++++++++++++++++++++++++- drivers/pci/host/pci-thunder-ecam.c | 2 +- drivers/pci/host/pci-thunder-pem.c | 63 +++++++++++--- include/linux/pci-acpi.h | 4 +- include/linux/pci-ecam.h | 7 ++ 6 files changed, 230 insertions(+), 31 deletions(-)
I'm not quite ready to merge these because we haven't resolved the question of how to expose the resources used by the memory-mapped config space. I'm fine with the first two patches (I did make a couple trivial changes, see below), but there's no point in merging them until we merge a user for them.
I pushed the series to pci/ecam-v6 for build testing and discussion. The diff (the changes I made locally) from v6 as posted by Tomasz is below.
Bjorn
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index eb14f74..bb3b8ad 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -42,86 +42,59 @@ struct mcfg_fixup { struct resource cfgres; };
-#define MCFG_DOM_ANY (-1) #define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \ ((end) - (start) + 1), \ NULL, IORESOURCE_BUS) -#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) -#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0) +#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff)
static struct mcfg_fixup mcfg_quirks[] = { -/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +/* { OEM_ID, OEM_TABLE_ID, REV, SEGMENT, BUS_RANGE, cfgres, ops }, */ #ifdef CONFIG_PCI_HOST_THUNDER_PEM +#define THUNDER_PEM_MCFG(rev, seg, addr) \ + { "CAVIUM", "THUNDERX", rev, seg, MCFG_BUS_ANY, \ + &pci_thunder_pem_ops, DEFINE_RES_MEM(addr, 0x39 * SZ_16M) } + /* SoC pass2.x */ - { "CAVIUM", "THUNDERX", 1, 4, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x88001f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 5, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x884057000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 6, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x88808f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 7, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x89001f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 8, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x894057000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 9, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x89808f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 14, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x98001f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 15, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x984057000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 16, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x98808f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 17, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x99001f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 18, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x994057000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 1, 19, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x99808f000000UL, 0x39 * SZ_16M) }, + THUNDER_PEM_MCFG(1, 4, 0x88001f000000UL), + THUNDER_PEM_MCFG(1, 5, 0x884057000000UL), + THUNDER_PEM_MCFG(1, 6, 0x88808f000000UL), + 
THUNDER_PEM_MCFG(1, 7, 0x89001f000000UL), + THUNDER_PEM_MCFG(1, 8, 0x894057000000UL), + THUNDER_PEM_MCFG(1, 9, 0x89808f000000UL), + THUNDER_PEM_MCFG(1, 14, 0x98001f000000UL), + THUNDER_PEM_MCFG(1, 15, 0x984057000000UL), + THUNDER_PEM_MCFG(1, 16, 0x98808f000000UL), + THUNDER_PEM_MCFG(1, 17, 0x99001f000000UL), + THUNDER_PEM_MCFG(1, 18, 0x994057000000UL), + THUNDER_PEM_MCFG(1, 19, 0x99808f000000UL),
/* SoC pass1.x */ - { "CAVIUM", "THUNDERX", 2, 4, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x88001f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 5, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x884057000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 6, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x88808f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 7, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x89001f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 8, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x894057000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 9, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x89808f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 14, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x98001f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 15, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x984057000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 16, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x98808f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 17, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x99001f000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 18, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x994057000000UL, 0x39 * SZ_16M) }, - { "CAVIUM", "THUNDERX", 2, 19, MCFG_BUS_ANY, &pci_thunder_pem_ops, - DEFINE_RES_MEM(0x99808f000000UL, 0x39 * SZ_16M) }, + THUNDER_PEM_MCFG(2, 4, 0x88001f000000UL), + THUNDER_PEM_MCFG(2, 5, 0x884057000000UL), + THUNDER_PEM_MCFG(2, 6, 0x88808f000000UL), + THUNDER_PEM_MCFG(2, 7, 0x89001f000000UL), + THUNDER_PEM_MCFG(2, 8, 0x894057000000UL), + THUNDER_PEM_MCFG(2, 9, 0x89808f000000UL), + THUNDER_PEM_MCFG(2, 14, 0x98001f000000UL), + THUNDER_PEM_MCFG(2, 15, 0x984057000000UL), + THUNDER_PEM_MCFG(2, 16, 0x98808f000000UL), + THUNDER_PEM_MCFG(2, 17, 0x99001f000000UL), + THUNDER_PEM_MCFG(2, 18, 0x994057000000UL), + THUNDER_PEM_MCFG(2, 19, 
0x99808f000000UL), #endif #ifdef CONFIG_PCI_HOST_THUNDER_ECAM +#define THUNDER_ECAM_MCFG(rev, seg) \ + { "CAVIUM", "THUNDERX", rev, seg, MCFG_BUS_ANY, &pci_thunder_ecam_ops } + /* SoC pass1.x */ - { "CAVIUM", "THUNDERX", 2, 0, MCFG_BUS_ANY, &pci_thunder_ecam_ops, - MCFG_RES_EMPTY}, - { "CAVIUM", "THUNDERX", 2, 1, MCFG_BUS_ANY, &pci_thunder_ecam_ops, - MCFG_RES_EMPTY}, - { "CAVIUM", "THUNDERX", 2, 2, MCFG_BUS_ANY, &pci_thunder_ecam_ops, - MCFG_RES_EMPTY}, - { "CAVIUM", "THUNDERX", 2, 3, MCFG_BUS_ANY, &pci_thunder_ecam_ops, - MCFG_RES_EMPTY}, - { "CAVIUM", "THUNDERX", 2, 10, MCFG_BUS_ANY, &pci_thunder_ecam_ops, - MCFG_RES_EMPTY}, - { "CAVIUM", "THUNDERX", 2, 11, MCFG_BUS_ANY, &pci_thunder_ecam_ops, - MCFG_RES_EMPTY}, - { "CAVIUM", "THUNDERX", 2, 12, MCFG_BUS_ANY, &pci_thunder_ecam_ops, - MCFG_RES_EMPTY}, - { "CAVIUM", "THUNDERX", 2, 13, MCFG_BUS_ANY, &pci_thunder_ecam_ops, - MCFG_RES_EMPTY}, + THUNDER_ECAM_MCFG(2, 0), + THUNDER_ECAM_MCFG(2, 1), + THUNDER_ECAM_MCFG(2, 2), + THUNDER_ECAM_MCFG(2, 3), + THUNDER_ECAM_MCFG(2, 10), + THUNDER_ECAM_MCFG(2, 11), + THUNDER_ECAM_MCFG(2, 12), + THUNDER_ECAM_MCFG(2, 13), #endif };
@@ -141,12 +114,12 @@ static void pci_mcfg_match_quirks(struct acpi_pci_root *root, * table ID, and OEM revision from MCFG table standard header. */ for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) { - if (f->seg == root->segment && - resource_contains(&f->bus_range, &root->secondary) && - !memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) && + if (!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) && !memcmp(f->oem_table_id, mcfg_oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && - f->oem_revision == mcfg_oem_revision) { + f->oem_revision == mcfg_oem_revision && + f->seg == root->segment && + resource_contains(&f->bus_range, &root->secondary)) { if (f->cfgres.start) *cfgres = f->cfgres; if (f->ops) @@ -195,10 +168,10 @@ skip_lookup: }
/* - * Let to override default ECAM ops and CFG resource range. - * Also, this might even retrieve CFG resource range in case MCFG - * does not have it. Invalid CFG start address means MCFG firmware bug - * or we need another quirk in array. + * Allow quirks to override default ECAM ops and CFG resource + * range. This may even fabricate a CFG resource range in case + * MCFG does not have it. Invalid CFG start address means MCFG + * firmware bug or we need another quirk in array. */ pci_mcfg_match_quirks(root, &res, &ops); if (!res.start) @@ -239,7 +212,7 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header) /* Save MCFG IDs and revision for quirks matching */ memcpy(mcfg_oem_id, header->oem_id, ACPI_OEM_ID_SIZE); memcpy(mcfg_oem_table_id, header->oem_table_id, ACPI_OEM_TABLE_ID_SIZE); - mcfg_oem_revision = header->revision; + mcfg_oem_revision = header->oem_revision;
pr_info("MCFG table detected, %d entries\n", n); return 0;
Hi Bjorn, Tomasz,
On 2016-09-20 15:26, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:02PM +0200, Tomasz Nowicki wrote:
Quirk handling relies on an idea of simple static array which contains quirk entries. Each entry consists of identification information (IDs from standard header of MCFG table) along with custom pci_ecam_ops structure and configuration space resource structure. This way it is possible to find corresponding quirk entries and override pci_ecam_ops and PCI configuration space regions.
As an example, the last 3 patches present quirk handling mechanism usage for ThunderX.
v5 -> v6
- rebase against v4.8-rc5
- drop patch 1 from the previous series
- keep pci_acpi_setup_ecam_mapping() in ARM64 arch directory
- move quirk code to pci_mcfg.c
- restrict quirk to override pci_ecam_ops and CFG resource structure only, no init call any more
- split ThunderX quirks into the smaller chunks
- add ThunderX pass1.x silicon revision support
v4 -> v5
- rebase against v4.8-rc1
- rework to exact MCFG OEM ID, TABLE ID, rev match
- use memcmp instead of strncmp
- no substring match
- fix typos and dmesg message
Tomasz Nowicki (5): PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform specific ECAM quirks PCI: thunder-pem: Allow to probe PEM-specific register range for ACPI case PCI: thunder: Enable ACPI PCI controller for ThunderX pass2.x silicon version PCI: thunder: Enable ACPI PCI controller for ThunderX pass1.x silicon version
arch/arm64/kernel/pci.c | 17 ++-- drivers/acpi/pci_mcfg.c | 168 +++++++++++++++++++++++++++++++++++- drivers/pci/host/pci-thunder-ecam.c | 2 +- drivers/pci/host/pci-thunder-pem.c | 63 +++++++++++--- include/linux/pci-acpi.h | 4 +- include/linux/pci-ecam.h | 7 ++ 6 files changed, 230 insertions(+), 31 deletions(-)
I'm not quite ready to merge these because we haven't resolved the question of how to expose the resources used by the memory-mapped config space. I'm fine with the first two patches (I did make a couple trivial changes, see below), but there's no point in merging them until we merge a user for them.
I pushed the series to pci/ecam-v6 for build testing and discussion. The diff (the changes I made locally) from v6 as posted by Tomasz is below.
Rebasing the following simple quirks framework user onto this branch, I have some questions.
https://source.codeaurora.org/quic/server/kernel/commit/?h=cov/4.8-rc2-testi...
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index eb14f74..bb3b8ad 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -42,86 +42,59 @@ struct mcfg_fixup { struct resource cfgres; };
-#define MCFG_DOM_ANY (-1)
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \ ((end) - (start) + 1), \ NULL, IORESOURCE_BUS) -#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) -#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0) +#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff)
static struct mcfg_fixup mcfg_quirks[] = { -/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +/* { OEM_ID, OEM_TABLE_ID, REV, SEGMENT, BUS_RANGE, cfgres, ops }, */
This comment appears to have the order of cfgres and ops reversed.
Am I correct in reading that if a user of the framework does not wish to override cfgres they must place a struct resource with .start = 0 at the end of their mcfg_quirks entry? If so, I guess I have the same questions about removing MCFG_RES_EMPTY as I do about removing MCFG_DOM_ANY.
Thanks, Cov
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
Hi Bjorn, Thomasz,
On 2016-09-20 15:26, Bjorn Helgaas wrote:
On Fri, Sep 09, 2016 at 09:24:02PM +0200, Tomasz Nowicki wrote:
Quirk handling relies on an idea of simple static array which contains quirk entries. Each entry consists of identification information (IDs from standard header of MCFG table) along with custom pci_ecam_ops structure and configuration space resource structure. This way it is possible to find corresponding quirk entries and override pci_ecam_ops and PCI configuration space regions.
As an example, the last 3 patches present quirk handling mechanism usage for ThunderX.
v5 -> v6
- rebase against v4.8-rc5
- drop patch 1 from the previous series
- keep pci_acpi_setup_ecam_mapping() in ARM64 arch directory
- move quirk code to pci_mcfg.c
- restrict quirk to override pci_ecam_ops and CFG resource structure only, no init call any more
- split ThunderX quirks into the smaller chunks
- add ThunderX pass1.x silicon revision support
v4 -> v5
- rebase against v4.8-rc1
- rework to exact MCFG OEM ID, TABLE ID, rev match
- use memcmp instead of strncmp
- no substring match
- fix typos and dmesg message
Tomasz Nowicki (5): PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform specific ECAM quirks PCI: thunder-pem: Allow to probe PEM-specific register range for ACPI case PCI: thunder: Enable ACPI PCI controller for ThunderX pass2.x silicon version PCI: thunder: Enable ACPI PCI controller for ThunderX pass1.x silicon version
arch/arm64/kernel/pci.c | 17 ++-- drivers/acpi/pci_mcfg.c | 168 +++++++++++++++++++++++++++++++++++- drivers/pci/host/pci-thunder-ecam.c | 2 +- drivers/pci/host/pci-thunder-pem.c | 63 +++++++++++--- include/linux/pci-acpi.h | 4 +- include/linux/pci-ecam.h | 7 ++ 6 files changed, 230 insertions(+), 31 deletions(-)
I'm not quite ready to merge these because we haven't resolved the question of how to expose the resources used by the memory-mapped config space. I'm fine with the first two patches (I did make a couple trivial changes, see below), but there's no point in merging them until we merge a user for them.
I pushed the series to pci/ecam-v6 for build testing and discussion. The diff (the changes I made locally) from v6 as posted by Tomasz is below.
Rebasing the following simple quirks framework user onto this branch, I have some questions.
https://source.codeaurora.org/quic/server/kernel/commit/?h=cov/4.8-rc2-testi...
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index eb14f74..bb3b8ad 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -42,86 +42,59 @@ struct mcfg_fixup { struct resource cfgres; };
-#define MCFG_DOM_ANY (-1)
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \ ((end) - (start) + 1), \ NULL, IORESOURCE_BUS) -#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff) -#define MCFG_RES_EMPTY DEFINE_RES_NAMED(0, 0, NULL, 0) +#define MCFG_BUS_ANY MCFG_BUS_RANGE(0x0, 0xff)
static struct mcfg_fixup mcfg_quirks[] = { -/* { OEM_ID, OEM_TABLE_ID, REV, DOMAIN, BUS_RANGE, cfgres, ops }, */ +/* { OEM_ID, OEM_TABLE_ID, REV, SEGMENT, BUS_RANGE, cfgres, ops }, */
This comment appears to have the order of cfgres and ops reversed.
Fixed, thanks!
Am I correct in reading that if a user of the framework does not wish to override cfgres they must place a struct resource with .start = 0 at the end of their mcfg_quirks entry? If so, I guess I have the same questions about removing MCFG_RES_EMPTY as I do about removing MCFG_DOM_ANY.
You're right that we only override cfgres if the quirk supplies a struct resource with non-zero .start. I removed MCFG_RES_EMPTY because mcfg_quirks[] is a static array and is initialized with all members being zero anyway. If a quirk doesn't need to override cfgres, I think it's more readable if the quirk just doesn't mention the resource at all.
Bjorn
On 9/21/2016 9:11 AM, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
Hi Bjorn, Thomasz,
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
Is it possible to queue up Cov's patch as part of this effort once he rebases and sends an updated version? Cov will have to implement something else now.
On Wed, Sep 21, 2016 at 10:07:36AM -0400, Sinan Kaya wrote:
On 9/21/2016 9:11 AM, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
Hi Bjorn, Thomasz,
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
Is it possible to queue up Cov's patch as part of this effort once he rebases and sends an updated version? Cov will have to implement something else now.
I haven't seen Cov's patch (patchwork doesn't follow URLs to git trees, and I normally don't either). If they show up on the mailing list, I'll take a look, of course.
Bjorn
On 9/21/2016 1:31 PM, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 10:07:36AM -0400, Sinan Kaya wrote:
On 9/21/2016 9:11 AM, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
Hi Bjorn, Thomasz,
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
Is it possible to queue up Cov's patch as part of this effort once he rebases and sends an updated version? Cov will have to implement something else now.
I haven't see Cov's patch (patchwork doesn't follow URLs to git trees, and I normally don't either). If they show up on the mailing list, I'll take a look, of course.
Bjorn
Thanks, I talked to Cov today. He's getting ready to post the rebased patch once he completes testing.
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org --- drivers/acpi/pci_mcfg.c | 8 ++++++++ drivers/pci/ecam.c | 10 ++++++++++ include/linux/pci-ecam.h | 1 + 3 files changed, 19 insertions(+)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 245b79f..212334f 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -96,6 +96,14 @@ static struct mcfg_fixup mcfg_quirks[] = { THUNDER_ECAM_MCFG(2, 12), THUNDER_ECAM_MCFG(2, 13), #endif + { "QCOM ", "QDF2432 ", 1, 0, MCFG_BUS_ANY, &pci_32b_ops }, + { "QCOM ", "QDF2432 ", 1, 1, MCFG_BUS_ANY, &pci_32b_ops }, + { "QCOM ", "QDF2432 ", 1, 2, MCFG_BUS_ANY, &pci_32b_ops }, + { "QCOM ", "QDF2432 ", 1, 3, MCFG_BUS_ANY, &pci_32b_ops }, + { "QCOM ", "QDF2432 ", 1, 4, MCFG_BUS_ANY, &pci_32b_ops }, + { "QCOM ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops }, + { "QCOM ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops }, + { "QCOM ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops }, };
static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index 43ed08d..c3b3063 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -162,3 +162,13 @@ struct pci_ecam_ops pci_generic_ecam_ops = { .write = pci_generic_config_write, } }; + +/* ops for 32 bit config space access quirk */ +struct pci_ecam_ops pci_32b_ops = { + .bus_shift = 20, + .pci_ops = { + .map_bus = pci_ecam_map_bus, + .read = pci_generic_config_read32, + .write = pci_generic_config_write32, + } +}; diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 35f0e81..a6cffb8 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -65,6 +65,7 @@ extern struct pci_ecam_ops pci_thunder_pem_ops; #ifdef CONFIG_PCI_HOST_THUNDER_ECAM extern struct pci_ecam_ops pci_thunder_ecam_ops; #endif +extern struct pci_ecam_ops pci_32b_ops;
#ifdef CONFIG_PCI_HOST_GENERIC /* for DT-based PCI controllers that support ECAM */
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
Note that this hardware is not spec-compliant since it doesn't support sub-32 bit config writes. I just proposed a patch to warn about that [1], so if/when we merge that patch and this one, you'll start seeing those warnings.
[1] http://lkml.kernel.org/r/20161031213902.6340.96123.stgit@bhelgaas-glaptop.ro...
drivers/acpi/pci_mcfg.c | 8 ++++++++ drivers/pci/ecam.c | 10 ++++++++++ include/linux/pci-ecam.h | 1 + 3 files changed, 19 insertions(+)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 245b79f..212334f 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -96,6 +96,14 @@ static struct mcfg_fixup mcfg_quirks[] = { THUNDER_ECAM_MCFG(2, 12), THUNDER_ECAM_MCFG(2, 13), #endif
+ { "QCOM ", "QDF2432 ", 1, 0, MCFG_BUS_ANY, &pci_32b_ops },
+ { "QCOM ", "QDF2432 ", 1, 1, MCFG_BUS_ANY, &pci_32b_ops },
+ { "QCOM ", "QDF2432 ", 1, 2, MCFG_BUS_ANY, &pci_32b_ops },
+ { "QCOM ", "QDF2432 ", 1, 3, MCFG_BUS_ANY, &pci_32b_ops },
+ { "QCOM ", "QDF2432 ", 1, 4, MCFG_BUS_ANY, &pci_32b_ops },
+ { "QCOM ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
+ { "QCOM ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
+ { "QCOM ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
}; static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index 43ed08d..c3b3063 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -162,3 +162,13 @@ struct pci_ecam_ops pci_generic_ecam_ops = { .write = pci_generic_config_write, } };
+/* ops for 32 bit config space access quirk */ +struct pci_ecam_ops pci_32b_ops = {
+ .bus_shift = 20,
+ .pci_ops = {
+ .map_bus = pci_ecam_map_bus,
+ .read = pci_generic_config_read32,
+ .write = pci_generic_config_write32,
+ }
+}; diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 35f0e81..a6cffb8 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -65,6 +65,7 @@ extern struct pci_ecam_ops pci_thunder_pem_ops; #ifdef CONFIG_PCI_HOST_THUNDER_ECAM extern struct pci_ecam_ops pci_thunder_ecam_ops; #endif +extern struct pci_ecam_ops pci_32b_ops; #ifdef CONFIG_PCI_HOST_GENERIC /* for DT-based PCI controllers that support ECAM */ -- Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Note that this hardware is not spec-compliant since it doesn't support sub-32 bit config writes. I just proposed a patch to warn about that [1], so if/when we merge that patch and this one, you'll start seeing those warnings.
[1] http://lkml.kernel.org/r/20161031213902.6340.96123.stgit@bhelgaas-glaptop.ro...
That looks great, thank you. The earlier PCI HDL and SoC vendors can be made aware of such problems, the better.
Thanks, Cov
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
- I'm OK with the first two patches that add the quirk infrastructure.
- My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
Then the question is how the Qualcomm ECAM address space is described. Your quirk overrides the default pci_generic_ecam_ops with the &pci_32b_ops, but it doesn't touch the address space part, so I assume the bus ranges and corresponding address space in your MCFG is correct. So far, so good.
Is there also an ACPI device that contains that space in _CRS? I think we concluded that the standard solution is to describe this with a PNP0C02 device.
Would you mind opening a bugzilla at bugzilla.kernel.org and attaching the dmesg log, /proc/iomem, and maybe a DSDT dump? I'd like to have something to point at to say "if you need an MCFG quirk, you need the MCFG bit and *also* these other related ACPI device bits, and here's how it should be done."
Bjorn
Hi Bjorn,
On 11/2/2016 12:08 PM, Bjorn Helgaas wrote:
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
I'm OK with the first two patches that add the quirk infrastructure.
My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
Then the question is how the Qualcomm ECAM address space is described. Your quirk overrides the default pci_generic_ecam_ops with the &pci_32b_ops, but it doesn't touch the address space part, so I assume the bus ranges and corresponding address space in your MCFG is correct. So far, so good.
Qualcomm ECAM space includes both the root port and the endpoint address space with a single contiguous 256 MB address space described in MCFG table. There is no need to describe additional resources like PNP0C02.
The only thing we missed was 8/16 bits access support on the root port. That's why, we need Cov's patch.
Is there also an ACPI device that contains that space in _CRS? I think we concluded that the standard solution is to describe this with a PNP0C02 device.
Would you mind opening a bugzilla at bugzilla.kernel.org and attaching the dmesg log, /proc/iomem, and maybe a DSDT dump? I'd like to have something to point at to say "if you need an MCFG quirk, you need the MCFG bit and *also* these other related ACPI device bits, and here's how it should be done."
Bjorn
On Wed, Nov 02, 2016 at 12:36:16PM -0400, Sinan Kaya wrote:
Hi Bjorn,
On 11/2/2016 12:08 PM, Bjorn Helgaas wrote:
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
I'm OK with the first two patches that add the quirk infrastructure.
My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
Then the question is how the Qualcomm ECAM address space is described. Your quirk overrides the default pci_generic_ecam_ops with the &pci_32b_ops, but it doesn't touch the address space part, so I assume the bus ranges and corresponding address space in your MCFG is correct. So far, so good.
Qualcomm ECAM space includes both the root port and the endpoint address space with a single contiguous 256 MB address space described in MCFG table. There is no need to describe additional resources like PNP0C02.
This is the crucial point I have failed to communicate clearly: the PNP0C02 resource is *always* required, even if the MCFG is correct.
The reason is that MCFG is a PCI-specific table, and it should be possible to boot a kernel with no PCI support. That kernel will not look at the MCFG. The PCI hardware will still be present and will still consume the ECAM space, so the OS must be able to discover that the ECAM space is not available for other devices.
The usual way for the OS to discover that would be via the _CRS of a PNP0A03 or PNP0A08 host bridge device. _CRS is what I mean by a "generic" way to describe this address space, because the ACPI core can interpret _CRS for all ACPI devices, even if the kernel doesn't contain drivers for all of those devices.
It turns out that we can't use the _CRS of host bridges because of the Producer/Consumer bit screwup [1]. So the fallback is to include the ECAM space in the _CRS of a PNP0C02 device. This is what the PCI Firmware spec r3.0, Table 4-2, footnote 2 is talking about.
Bjorn
[1] The original ACPI spec intent was that Consumer resources would be space like ECAM that is consumed directly by the bridge, and Producer resources would be the windows forwarded down to PCI. But BIOSes didn't use the Producer/Consumer bit consistently, so we have to assume that all resources in host bridge _CRS are windows, which leaves us no way to describe the Consumer resources.
On 11/3/2016 10:00 AM, Bjorn Helgaas wrote:
On Wed, Nov 02, 2016 at 12:36:16PM -0400, Sinan Kaya wrote:
Hi Bjorn,
On 11/2/2016 12:08 PM, Bjorn Helgaas wrote:
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
I'm OK with the first two patches that add the quirk infrastructure.
My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
Then the question is how the Qualcomm ECAM address space is described. Your quirk overrides the default pci_generic_ecam_ops with the &pci_32b_ops, but it doesn't touch the address space part, so I assume the bus ranges and corresponding address space in your MCFG is correct. So far, so good.
Qualcomm ECAM space includes both the root port and the endpoint address space with a single contiguous 256 MB address space described in MCFG table. There is no need to describe additional resources like PNP0C02.
This is the crucial point I have failed to communicate clearly: the PNP0C02 resource is *always* required, even if the MCFG is correct.
Interesting...
It looks like there are a lot of lessons learnt here from history.
I think this requirement is only true if your system DDR space and PCIe space overlap in the memory map. I understand that Intel systems allow sharing of these two memory ranges. An OS could potentially reclaim this address range.
If there is no overlap and PCI is not enabled, there can't be any SW entity to reclaim this space.
Did I miss something?
The reason is that MCFG is a PCI-specific table, and it should be possible to boot a kernel with no PCI support. That kernel will not look at the MCFG. The PCI hardware will still be present and will still consume the ECAM space, so the OS must be able to discover that the ECAM space is not available for other devices.
The usual way for the OS to discover that would be via the _CRS of a PNP0A03 or PNP0A08 host bridge device. _CRS is what I mean by a "generic" way to describe this address space, because the ACPI core can interpret _CRS for all ACPI devices, even if the kernel doesn't contain drivers for all of those devices.
It turns out that we can't use the _CRS of host bridges because of the Producer/Consumer bit screwup [1]. So the fallback is to include the ECAM space in the _CRS of a PNP0C02 device. This is what the PCI Firmware spec r3.0, Table 4-2, footnote 2 is talking about.
Bjorn
[1] The original ACPI spec intent was that Consumer resources would be space like ECAM that is consumed directly by the bridge, and Producer resources would be the windows forwarded down to PCI. But BIOSes didn't use the Producer/Consumer bit consistently, so we have to assume that all resources in host bridge _CRS are windows, which leaves us no way to describe the Consumer resources. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 11/3/2016 12:58 PM, Sinan Kaya wrote:
This is the crucial point I have failed to communicate clearly: the PNP0C02 resource is *always* required, even if the MCFG is correct.
Interesting...
It looks like there are a lot of lessons learnt here from history.
I think this requirement is only true if your system DDR space and PCIe space overlap in the memory map. I understand that Intel systems allow sharing of these two memory ranges. An OS could potentially reclaim this address range.
If there is no overlap and PCI is not enabled, there can't be any SW entity to reclaim this space.
Did I miss something?
For protection, it makes sense to reserve this range. I'm trying to understand who would claim this range.
On Thu, Nov 03, 2016 at 12:58:10PM -0400, Sinan Kaya wrote:
On 11/3/2016 10:00 AM, Bjorn Helgaas wrote:
On Wed, Nov 02, 2016 at 12:36:16PM -0400, Sinan Kaya wrote:
Hi Bjorn,
On 11/2/2016 12:08 PM, Bjorn Helgaas wrote:
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
I'm OK with the first two patches that add the quirk infrastructure.
My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
Then the question is how the Qualcomm ECAM address space is described. Your quirk overrides the default pci_generic_ecam_ops with the &pci_32b_ops, but it doesn't touch the address space part, so I assume the bus ranges and corresponding address space in your MCFG is correct. So far, so good.
Qualcomm ECAM space includes both the root port and the endpoint address space with a single contiguous 256 MB address space described in MCFG table. There is no need to describe additional resources like PNP0C02.
This is the crucial point I have failed to communicate clearly: the PNP0C02 resource is *always* required, even if the MCFG is correct.
Interesting...
It looks like there are a lot of lessons learnt here from history.
I think this requirement is only true if your system DDR space and PCIe space overlap in the memory map. I understand that Intel systems allow sharing of these two memory ranges. An OS could potentially reclaim this address range.
If there is no overlap and PCI is not enabled, there can't be any SW entity to reclaim this space.
No, this isn't really anything to do with DDR/PCIe overlaps. This is just a fundamental part of the ACPI model: the firmware should communicate all address space usage to the OS either via ACPI or via standard self-describing mechanisms like PCI BARs.
You can argue that this isn't "necessary", but that's an assumption based on your knowledge of this particular system, and we don't want the OS to have to make that assumption. For example, ACPI allows the hot-addition of new ACPI devices, and we may have to assign address space for them, and we don't want to collide with existing devices.
Bjorn
On 11/3/2016 4:43 PM, Bjorn Helgaas wrote:
On Thu, Nov 03, 2016 at 12:58:10PM -0400, Sinan Kaya wrote:
On 11/3/2016 10:00 AM, Bjorn Helgaas wrote:
On Wed, Nov 02, 2016 at 12:36:16PM -0400, Sinan Kaya wrote:
Hi Bjorn,
On 11/2/2016 12:08 PM, Bjorn Helgaas wrote:
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
I'm OK with the first two patches that add the quirk infrastructure.
My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
Then the question is how the Qualcomm ECAM address space is described. Your quirk overrides the default pci_generic_ecam_ops with the &pci_32b_ops, but it doesn't touch the address space part, so I assume the bus ranges and corresponding address space in your MCFG is correct. So far, so good.
Qualcomm ECAM space includes both the root port and the endpoint address space with a single contiguous 256 MB address space described in MCFG table. There is no need to describe additional resources like PNP0C02.
This is the crucial point I have failed to communicate clearly: the PNP0C02 resource is *always* required, even if the MCFG is correct.
Interesting...
It looks like there are a lot of lessons learnt here from history.
I think this requirement is only true if your system DDR space and PCIe space overlap in the memory map. I understand that Intel systems allow sharing of these two memory ranges. An OS could potentially reclaim this address range.
If there is no overlap and PCI is not enabled, there can't be any SW entity to reclaim this space.
No, this isn't really anything to do with DDR/PCIe overlaps. This is just a fundamental part of the ACPI model: the firmware should communicate all address space usage to the OS either via ACPI or via standard self-describing mechanisms like PCI BARs.
You can argue that this isn't "necessary", but that's an assumption based on your knowledge of this particular system, and we don't want the OS to have to make that assumption. For example, ACPI allows the hot-addition of new ACPI devices, and we may have to assign address space for them, and we don't want to collide with existing devices.
Thanks for the description.
Bjorn
To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 11/03/2016 10:00 AM, Bjorn Helgaas wrote:
It turns out that we can't use the _CRS of host bridges because of the Producer/Consumer bit screwup [1]. So the fallback is to include the ECAM space in the _CRS of a PNP0C02 device. This is what the PCI Firmware spec r3.0, Table 4-2, footnote 2 is talking about.
Bjorn
[1] The original ACPI spec intent was that Consumer resources would be space like ECAM that is consumed directly by the bridge, and Producer resources would be the windows forwarded down to PCI. But BIOSes didn't use the Producer/Consumer bit consistently, so we have to assume that all resources in host bridge _CRS are windows, which leaves us no way to describe the Consumer resources.
Aside - and now I realize you'd called this out as recently as last month. Alas the HPE m400 I reference on the other thread about the APM quirks doesn't have the motherboard resource entry so we're stuck with exactly the situation you describe above there.
Jon.
On Wed, Nov 02, 2016 at 11:08:20AM -0500, Bjorn Helgaas wrote:
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
I'm OK with the first two patches that add the quirk infrastructure.
My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
I put those first two patches and yours on pci/ecam-v6 and pushed it again, so you can check it out.
Bjorn
Hi Bjorn,
On 11/02/2016 12:08 PM, Bjorn Helgaas wrote:
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
I'm OK with the first two patches that add the quirk infrastructure.
My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
Then the question is how the Qualcomm ECAM address space is described. Your quirk overrides the default pci_generic_ecam_ops with the &pci_32b_ops, but it doesn't touch the address space part, so I assume the bus ranges and corresponding address space in your MCFG is correct. So far, so good.
Is there also an ACPI device that contains that space in _CRS? I think we concluded that the standard solution is to describe this with a PNP0C02 device.
Would you mind opening a bugzilla at bugzilla.kernel.org and attaching the dmesg log, /proc/iomem, and maybe a DSDT dump? I'd like to have something to point at to say "if you need an MCFG quirk, you need the MCFG bit and *also* these other related ACPI device bits, and here's how it should be done."
We're working to add the PNP0C02 resource to future firmware, but it's not in the current firmware. Are dmesg and /proc/iomem from the current firmware interesting or should we wait for the update to file?
Thanks, Cov
On Wed, Nov 09, 2016 at 02:25:56PM -0500, Christopher Covington wrote:
Hi Bjorn,
On 11/02/2016 12:08 PM, Bjorn Helgaas wrote:
On Tue, Nov 01, 2016 at 07:06:31AM -0600, cov@codeaurora.org wrote:
Hi Bjorn,
On 2016-10-31 15:48, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:38:05PM -0400, Christopher Covington wrote:
The Qualcomm Technologies QDF2432 SoC does not support accesses smaller than 32 bits to the PCI configuration space. Register the appropriate quirk.
Signed-off-by: Christopher Covington cov@codeaurora.org
Hi Christopher,
Can you rebase this against v4.9-rc1? It no longer applies to my tree.
I apologize for not being clearer. This patch depends on:
PCI/ACPI: Extend pci_mcfg_lookup() responsibilities PCI/ACPI: Check platform-specific ECAM quirks
These patches from Tomasz Nowicki were previously in your pci/ecam-v6 branch, but that seems to have come and gone. How would you like to proceed?
Oh yes, that's right, I forgot that connection. I'm afraid I kind of dropped the ball on that thread, so I went back and read through it again.
I *think* the current state is:
I'm OK with the first two patches that add the quirk infrastructure.
My issue with the last three patches that add ThunderX quirks is that there's no generic description of the ECAM address space.
So if I understand correctly, your Qualcomm patch depends only on the first two patches.
Then the question is how the Qualcomm ECAM address space is described. Your quirk overrides the default pci_generic_ecam_ops with the &pci_32b_ops, but it doesn't touch the address space part, so I assume the bus ranges and corresponding address space in your MCFG is correct. So far, so good.
Is there also an ACPI device that contains that space in _CRS? I think we concluded that the standard solution is to describe this with a PNP0C02 device.
Would you mind opening a bugzilla at bugzilla.kernel.org and attaching the dmesg log, /proc/iomem, and maybe a DSDT dump? I'd like to have something to point at to say "if you need an MCFG quirk, you need the MCFG bit and *also* these other related ACPI device bits, and here's how it should be done."
We're working to add the PNP0C02 resource to future firmware, but it's not in the current firmware. Are dmesg and /proc/iomem from the current firmware interesting or should we wait for the update to file?
Note that the ECAM space is not the only thing that should be described via these PNP0C02 devices. *All* non-enumerable resources should be described by the _CRS method of some ACPI device. Here's a sample from my laptop:
PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xf8000000-0xfbffffff] (base 0xf8000000) system 00:01: [io 0x1800-0x189f] could not be reserved system 00:01: [io 0x0800-0x087f] has been reserved system 00:01: [io 0x0880-0x08ff] has been reserved system 00:01: [io 0x0900-0x097f] has been reserved system 00:01: [io 0x0980-0x09ff] has been reserved system 00:01: [io 0x0a00-0x0a7f] has been reserved system 00:01: [io 0x0a80-0x0aff] has been reserved system 00:01: [io 0x0b00-0x0b7f] has been reserved system 00:01: [io 0x0b80-0x0bff] has been reserved system 00:01: [io 0x15e0-0x15ef] has been reserved system 00:01: [io 0x1600-0x167f] has been reserved system 00:01: [io 0x1640-0x165f] has been reserved system 00:01: [mem 0xf8000000-0xfbffffff] could not be reserved system 00:01: [mem 0xfed10000-0xfed13fff] has been reserved system 00:01: [mem 0xfed18000-0xfed18fff] has been reserved system 00:01: [mem 0xfed19000-0xfed19fff] has been reserved system 00:01: [mem 0xfeb00000-0xfebfffff] has been reserved system 00:01: [mem 0xfed20000-0xfed3ffff] has been reserved system 00:01: [mem 0xfed90000-0xfed93fff] has been reserved system 00:01: [mem 0xf7fe0000-0xf7ffffff] has been reserved system 00:01: Plug and Play ACPI device, IDs PNP0c02 (active)
Do you have firmware in the field that may not get updated? If so, I'd like to see the whole solution for that firmware, including the MCFG quirk (which tells the PCI core where the ECAM region is) and whatever PNP0C02 quirk you figure out to actually reserve the region.
I proposed a PNP0C02 quirk to Duc along the lines of the one below. I don't actually know if it's feasible, but it didn't look as bad as I expected, so I'd kind of like somebody to try it out. I think you would have to call this via a DMI hook (do you have DMI on arm64?), maybe from pnp_init() or similar.
struct pnp_protocol pnpquirk_protocol = { .name = "Plug and Play Quirks", };
void quirk() { struct pnp_dev *dev; struct resource res;
ret = pnp_register_protocol(&pnpquirk_protocol); if (ret) return;
dev = pnp_alloc_dev(&pnpquirk_protocol, 0, "PNP0C02"); if (!dev) return;
res.start = XX; /* ECAM start */ res.end = YY; /* ECAM end */ res.flags = IORESOURCE_MEM; pnp_add_resource(dev, &res);
dev->active = 1; pnp_add_device(dev);
dev_info(&dev->dev, "fabricated device to reserve ECAM space %pR\n", &res); }
Bjorn
Hi Bjorn,
On 9 November 2016 at 20:06, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Nov 09, 2016 at 02:25:56PM -0500, Christopher Covington wrote:
Hi Bjorn,
[...]
We're working to add the PNP0C02 resource to future firmware, but it's not in the current firmware. Are dmesg and /proc/iomem from the current firmware interesting or should we wait for the update to file?
Note that the ECAM space is not the only thing that should be described via these PNP0C02 devices. *All* non-enumerable resources should be described by the _CRS method of some ACPI device. Here's a sample from my laptop:
PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xf8000000-0xfbffffff] (base 0xf8000000) system 00:01: [io 0x1800-0x189f] could not be reserved system 00:01: [io 0x0800-0x087f] has been reserved system 00:01: [io 0x0880-0x08ff] has been reserved system 00:01: [io 0x0900-0x097f] has been reserved system 00:01: [io 0x0980-0x09ff] has been reserved system 00:01: [io 0x0a00-0x0a7f] has been reserved system 00:01: [io 0x0a80-0x0aff] has been reserved system 00:01: [io 0x0b00-0x0b7f] has been reserved system 00:01: [io 0x0b80-0x0bff] has been reserved system 00:01: [io 0x15e0-0x15ef] has been reserved system 00:01: [io 0x1600-0x167f] has been reserved system 00:01: [io 0x1640-0x165f] has been reserved system 00:01: [mem 0xf8000000-0xfbffffff] could not be reserved system 00:01: [mem 0xfed10000-0xfed13fff] has been reserved system 00:01: [mem 0xfed18000-0xfed18fff] has been reserved system 00:01: [mem 0xfed19000-0xfed19fff] has been reserved system 00:01: [mem 0xfeb00000-0xfebfffff] has been reserved system 00:01: [mem 0xfed20000-0xfed3ffff] has been reserved system 00:01: [mem 0xfed90000-0xfed93fff] has been reserved system 00:01: [mem 0xf7fe0000-0xf7ffffff] has been reserved system 00:01: Plug and Play ACPI device, IDs PNP0c02 (active)
Do you have firmware in the field that may not get updated? If so, I'd like to see the whole solution for that firmware, including the MCFG quirk (which tells the PCI core where the ECAM region is) and whatever PNP0C02 quirk you figure out to actually reserve the region.
I proposed a PNP0C02 quirk to Duc along the lines of the below. I don't actually know if it's feasible, but it didn't look as bad as I expected, so I'd kind of like somebody to try it out. I think you would have to call this via a DMI hook (do you have DMI on arm64?), maybe from pnp_init() or similar.
We do have SMBIOS/DMI on arm64, but we have been successful so far not to rely on it for quirks, and we'd very much like to keep it that way.
Since this ACPI _CRS method has nothing to do with SMBIOS/DMI, surely there is a better way to wire up the reservation code to the information exposed by ACPI?
On Wed, Nov 09, 2016 at 08:29:23PM +0000, Ard Biesheuvel wrote:
Hi Bjorn,
On 9 November 2016 at 20:06, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Nov 09, 2016 at 02:25:56PM -0500, Christopher Covington wrote:
Hi Bjorn,
[...]
We're working to add the PNP0C02 resource to future firmware, but it's not in the current firmware. Are dmesg and /proc/iomem from the current firmware interesting or should we wait for the update to file?
Note that the ECAM space is not the only thing that should be described via these PNP0C02 devices. *All* non-enumerable resources should be described by the _CRS method of some ACPI device. Here's a sample from my laptop:
PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xf8000000-0xfbffffff] (base 0xf8000000) system 00:01: [io 0x1800-0x189f] could not be reserved system 00:01: [io 0x0800-0x087f] has been reserved system 00:01: [io 0x0880-0x08ff] has been reserved system 00:01: [io 0x0900-0x097f] has been reserved system 00:01: [io 0x0980-0x09ff] has been reserved system 00:01: [io 0x0a00-0x0a7f] has been reserved system 00:01: [io 0x0a80-0x0aff] has been reserved system 00:01: [io 0x0b00-0x0b7f] has been reserved system 00:01: [io 0x0b80-0x0bff] has been reserved system 00:01: [io 0x15e0-0x15ef] has been reserved system 00:01: [io 0x1600-0x167f] has been reserved system 00:01: [io 0x1640-0x165f] has been reserved system 00:01: [mem 0xf8000000-0xfbffffff] could not be reserved system 00:01: [mem 0xfed10000-0xfed13fff] has been reserved system 00:01: [mem 0xfed18000-0xfed18fff] has been reserved system 00:01: [mem 0xfed19000-0xfed19fff] has been reserved system 00:01: [mem 0xfeb00000-0xfebfffff] has been reserved system 00:01: [mem 0xfed20000-0xfed3ffff] has been reserved system 00:01: [mem 0xfed90000-0xfed93fff] has been reserved system 00:01: [mem 0xf7fe0000-0xf7ffffff] has been reserved system 00:01: Plug and Play ACPI device, IDs PNP0c02 (active)
Do you have firmware in the field that may not get updated? If so, I'd like to see the whole solution for that firmware, including the MCFG quirk (which tells the PCI core where the ECAM region is) and whatever PNP0C02 quirk you figure out to actually reserve the region.
I proposed a PNP0C02 quirk to Duc along the lines of the below. I don't actually know if it's feasible, but it didn't look as bad as I expected, so I'd kind of like somebody to try it out. I think you would have to call this via a DMI hook (do you have DMI on arm64?), maybe from pnp_init() or similar.
We do have SMBIOS/DMI on arm64, but we have been successful so far not to rely on it for quirks, and we'd very much like to keep it that way.
Since this ACPI _CRS method has nothing to do with SMBIOS/DMI, surely there is a better way to wire up the reservation code to the information exposed by ACPI?
I'm open to other ways, feel free to propose one :)
If you do a quirk, you need some way to identify the machine/firmware combination, because you don't want to apply the quirk on every machine. You're trying to work around a firmware issue, so you probably want something tied to the firmware version. On x86, that's typically done with DMI.
Bjorn
On 10 November 2016 at 06:49, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Nov 09, 2016 at 08:29:23PM +0000, Ard Biesheuvel wrote:
Hi Bjorn,
On 9 November 2016 at 20:06, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Nov 09, 2016 at 02:25:56PM -0500, Christopher Covington wrote:
Hi Bjorn,
[...]
We're working to add the PNP0C02 resource to future firmware, but it's not in the current firmware. Are dmesg and /proc/iomem from the current firmware interesting or should we wait for the update to file?
Note that the ECAM space is not the only thing that should be described via these PNP0C02 devices. *All* non-enumerable resources should be described by the _CRS method of some ACPI device. Here's a sample from my laptop:
PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xf8000000-0xfbffffff] (base 0xf8000000) system 00:01: [io 0x1800-0x189f] could not be reserved system 00:01: [io 0x0800-0x087f] has been reserved system 00:01: [io 0x0880-0x08ff] has been reserved system 00:01: [io 0x0900-0x097f] has been reserved system 00:01: [io 0x0980-0x09ff] has been reserved system 00:01: [io 0x0a00-0x0a7f] has been reserved system 00:01: [io 0x0a80-0x0aff] has been reserved system 00:01: [io 0x0b00-0x0b7f] has been reserved system 00:01: [io 0x0b80-0x0bff] has been reserved system 00:01: [io 0x15e0-0x15ef] has been reserved system 00:01: [io 0x1600-0x167f] has been reserved system 00:01: [io 0x1640-0x165f] has been reserved system 00:01: [mem 0xf8000000-0xfbffffff] could not be reserved system 00:01: [mem 0xfed10000-0xfed13fff] has been reserved system 00:01: [mem 0xfed18000-0xfed18fff] has been reserved system 00:01: [mem 0xfed19000-0xfed19fff] has been reserved system 00:01: [mem 0xfeb00000-0xfebfffff] has been reserved system 00:01: [mem 0xfed20000-0xfed3ffff] has been reserved system 00:01: [mem 0xfed90000-0xfed93fff] has been reserved system 00:01: [mem 0xf7fe0000-0xf7ffffff] has been reserved system 00:01: Plug and Play ACPI device, IDs PNP0c02 (active)
Do you have firmware in the field that may not get updated? If so, I'd like to see the whole solution for that firmware, including the MCFG quirk (which tells the PCI core where the ECAM region is) and whatever PNP0C02 quirk you figure out to actually reserve the region.
I proposed a PNP0C02 quirk to Duc along the lines of the below. I don't actually know if it's feasible, but it didn't look as bad as I expected, so I'd kind of like somebody to try it out. I think you would have to call this via a DMI hook (do you have DMI on arm64?), maybe from pnp_init() or similar.
We do have SMBIOS/DMI on arm64, but we have been successful so far not to rely on it for quirks, and we'd very much like to keep it that way.
Since this ACPI _CRS method has nothing to do with SMBIOS/DMI, surely there is a better way to wire up the reservation code to the information exposed by ACPI?
I'm open to other ways, feel free to propose one :)
If you do a quirk, you need some way to identify the machine/firmware combination, because you don't want to apply the quirk on every machine. You're trying to work around a firmware issue, so you probably want something tied to the firmware version. On x86, that's typically done with DMI.
I think I misunderstood the purpose of the example: that should only be necessary if the _CRS methods are missing from the firmware, right? If we update the firmware to cover all non-enumerable resources by such a method, we shouldn't need any such quirks at all IIUC
On Thu, Nov 10, 2016 at 06:25:16PM +0800, Ard Biesheuvel wrote:
On 10 November 2016 at 06:49, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Nov 09, 2016 at 08:29:23PM +0000, Ard Biesheuvel wrote:
Hi Bjorn,
On 9 November 2016 at 20:06, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Nov 09, 2016 at 02:25:56PM -0500, Christopher Covington wrote:
Hi Bjorn,
[...]
We're working to add the PNP0C02 resource to future firmware, but it's not in the current firmware. Are dmesg and /proc/iomem from the current firmware interesting or should we wait for the update to file?
Note that the ECAM space is not the only thing that should be described via these PNP0C02 devices. *All* non-enumerable resources should be described by the _CRS method of some ACPI device. Here's a sample from my laptop:
PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xf8000000-0xfbffffff] (base 0xf8000000) system 00:01: [io 0x1800-0x189f] could not be reserved system 00:01: [io 0x0800-0x087f] has been reserved system 00:01: [io 0x0880-0x08ff] has been reserved system 00:01: [io 0x0900-0x097f] has been reserved system 00:01: [io 0x0980-0x09ff] has been reserved system 00:01: [io 0x0a00-0x0a7f] has been reserved system 00:01: [io 0x0a80-0x0aff] has been reserved system 00:01: [io 0x0b00-0x0b7f] has been reserved system 00:01: [io 0x0b80-0x0bff] has been reserved system 00:01: [io 0x15e0-0x15ef] has been reserved system 00:01: [io 0x1600-0x167f] has been reserved system 00:01: [io 0x1640-0x165f] has been reserved system 00:01: [mem 0xf8000000-0xfbffffff] could not be reserved system 00:01: [mem 0xfed10000-0xfed13fff] has been reserved system 00:01: [mem 0xfed18000-0xfed18fff] has been reserved system 00:01: [mem 0xfed19000-0xfed19fff] has been reserved system 00:01: [mem 0xfeb00000-0xfebfffff] has been reserved system 00:01: [mem 0xfed20000-0xfed3ffff] has been reserved system 00:01: [mem 0xfed90000-0xfed93fff] has been reserved system 00:01: [mem 0xf7fe0000-0xf7ffffff] has been reserved system 00:01: Plug and Play ACPI device, IDs PNP0c02 (active)
Do you have firmware in the field that may not get updated? If so, I'd like to see the whole solution for that firmware, including the MCFG quirk (which tells the PCI core where the ECAM region is) and whatever PNP0C02 quirk you figure out to actually reserve the region.
I proposed a PNP0C02 quirk to Duc along the lines of the below. I don't actually know if it's feasible, but it didn't look as bad as I expected, so I'd kind of like somebody to try it out. I think you would have to call this via a DMI hook (do you have DMI on arm64?), maybe from pnp_init() or similar.
We do have SMBIOS/DMI on arm64, but we have been successful so far not to rely on it for quirks, and we'd very much like to keep it that way.
Since this ACPI _CRS method has nothing to do with SMBIOS/DMI, surely there is a better way to wire up the reservation code to the information exposed by ACPI?
I'm open to other ways, feel free to propose one :)
If you do a quirk, you need some way to identify the machine/firmware combination, because you don't want to apply the quirk on every machine. You're trying to work around a firmware issue, so you probably want something tied to the firmware version. On x86, that's typically done with DMI.
I think I misunderstood the purpose of the example: that should only be necessary if the _CRS methods are missing from the firmware, right? If we update the firmware to cover all non-enumerable resources by such a method, we shouldn't need any such quirks at all IIUC
Yes, that's correct: you need a quirk to fabricate a PNP0C02 motherboard resource if it is not present in FW.
Lorenzo
On Thu, Nov 10, 2016 at 06:25:16PM +0800, Ard Biesheuvel wrote:
On 10 November 2016 at 06:49, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Nov 09, 2016 at 08:29:23PM +0000, Ard Biesheuvel wrote:
Hi Bjorn,
On 9 November 2016 at 20:06, Bjorn Helgaas helgaas@kernel.org wrote:
On Wed, Nov 09, 2016 at 02:25:56PM -0500, Christopher Covington wrote:
Hi Bjorn,
[...]
We're working to add the PNP0C02 resource to future firmware, but it's not in the current firmware. Are dmesg and /proc/iomem from the current firmware interesting or should we wait for the update to file?
Note that the ECAM space is not the only thing that should be described via these PNP0C02 devices. *All* non-enumerable resources should be described by the _CRS method of some ACPI device. Here's a sample from my laptop:
PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xf8000000-0xfbffffff] (base 0xf8000000) system 00:01: [io 0x1800-0x189f] could not be reserved system 00:01: [io 0x0800-0x087f] has been reserved system 00:01: [io 0x0880-0x08ff] has been reserved system 00:01: [io 0x0900-0x097f] has been reserved system 00:01: [io 0x0980-0x09ff] has been reserved system 00:01: [io 0x0a00-0x0a7f] has been reserved system 00:01: [io 0x0a80-0x0aff] has been reserved system 00:01: [io 0x0b00-0x0b7f] has been reserved system 00:01: [io 0x0b80-0x0bff] has been reserved system 00:01: [io 0x15e0-0x15ef] has been reserved system 00:01: [io 0x1600-0x167f] has been reserved system 00:01: [io 0x1640-0x165f] has been reserved system 00:01: [mem 0xf8000000-0xfbffffff] could not be reserved system 00:01: [mem 0xfed10000-0xfed13fff] has been reserved system 00:01: [mem 0xfed18000-0xfed18fff] has been reserved system 00:01: [mem 0xfed19000-0xfed19fff] has been reserved system 00:01: [mem 0xfeb00000-0xfebfffff] has been reserved system 00:01: [mem 0xfed20000-0xfed3ffff] has been reserved system 00:01: [mem 0xfed90000-0xfed93fff] has been reserved system 00:01: [mem 0xf7fe0000-0xf7ffffff] has been reserved system 00:01: Plug and Play ACPI device, IDs PNP0c02 (active)
Do you have firmware in the field that may not get updated? If so, I'd like to see the whole solution for that firmware, including the MCFG quirk (which tells the PCI core where the ECAM region is) and whatever PNP0C02 quirk you figure out to actually reserve the region.
I proposed a PNP0C02 quirk to Duc along the lines of the below. I don't actually know if it's feasible, but it didn't look as bad as I expected, so I'd kind of like somebody to try it out. I think you would have to call this via a DMI hook (do you have DMI on arm64?), maybe from pnp_init() or similar.
We do have SMBIOS/DMI on arm64, but we have been successful so far not to rely on it for quirks, and we'd very much like to keep it that way.
Since this ACPI _CRS method has nothing to do with SMBIOS/DMI, surely there is a better way to wire up the reservation code to the information exposed by ACPI?
I'm open to other ways, feel free to propose one :)
If you do a quirk, you need some way to identify the machine/firmware combination, because you don't want to apply the quirk on every machine. You're trying to work around a firmware issue, so you probably want something tied to the firmware version. On x86, that's typically done with DMI.
I think I misunderstood the purpose of the example: that should only be necessary if the _CRS methods are missing from the firmware, right? If we update the firmware to cover all non-enumerable resources by such a method, we shouldn't need any such quirks at all IIUC
Right: if the firmware provides a PNP0C02 device with _CRS that includes the ECAM area, we don't need any PNP/ACPI quirks. We will still need the MCFG quirks since the hardware doesn't fully support ECAM.
For the PNP/ACPI quirks, there are two interesting cases:
1) Firmware provides a PNP0C02 device, but its _CRS doesn't include the ECAM space, and
2) Firmware doesn't provide a PNP0C02 device at all.
For case 1, we could consider adding the ECAM space to the existing device. This is essentially what quirk_amd_mmconfig_area() does.
For case 2, we would have to fabricate the PNP0C02 device itself, then add the ECAM space to it. I don't think there's any existing code that does this, so this is what the example I proposed in this thread does.
One could argue that it might be cleaner to use case 2 instead of the case 1 approach because it avoids mucking with an ACPI device from firmware. For devices that support _SRS, case 1 might break things because _CRS and _SRS are supposed to use the same resource descriptor buffer, and if we add resources the firmware doesn't know about, I don't think we'll encode the _SRS buffer correctly. But this is only a theoretical risk because we basically never use _SRS today.
In either case, there has to be a mechanism to do the quirk only on the machine/firmware that needs it, of course.
Bjorn
On 11/10/2016 12:42 PM, Bjorn Helgaas wrote:
For the PNP/ACPI quirks, there are two interesting cases:
1) Firmware provides a PNP0C02 device, but its _CRS doesn't include the ECAM space, and
2) Firmware doesn't provide a PNP0C02 device at all.
For case 1, we could consider adding the ECAM space to the existing device. This is essentially what quirk_amd_mmconfig_area() does.
For case 2, we would have to fabricate the PNP0C02 device itself, then add the ECAM space to it. I don't think there's any existing code that does this, so this is what the example I proposed in this thread does.
(this isn't QCOM/QDT specific) We'll go scrub for examples where there are systems missing the motherboard resource and get firmware fixed. As an example, I know that HPE ProLiant m400 (Moonshot) will need to be updated. It would probably be easier to just get the firmware fixed to add this than to introduce the first DMI quirk for this one.
Ard and others very reasonably want to avoid DMI quirks on arm64. I take responsibility for being the guilty party that wrote SMBIOS/DMI into the SBBR originally as a means of keeping this failsafe for the future and because "that's what x86 does, so people will expect it". But we'll save that for a nasty situation further down the road. We are still working on getting vendors (other than QCOM and HPE, who have had this right since the beginning) to release firmware other than version "1.0" every time. That's always a good start ;)
Jon.
Hi Bjorn,
On 09/21/2016 09:11 AM, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index eb14f74..bb3b8ad 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -42,86 +42,59 @@ struct mcfg_fixup { struct resource cfgres; };
-#define MCFG_DOM_ANY (-1)
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
It looks like a stale "First match against PCI topology domain:bus..." comment remains.
Thanks, Cov
On Wed, Sep 21, 2016 at 06:40:47PM -0400, Christopher Covington wrote:
Hi Bjorn,
On 09/21/2016 09:11 AM, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index eb14f74..bb3b8ad 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -42,86 +42,59 @@ struct mcfg_fixup { struct resource cfgres; };
-#define MCFG_DOM_ANY (-1)
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
It looks like a stale "First match against PCI topology domain:bus..." comment remains.
Yep. I removed the comment since it's sort of obvious from the code. I also renamed a few things and pulled the match out into a helper function.
I also changed the dmesg note: I think the actual resource and the name of the pci_ecam_ops is more interesting than the table IDs (which I think are already elsewhere in the dmesg log).
Here's the incremental diff, which I can't really test:
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 245b79f..0b36bc5 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -36,7 +36,7 @@ struct mcfg_fixup { char oem_id[ACPI_OEM_ID_SIZE + 1]; char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; u32 oem_revision; - u16 seg; + u16 segment; struct resource bus_range; struct pci_ecam_ops *ops; struct resource cfgres; @@ -102,30 +102,37 @@ static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; static u32 mcfg_oem_revision;
-static void pci_mcfg_match_quirks(struct acpi_pci_root *root, +static int pci_mcfg_quirk_matches(struct mcfg_fixup *f, u16 segment, + struct resource *bus_range) +{ + if (!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) && + !memcmp(f->oem_table_id, mcfg_oem_table_id, + ACPI_OEM_TABLE_ID_SIZE) && + f->oem_revision == mcfg_oem_revision && + f->segment == segment && + resource_contains(&f->bus_range, bus_range)) + return 1; + + return 0; +} + +static void pci_mcfg_apply_quirks(struct acpi_pci_root *root, struct resource *cfgres, struct pci_ecam_ops **ecam_ops) { + u16 segment = root->segment; + struct resource *bus_range = &root->secondary; struct mcfg_fixup *f; int i;
- /* - * First match against PCI topology domain:bus then use OEM ID, OEM - * table ID, and OEM revision from MCFG table standard header. - */ for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) { - if (!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) && - !memcmp(f->oem_table_id, mcfg_oem_table_id, - ACPI_OEM_TABLE_ID_SIZE) && - f->oem_revision == mcfg_oem_revision && - f->seg == root->segment && - resource_contains(&f->bus_range, &root->secondary)) { + if (pci_mcfg_quirk_matches(f, segment, bus_range)) { if (f->cfgres.start) *cfgres = f->cfgres; if (f->ops) *ecam_ops = f->ops; - dev_info(&root->device->dev, "Applying PCI MCFG quirks for %s %s rev: %d\n", - f->oem_id, f->oem_table_id, f->oem_revision); + dev_info(&root->device->dev, "MCFG quirk: ECAM space for %pR at %pR with %ps\n", + bus_range, cfgres, *ecam_ops); return; } } @@ -173,7 +180,7 @@ skip_lookup: * MCFG does not have it. Invalid CFG start address means MCFG * firmware bug or we need another quirk in array. */ - pci_mcfg_match_quirks(root, &res, &ops); + pci_mcfg_apply_quirks(root, &res, &ops); if (!res.start) return -ENXIO;
On 09/22/2016 07:08 PM, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:40:47PM -0400, Christopher Covington wrote:
Hi Bjorn,
On 09/21/2016 09:11 AM, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index eb14f74..bb3b8ad 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -42,86 +42,59 @@ struct mcfg_fixup { struct resource cfgres; };
-#define MCFG_DOM_ANY (-1)
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
It looks like a stale "First match against PCI topology domain:bus..." comment remains.
Yep. I removed the comment since it's sort of obvious from the code. I also renamed a few things and pulled the match out into a helper function.
I also changed the dmesg note: I think the actual resource and the name of the pci_ecam_ops is more interesting than the table IDs (which I think are already elsewhere in the dmesg log).
It looks like the resource is already being printed from drivers/pci/ecam.c:102.
Here's the incremental diff, which I can't really test:
Here's what it looks like for me:
ACPI: PCI Root Bridge [PCI2] (domain 0002 [bus 00-1f]) acpi PNP0A08:02: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI] acpi PNP0A08:02: _OSC: platform does not support [PCIeHotplug] acpi PNP0A08:02: _OSC: OS now controls [PME AER PCIeCapability] acpi PNP0A08:02: MCFG quirk: ECAM space for [bus 00-1f] at [mem 0xa0000000000-0xa0001ffffff] with pci_3 acpi PNP0A08:02: ECAM at [mem 0xa0000000000-0xa0001ffffff] for [bus 00-1f] Remapped I/O 0x00000affffff0000 to [io 0x10000-0x1ffff window] PCI host bridge to bus 0002:00
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index 245b79f..0b36bc5 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -36,7 +36,7 @@ struct mcfg_fixup {
 	char oem_id[ACPI_OEM_ID_SIZE + 1];
 	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
 	u32 oem_revision;
-	u16 seg;
+	u16 segment;
 	struct resource bus_range;
 	struct pci_ecam_ops *ops;
 	struct resource cfgres;
@@ -102,30 +102,37 @@ static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
 static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
 static u32 mcfg_oem_revision;
 
-static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
+static int pci_mcfg_quirk_matches(struct mcfg_fixup *f, u16 segment,
+				  struct resource *bus_range)
+{
+	if (!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) &&
+	    !memcmp(f->oem_table_id, mcfg_oem_table_id,
+		    ACPI_OEM_TABLE_ID_SIZE) &&
+	    f->oem_revision == mcfg_oem_revision &&
+	    f->segment == segment &&
+	    resource_contains(&f->bus_range, bus_range))
+		return 1;
+
+	return 0;
+}
+
+static void pci_mcfg_apply_quirks(struct acpi_pci_root *root,
 				  struct resource *cfgres,
 				  struct pci_ecam_ops **ecam_ops)
 {
+	u16 segment = root->segment;
+	struct resource *bus_range = &root->secondary;
 	struct mcfg_fixup *f;
 	int i;
 
-	/*
-	 * First match against PCI topology <domain:bus> then use OEM ID, OEM
-	 * table ID, and OEM revision from MCFG table standard header.
-	 */
 	for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) {
-		if (!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) &&
-		    !memcmp(f->oem_table_id, mcfg_oem_table_id,
-			    ACPI_OEM_TABLE_ID_SIZE) &&
-		    f->oem_revision == mcfg_oem_revision &&
-		    f->seg == root->segment &&
-		    resource_contains(&f->bus_range, &root->secondary)) {
+		if (pci_mcfg_quirk_matches(f, segment, bus_range)) {
 			if (f->cfgres.start)
 				*cfgres = f->cfgres;
 			if (f->ops)
 				*ecam_ops = f->ops;
-			dev_info(&root->device->dev, "Applying PCI MCFG quirks for %s %s rev: %d\n",
-				 f->oem_id, f->oem_table_id, f->oem_revision);
+			dev_info(&root->device->dev, "MCFG quirk: ECAM space for %pR at %pR with %ps\n",
+				 bus_range, cfgres, *ecam_ops);
 			return;
 		}
 	}
@@ -173,7 +180,7 @@ skip_lookup:
 	 * MCFG does not have it. Invalid CFG start address means MCFG
 	 * firmware bug or we need another quirk in array.
 	 */
-	pci_mcfg_match_quirks(root, &res, &ops);
+	pci_mcfg_apply_quirks(root, &res, &ops);
 	if (!res.start)
 		return -ENXIO;
On Fri, Sep 23, 2016 at 02:41:39PM -0400, Christopher Covington wrote:
On 09/22/2016 07:08 PM, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:40:47PM -0400, Christopher Covington wrote:
Hi Bjorn,
On 09/21/2016 09:11 AM, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index eb14f74..bb3b8ad 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -42,86 +42,59 @@ struct mcfg_fixup { struct resource cfgres; };
-#define MCFG_DOM_ANY (-1)
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
It looks like a stale "First match against PCI topology domain:bus..." comment remains.
Yep. I removed the comment since it's sort of obvious from the code. I also renamed a few things and pulled the match out into a helper function.
I also changed the dmesg note: I think the actual resource and the name of the pci_ecam_ops is more interesting than the table IDs (which I think are already elsewhere in the dmesg log).
It looks like the resource is already being printed from drivers/pci/ecam.c:102.
Yes, but I want a hint that a quirk has overridden it because that's a clue that there's something wonky about the platform or the firmware.
But I guess it'd be nice to mirror the format of the existing info (mem first, then bus range).
Here's the incremental diff, which I can't really test:
Here's what it looks like for me:
ACPI: PCI Root Bridge [PCI2] (domain 0002 [bus 00-1f]) acpi PNP0A08:02: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI] acpi PNP0A08:02: _OSC: platform does not support [PCIeHotplug] acpi PNP0A08:02: _OSC: OS now controls [PME AER PCIeCapability] acpi PNP0A08:02: MCFG quirk: ECAM space for [bus 00-1f] at [mem 0xa0000000000-0xa0001ffffff] with pci_3
Is "pci_3" really the entire name? If not, what happened to the rest? I was hoping for a symbol we could grep for.
acpi PNP0A08:02: ECAM at [mem 0xa0000000000-0xa0001ffffff] for [bus 00-1f] Remapped I/O 0x00000affffff0000 to [io 0x10000-0x1ffff window] PCI host bridge to bus 0002:00
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index 245b79f..0b36bc5 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -36,7 +36,7 @@ struct mcfg_fixup {
 	char oem_id[ACPI_OEM_ID_SIZE + 1];
 	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
 	u32 oem_revision;
-	u16 seg;
+	u16 segment;
 	struct resource bus_range;
 	struct pci_ecam_ops *ops;
 	struct resource cfgres;
@@ -102,30 +102,37 @@ static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
 static char mcfg_oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
 static u32 mcfg_oem_revision;
 
-static void pci_mcfg_match_quirks(struct acpi_pci_root *root,
+static int pci_mcfg_quirk_matches(struct mcfg_fixup *f, u16 segment,
+				  struct resource *bus_range)
+{
+	if (!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) &&
+	    !memcmp(f->oem_table_id, mcfg_oem_table_id,
+		    ACPI_OEM_TABLE_ID_SIZE) &&
+	    f->oem_revision == mcfg_oem_revision &&
+	    f->segment == segment &&
+	    resource_contains(&f->bus_range, bus_range))
+		return 1;
+
+	return 0;
+}
+
+static void pci_mcfg_apply_quirks(struct acpi_pci_root *root,
 				  struct resource *cfgres,
 				  struct pci_ecam_ops **ecam_ops)
 {
+	u16 segment = root->segment;
+	struct resource *bus_range = &root->secondary;
 	struct mcfg_fixup *f;
 	int i;
 
-	/*
-	 * First match against PCI topology <domain:bus> then use OEM ID, OEM
-	 * table ID, and OEM revision from MCFG table standard header.
-	 */
 	for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) {
-		if (!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) &&
-		    !memcmp(f->oem_table_id, mcfg_oem_table_id,
-			    ACPI_OEM_TABLE_ID_SIZE) &&
-		    f->oem_revision == mcfg_oem_revision &&
-		    f->seg == root->segment &&
-		    resource_contains(&f->bus_range, &root->secondary)) {
+		if (pci_mcfg_quirk_matches(f, segment, bus_range)) {
 			if (f->cfgres.start)
 				*cfgres = f->cfgres;
 			if (f->ops)
 				*ecam_ops = f->ops;
-			dev_info(&root->device->dev, "Applying PCI MCFG quirks for %s %s rev: %d\n",
-				 f->oem_id, f->oem_table_id, f->oem_revision);
+			dev_info(&root->device->dev, "MCFG quirk: ECAM space for %pR at %pR with %ps\n",
+				 bus_range, cfgres, *ecam_ops);
 			return;
 		}
 	}
@@ -173,7 +180,7 @@ skip_lookup:
 	 * MCFG does not have it. Invalid CFG start address means MCFG
 	 * firmware bug or we need another quirk in array.
 	 */
-	pci_mcfg_match_quirks(root, &res, &ops);
+	pci_mcfg_apply_quirks(root, &res, &ops);
 	if (!res.start)
 		return -ENXIO;
-- Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
On 09/23/2016 03:17 PM, Bjorn Helgaas wrote:
On Fri, Sep 23, 2016 at 02:41:39PM -0400, Christopher Covington wrote:
On 09/22/2016 07:08 PM, Bjorn Helgaas wrote:
On Wed, Sep 21, 2016 at 06:40:47PM -0400, Christopher Covington wrote:
Hi Bjorn,
On 09/21/2016 09:11 AM, Bjorn Helgaas wrote:
On Tue, Sep 20, 2016 at 09:15:14PM -0400, cov@codeaurora.org wrote:
> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c > index eb14f74..bb3b8ad 100644 > --- a/drivers/acpi/pci_mcfg.c > +++ b/drivers/acpi/pci_mcfg.c > @@ -42,86 +42,59 @@ struct mcfg_fixup { > struct resource cfgres; > }; > > -#define MCFG_DOM_ANY (-1)
Did you delete this because there were no current users, because you'd prefer users just use "-1", or for some other reason?
I removed it because there were no users of it and, more importantly, the code doesn't implement support for it.
It looks like a stale "First match against PCI topology domain:bus..." comment remains.
Yep. I removed the comment since it's sort of obvious from the code. I also renamed a few things and pulled the match out into a helper function.
I also changed the dmesg note: I think the actual resource and the name of the pci_ecam_ops are more interesting than the table IDs (which I think are already elsewhere in the dmesg log).
It looks like the resource is already being printed from drivers/pci/ecam.c:102.
Yes, but I want a hint that a quirk has overridden it because that's a clue that there's something wonky about the platform or the firmware.
But I guess it'd be nice to mirror the format of the existing info (mem first, then bus range).
Here's the incremental diff, which I can't really test:
Here's what it looks like for me:
ACPI: PCI Root Bridge [PCI2] (domain 0002 [bus 00-1f]) acpi PNP0A08:02: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI] acpi PNP0A08:02: _OSC: platform does not support [PCIeHotplug] acpi PNP0A08:02: _OSC: OS now controls [PME AER PCIeCapability] acpi PNP0A08:02: MCFG quirk: ECAM space for [bus 00-1f] at [mem 0xa0000000000-0xa0001ffffff] with pci_3
Is "pci_3" really the entire name? If not, what happened to the rest? I was hoping for a symbol we could grep for.
The full name is pci_32b_ops. The print overflowed my tmux pane.
acpi PNP0A08:02: ECAM at [mem 0xa0000000000-0xa0001ffffff] for [bus 00-1f] Remapped I/O 0x00000affffff0000 to [io 0x10000-0x1ffff window] PCI host bridge to bus 0002:00
Thanks, Cov
Hi Bjorn,
Thanks for your attention to this series. If I'm remembering the earlier comments correctly, the core quirks framework is in pretty good shape. If some of the more complicated patches using the framework still need discussion or revision, what would you think of including the framework and its simplest user [1] in 4.9?
1. https://patchwork.ozlabs.org/patch/673072/
Thanks, Christopher Covington
Currently we use one shared global acpi_pci_root_ops structure to keep controller-specific ops. Its pointer is then passed to acpi_pci_root_create() and associated with the host bridge instance for good. Such a design has a serious drawback: any manipulation of the single system-wide acpi_pci_root_ops leads to a kernel crash. The structure content does not really change even across the creation of multiple host bridges, so it has not been an issue so far.
In preparation for adding the ECAM quirks mechanism (where controller-specific PCI ops may be different for each host bridge), allocate a new acpi_pci_root_ops and fill it in with data for each bridge. Now it is safe to have different controller-specific info. As a consequence, free acpi_pci_root_ops when the host bridge is released.
No functional changes in this patch.
Signed-off-by: Tomasz Nowicki tn@semihalf.com --- arch/arm64/kernel/pci.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index fb439c7..5c08baf 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -152,33 +152,36 @@ static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci)
ri = container_of(ci, struct acpi_pci_generic_root_info, common); pci_ecam_free(ri->cfg); + kfree(ci->ops); kfree(ri); }
-static struct acpi_pci_root_ops acpi_pci_root_ops = { - .release_info = pci_acpi_generic_release_info, -}; - /* Interface called from ACPI code to setup PCI host controller */ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { int node = acpi_get_node(root->device->handle); struct acpi_pci_generic_root_info *ri; struct pci_bus *bus, *child; + struct acpi_pci_root_ops *root_ops;
ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node); if (!ri) return NULL;
+ root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); + if (!root_ops) + return NULL; + ri->cfg = pci_acpi_setup_ecam_mapping(root); if (!ri->cfg) { kfree(ri); + kfree(root_ops); return NULL; }
- acpi_pci_root_ops.pci_ops = &ri->cfg->ops->pci_ops; - bus = acpi_pci_root_create(root, &acpi_pci_root_ops, &ri->common, - ri->cfg); + root_ops->release_info = pci_acpi_generic_release_info; + root_ops->pci_ops = &ri->cfg->ops->pci_ops; + bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); if (!bus) return NULL;
On Thu, Nov 24, 2016 at 12:05:23PM +0100, Tomasz Nowicki wrote:
Currently we use one shared global acpi_pci_root_ops structure to keep controller-specific ops. Its pointer is then passed to acpi_pci_root_create() and associated with the host bridge instance for good. Such a design has a serious drawback: any manipulation of the single system-wide acpi_pci_root_ops leads to a kernel crash. The structure content does not really change even across the creation of multiple host bridges, so it has not been an issue so far.
In preparation for adding the ECAM quirks mechanism (where controller-specific PCI ops may be different for each host bridge), allocate a new acpi_pci_root_ops and fill it in with data for each bridge. Now it is safe to have different controller-specific info. As a consequence, free acpi_pci_root_ops when the host bridge is released.
No functional changes in this patch.
Signed-off-by: Tomasz Nowicki tn@semihalf.com
Applied to pci/ecam for v4.10, thanks, Tomasz!
arch/arm64/kernel/pci.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index fb439c7..5c08baf 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -152,33 +152,36 @@ static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci) ri = container_of(ci, struct acpi_pci_generic_root_info, common); pci_ecam_free(ri->cfg);
- kfree(ci->ops); kfree(ri);
} -static struct acpi_pci_root_ops acpi_pci_root_ops = {
- .release_info = pci_acpi_generic_release_info,
-};
/* Interface called from ACPI code to setup PCI host controller */ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { int node = acpi_get_node(root->device->handle); struct acpi_pci_generic_root_info *ri; struct pci_bus *bus, *child;
- struct acpi_pci_root_ops *root_ops;
ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node); if (!ri) return NULL;
- root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
- if (!root_ops)
return NULL;
- ri->cfg = pci_acpi_setup_ecam_mapping(root); if (!ri->cfg) { kfree(ri);
return NULL; }kfree(root_ops);
- acpi_pci_root_ops.pci_ops = &ri->cfg->ops->pci_ops;
- bus = acpi_pci_root_create(root, &acpi_pci_root_ops, &ri->common,
ri->cfg);
- root_ops->release_info = pci_acpi_generic_release_info;
- root_ops->pci_ops = &ri->cfg->ops->pci_ops;
- bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); if (!bus) return NULL;
2.7.4