Linaro-mm-sig September 2021

linaro-mm-sig@lists.linaro.org

12 participants
35 discussions

Re: [Linaro-mm-sig] [PATCH net-next 1/5] dt-bindings: net: Brcm ASP 2.0 Ethernet controller

by Rob Herring

On Fri, 24 Sep 2021 14:44:47 -0700, Justin Chen wrote: > From: Florian Fainelli <f.fainelli(a)gmail.com> > > Add a binding document for the Broadcom ASP 2.0 Ethernet controller. > > Signed-off-by: Florian Fainelli <f.fainelli(a)gmail.com> > Signed-off-by: Justin Chen <justinpopo6(a)gmail.com> > --- > .../devicetree/bindings/net/brcm,asp-v2.0.yaml | 147 +++++++++++++++++++++ > 1 file changed, 147 insertions(+) > create mode 100644 Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml > My bot found errors running 'make DT_CHECKER_FLAGS=-m dt_binding_check' on your patch (DT_CHECKER_FLAGS is new in v5.13): yamllint warnings/errors: ./Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml:79:10: [warning] wrong indentation: expected 10 but found 9 (indentation) dtschema/dtc warnings/errors: /builds/robherring/linux-dt-review/Documentation/devicetree/bindings/net/brcm,asp-v2.0.example.dt.yaml: asp@9c00000: 'mdio@c614', 'mdio@ce14' do not match any of the regexes: 'pinctrl-[0-9]+' From schema: /builds/robherring/linux-dt-review/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml Documentation/devicetree/bindings/net/brcm,asp-v2.0.example.dt.yaml:0:0: /example-0/asp@9c00000/mdio@c614: failed to match any schema with compatible: ['brcm,asp-v2.0-mdio'] Documentation/devicetree/bindings/net/brcm,asp-v2.0.example.dt.yaml:0:0: /example-0/asp@9c00000/mdio@ce14: failed to match any schema with compatible: ['brcm,asp-v2.0-mdio'] doc reference errors (make refcheckdocs): See https://patchwork.ozlabs.org/patch/1532528 This check can fail if there are any dependencies. The base for a patch series is generally the most recent rc1. If you already ran 'make dt_binding_check' and didn't see the above error(s), then make sure 'yamllint' is installed and dt-schema is up to date: pip3 install dtschema --upgrade Please check and re-submit.

3 years, 11 months

Re: [Linaro-mm-sig] [PATCH net-next 3/5] net: bcmasp: Add support for ASP2.0 Ethernet controller

by Andrew Lunn

> +static int bcmasp_probe(struct platform_device *pdev) > +{ > + struct bcmasp_priv *priv; > + struct device_node *ports_node, *intf_node; > + struct device *dev = &pdev->dev; > + int ret, i, wol_irq, count = 0; > + struct bcmasp_intf *intf; > + struct resource *r; > + u32 u32_reserved_filters_bitmask; > + DECLARE_BITMAP(reserved_filters_bitmask, ASP_RX_NET_FILTER_MAX); > + > + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); > + if (!priv) > + return -ENOMEM; > + > + priv->irq = platform_get_irq(pdev, 0); > + if (priv->irq <= 0) { > + dev_err(dev, "invalid interrupt\n"); > + return -EINVAL; > + } > + > + priv->clk = devm_clk_get(dev, "sw_asp"); > + if (IS_ERR(priv->clk)) { > + if (PTR_ERR(priv->clk) == -EPROBE_DEFER) > + return -EPROBE_DEFER; > + dev_warn(dev, "failed to request clock\n"); > + priv->clk = NULL; > + } devm_clk_get_optional() makes this simpler/ > + > + /* Base from parent node */ > + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + priv->base = devm_ioremap_resource(&pdev->dev, r); > + if (IS_ERR(priv->base)) { > + dev_err(dev, "failed to iomap\n"); > + return PTR_ERR(priv->base); > + } > + > + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); > + if (ret) > + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); > + if (ret) { > + dev_err(&pdev->dev, "unable to set DMA mask: %d\n", ret); > + return ret; > + } > + > + dev_set_drvdata(&pdev->dev, priv); > + priv->pdev = pdev; > + spin_lock_init(&priv->mda_lock); > + spin_lock_init(&priv->clk_lock); > + mutex_init(&priv->net_lock); > + > + ret = clk_prepare_enable(priv->clk); > + if (ret) > + return ret; > + > + /* Enable all clocks to ensure successful probing */ > + bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0); > + > + /* Switch to the main clock */ > + bcmasp_core_clock_select(priv, false); > + > + _intr2_mask_set(priv, 0xffffffff); > + intr2_core_wl(priv, 0xffffffff, ASP_INTR2_CLEAR); > + > + ret = devm_request_irq(&pdev->dev, priv->irq, 
bcmasp_isr, 0, > + pdev->name, priv); > + if (ret) { > + dev_err(dev, "failed to request ASP interrupt: %d\n", ret); > + return ret; > + } Do you need to undo clk_prepare_enable()? > + > + /* Register mdio child nodes */ > + of_platform_populate(dev->of_node, bcmasp_mdio_of_match, NULL, > + dev); > + > + ret = of_property_read_u32(dev->of_node, > + "brcm,reserved-net-filters-mask", > + &u32_reserved_filters_bitmask); > + if (ret) > + u32_reserved_filters_bitmask = 0; > + > + priv->net_filters_count_max = ASP_RX_NET_FILTER_MAX; > + bitmap_zero(reserved_filters_bitmask, priv->net_filters_count_max); > + bitmap_from_arr32(reserved_filters_bitmask, > + &u32_reserved_filters_bitmask, > + priv->net_filters_count_max); > + > + /* Discover bitmask of reserved filters */ > + for_each_set_bit(i, reserved_filters_bitmask, ASP_RX_NET_FILTER_MAX) { > + priv->net_filters[i].reserved = true; > + priv->net_filters_count_max--; > + } > + > + /* > + * ASP specific initialization, Needs to be done irregardless of > + * of how many interfaces come up. > + */ > + bcmasp_core_init(priv); > + bcmasp_core_init_filters(priv); > + > + ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports"); > + if (!ports_node) { > + dev_warn(dev, "No ports found\n"); > + return 0; > + } > + > + priv->intf_count = of_get_available_child_count(ports_node); > + > + priv->intfs = devm_kcalloc(dev, priv->intf_count, > + sizeof(struct bcmasp_intf *), > + GFP_KERNEL); > + if (!priv->intfs) > + return -ENOMEM; > + > + /* Probe each interface (Initalization should continue even if > + * interfaces are unable to come up) > + */ > + i = 0; > + for_each_available_child_of_node(ports_node, intf_node) { > + wol_irq = platform_get_irq_optional(pdev, i + 1); > + priv->intfs[i++] = bcmasp_interface_create(priv, intf_node, > + wol_irq); > + } > + > + /* Drop the clock reference count now and let ndo_open()/ndo_close() > + * manage it for us from now on. 
> + */ > + bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE); > + > + clk_disable_unprepare(priv->clk); > + > + /* Now do the registration of the network ports which will take care of > + * managing the clock properly. > + */ > + for (i = 0; i < priv->intf_count; i++) { > + intf = priv->intfs[i]; > + if (!intf) > + continue; > + > + ret = register_netdev(intf->ndev); > + if (ret) { > + netdev_err(intf->ndev, > + "failed to register net_device: %d\n", ret); > + bcmasp_interface_destroy(intf, false); > + continue; > + } > + count++; > + } > + > + dev_info(dev, "Initialized %d port(s)\n", count); > + > + return 0; > +} > + > +static int bcmasp_remove(struct platform_device *pdev) > +{ > + struct bcmasp_priv *priv = dev_get_drvdata(&pdev->dev); > + struct bcmasp_intf *intf; > + int i; > + > + for (i = 0; i < priv->intf_count; i++) { > + intf = priv->intfs[i]; > + if (!intf) > + continue; > + > + bcmasp_interface_destroy(intf, true); > + } > + > + return 0; > +} Do you need to depopulate the mdio children? > +static void bcmasp_get_drvinfo(struct net_device *dev, > + struct ethtool_drvinfo *info) > +{ > + strlcpy(info->driver, "bcmasp", sizeof(info->driver)); > + strlcpy(info->version, "v2.0", sizeof(info->version)); Please drop version. The core will fill it in with the kernel version, which is more useful. > +static int bcmasp_nway_reset(struct net_device *dev) > +{ > + if (!dev->phydev) > + return -ENODEV; > + > + return genphy_restart_aneg(dev->phydev); > +} phy_ethtool_nway_reset(). > +static void bcmasp_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) > +{ > + struct bcmasp_intf *intf = netdev_priv(dev); > + > + wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; > + wol->wolopts = intf->wolopts; > + memset(wol->sopass, 0, sizeof(wol->sopass)); > + > + if (wol->wolopts & WAKE_MAGICSECURE) > + memcpy(wol->sopass, intf->sopass, sizeof(intf->sopass)); > +} Maybe consider calling into the PHY to see what it can do? 
If the PHY can do the WoL you want, it will do it with less power. > +static int bcmasp_set_priv_flags(struct net_device *dev, u32 flags) > +{ > + struct bcmasp_intf *intf = netdev_priv(dev); > + > + intf->wol_keep_rx_en = flags & BCMASP_WOL_KEEP_RX_EN ? 1 : 0; > + > + return 0; Please could you explain this some more. How can you disable RX and still have WoL working? > +static void bcmasp_adj_link(struct net_device *dev) > +{ > + struct bcmasp_intf *intf = netdev_priv(dev); > + struct phy_device *phydev = dev->phydev; > + int changed = 0; > + u32 cmd_bits = 0, reg; > + > + if (intf->old_link != phydev->link) { > + changed = 1; > + intf->old_link = phydev->link; > + } > + > + if (intf->old_duplex != phydev->duplex) { > + changed = 1; > + intf->old_duplex = phydev->duplex; > + } > + > + switch (phydev->speed) { > + case SPEED_2500: > + cmd_bits = UMC_CMD_SPEED_2500; All i've seen is references to RGMII. Is 2500 possible? > + break; > + case SPEED_1000: > + cmd_bits = UMC_CMD_SPEED_1000; > + break; > + case SPEED_100: > + cmd_bits = UMC_CMD_SPEED_100; > + break; > + case SPEED_10: > + cmd_bits = UMC_CMD_SPEED_10; > + break; > + default: > + break; > + } > + cmd_bits <<= UMC_CMD_SPEED_SHIFT; > + > + if (phydev->duplex == DUPLEX_HALF) > + cmd_bits |= UMC_CMD_HD_EN; > + > + if (intf->old_pause != phydev->pause) { > + changed = 1; > + intf->old_pause = phydev->pause; > + } > + > + if (!phydev->pause) > + cmd_bits |= UMC_CMD_RX_PAUSE_IGNORE | UMC_CMD_TX_PAUSE_IGNORE; > + > + if (!changed) > + return; Shouldn't there be a comparison intd->old_speed != phydev->speed? You are risking the PHY can change speed without doing a link down/up? 
> + > + if (phydev->link) { > + reg = umac_rl(intf, UMC_CMD); > + reg &= ~((UMC_CMD_SPEED_MASK << UMC_CMD_SPEED_SHIFT) | > + UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE | > + UMC_CMD_TX_PAUSE_IGNORE); > + reg |= cmd_bits; > + umac_wl(intf, reg, UMC_CMD); > + > + /* Enable RGMII pad */ > + reg = rgmii_rl(intf, RGMII_OOB_CNTRL); > + reg |= RGMII_MODE_EN; > + rgmii_wl(intf, reg, RGMII_OOB_CNTRL); > + > + intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0; > + bcmasp_eee_enable_set(intf, intf->eee.eee_active); > + } else { > + /* Disable RGMII pad */ > + reg = rgmii_rl(intf, RGMII_OOB_CNTRL); > + reg &= ~RGMII_MODE_EN; > + rgmii_wl(intf, reg, RGMII_OOB_CNTRL); > + } > + > + if (changed) > + phy_print_status(phydev); There has already been a return if !changed. > +static void bcmasp_configure_port(struct bcmasp_intf *intf) > +{ > + u32 reg, id_mode_dis = 0; > + > + reg = rgmii_rl(intf, RGMII_PORT_CNTRL); > + reg &= ~RGMII_PORT_MODE_MASK; > + > + switch (intf->phy_interface) { > + case PHY_INTERFACE_MODE_RGMII: > + /* RGMII_NO_ID: TXC transitions at the same time as TXD > + * (requires PCB or receiver-side delay) > + * RGMII: Add 2ns delay on TXC (90 degree shift) > + * > + * ID is implicitly disabled for 100Mbps (RG)MII operation. > + */ > + id_mode_dis = RGMII_ID_MODE_DIS; > + fallthrough; > + case PHY_INTERFACE_MODE_RGMII_TXID: > + reg |= RGMII_PORT_MODE_EXT_GPHY; > + break; > + case PHY_INTERFACE_MODE_MII: > + reg |= RGMII_PORT_MODE_EXT_EPHY; > + break; > + default: > + break; > + } Can we skip this and let the PHY do the delays? Ah, "This is an ugly quirk..." Maybe add a comment here pointing towards bcmasp_netif_init(), which is explains this. 
> +static int bcmasp_netif_init(struct net_device *dev, bool phy_connect, > + bool init_rx) > +{ > + struct bcmasp_intf *intf = netdev_priv(dev); > + phy_interface_t phy_iface = intf->phy_interface; > + u32 phy_flags = PHY_BRCM_AUTO_PWRDWN_ENABLE | > + PHY_BRCM_DIS_TXCRXC_NOENRGY | > + PHY_BRCM_IDDQ_SUSPEND; > + struct phy_device *phydev = NULL; > + int ret; > + > + /* Always enable interface clocks */ > + bcmasp_core_clock_set_intf(intf, true); > + > + /* Enable internal PHY before any MAC activity */ > + if (intf->internal_phy) > + bcmasp_ephy_enable_set(intf, true); > + > + bcmasp_configure_port(intf); > + > + /* This is an ugly quirk but we have not been correctly interpreting > + * the phy_interface values and we have done that across different > + * drivers, so at least we are consistent in our mistakes. > + * > + * When the Generic PHY driver is in use either the PHY has been > + * strapped or programmed correctly by the boot loader so we should > + * stick to our incorrect interpretation since we have validated it. > + * > + * Now when a dedicated PHY driver is in use, we need to reverse the > + * meaning of the phy_interface_mode values to something that the PHY > + * driver will interpret and act on such that we have two mistakes > + * canceling themselves so to speak. We only do this for the two > + * modes that GENET driver officially supports on Broadcom STB chips: > + * PHY_INTERFACE_MODE_RGMII and PHY_INTERFACE_MODE_RGMII_TXID. Other > + * modes are not *officially* supported with the boot loader and the > + * scripted environment generating Device Tree blobs for those > + * platforms. > + * > + * Note that internal PHY and fixed-link configurations are not > + * affected because they use different phy_interface_t values or the > + * Generic PHY driver. 
> + */ > +static inline void bcmasp_map_res(struct bcmasp_priv *priv, > + struct bcmasp_intf *intf) > +{ > + /* Per port */ > + intf->res.umac = priv->base + UMC_OFFSET(intf); > + intf->res.umac2fb = priv->base + UMAC2FB_OFFSET(intf); > + intf->res.rgmii = priv->base + RGMII_OFFSET(intf); > + > + /* Per ch */ > + intf->tx_spb_dma = priv->base + TX_SPB_DMA_OFFSET(intf); > + intf->res.tx_spb_ctrl = priv->base + TX_SPB_CTRL_OFFSET(intf); > + /* > + * Stop gap solution. This should be removed when 72165a0 is > + * deprecated > + */ Is that an internal commit? Andrew

3 years, 11 months

Re: [Linaro-mm-sig] [PATCH net-next 0/5] brcm ASP 2.0 Ethernet controller

by Andrew Lunn

On Fri, Sep 24, 2021 at 02:44:46PM -0700, Justin Chen wrote: > This patch set adds support for Broadcom's ASP 2.0 Ethernet controller. Hi Justin Does the hardware support L2 switching between the two ports? I'm just wondering if later this is going to be modified into a switchdev driver? Andrew

3 years, 11 months

[PATCH 01/27] dma-buf: add dma_resv_for_each_fence_unlocked v6

by Christian König

Abstract the complexity of iterating over all the fences in a dma_resv object. The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one. v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end v3: restructor the code, move rcu_read_lock()/unlock() into the iterator, add dma_resv_iter_is_restarted() v4: fix NULL deref when no explicit fence exists, drop superflous rcu_read_lock()/unlock() calls. v5: fix typos in the documentation v6: fix coding error when excl fence is NULL Signed-off-by: Christian König <christian.koenig(a)amd.com> --- drivers/dma-buf/dma-resv.c | 98 ++++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 95 ++++++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 84fbe60629e3..97af397304f3 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,104 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence); +/** + * dma_resv_iter_restart_unlocked - restart the unlocked iterator + * @cursor: The dma_resv_iter object to restart + * + * Restart the unlocked iteration by initializing the cursor object. + */ +static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) +{ + cursor->seq = read_seqcount_begin(&cursor->obj->seq); + cursor->index = -1; + if (cursor->all_fences) + cursor->fences = dma_resv_shared_list(cursor->obj); + else + cursor->fences = NULL; + cursor->is_restarted = true; +} + +/** + * dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj + * @cursor: cursor to record the current position + * + * Return all the fences in the dma_resv object which are not yet signaled. + * The returned fence has an extra local reference so will stay alive. 
+ * If a concurrent modify is detected the whole iteration is started over again. + */ +static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) +{ + struct dma_resv *obj = cursor->obj; + + do { + /* Drop the reference from the previous round */ + dma_fence_put(cursor->fence); + + if (cursor->index == -1) { + cursor->fence = dma_resv_excl_fence(obj); + cursor->index++; + if (!cursor->fence) + continue; + + } else if (!cursor->fences || + cursor->index >= cursor->fences->shared_count) { + cursor->fence = NULL; + break; + + } else { + struct dma_resv_list *fences = cursor->fences; + unsigned int idx = cursor->index++; + + cursor->fence = rcu_dereference(fences->shared[idx]); + } + cursor->fence = dma_fence_get_rcu(cursor->fence); + } while (cursor->fence && dma_fence_is_signaled(cursor->fence)); +} + +/** + * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj. + * @cursor: the cursor with the current position + * + * Returns the first fence from an unlocked dma_resv obj. + */ +struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor) +{ + rcu_read_lock(); + do { + dma_resv_iter_restart_unlocked(cursor); + dma_resv_iter_walk_unlocked(cursor); + } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + rcu_read_unlock(); + + return cursor->fence; +} +EXPORT_SYMBOL(dma_resv_iter_first_unlocked); + +/** + * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj. + * @cursor: the cursor with the current position + * + * Returns the next fence from an unlocked dma_resv obj. 
+ */ +struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor) +{ + bool restart; + + rcu_read_lock(); + cursor->is_restarted = false; + restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq); + do { + if (restart) + dma_resv_iter_restart_unlocked(cursor); + dma_resv_iter_walk_unlocked(cursor); + restart = true; + } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + rcu_read_unlock(); + + return cursor->fence; +} +EXPORT_SYMBOL(dma_resv_iter_next_unlocked); + /** * dma_resv_copy_fences - Copy all fences from src to dst. * @dst: the destination reservation object diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 9100dd3dc21f..5d7d28cb9008 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -149,6 +149,101 @@ struct dma_resv { struct dma_resv_list __rcu *fence; }; +/** + * struct dma_resv_iter - current position into the dma_resv fences + * + * Don't touch this directly in the driver, use the accessor function instead. 
+ */ +struct dma_resv_iter { + /** @obj: The dma_resv object we iterate over */ + struct dma_resv *obj; + + /** @all_fences: If all fences should be returned */ + bool all_fences; + + /** @fence: the currently handled fence */ + struct dma_fence *fence; + + /** @seq: sequence number to check for modifications */ + unsigned int seq; + + /** @index: index into the shared fences */ + unsigned int index; + + /** @fences: the shared fences */ + struct dma_resv_list *fences; + + /** @is_restarted: true if this is the first returned fence */ + bool is_restarted; +}; + +struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor); +struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor); + +/** + * dma_resv_iter_begin - initialize a dma_resv_iter object + * @cursor: The dma_resv_iter object to initialize + * @obj: The dma_resv object which we want to iterate over + * @all_fences: If all fences should be returned or just the exclusive one + */ +static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor, + struct dma_resv *obj, + bool all_fences) +{ + cursor->obj = obj; + cursor->all_fences = all_fences; + cursor->fence = NULL; +} + +/** + * dma_resv_iter_end - cleanup a dma_resv_iter object + * @cursor: the dma_resv_iter object which should be cleaned up + * + * Make sure that the reference to the fence in the cursor is properly + * dropped. + */ +static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) +{ + dma_fence_put(cursor->fence); +} + +/** + * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one + * @cursor: the cursor of the current position + * + * Returns true if the currently returned fence is the exclusive one. 
+ */ +static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +{ + return cursor->index == -1; +} + +/** + * dma_resv_iter_is_restarted - test if this is the first fence after a restart + * @cursor: the cursor with the current position + * + * Return true if this is the first fence in an iteration after a restart. + */ +static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) +{ + return cursor->is_restarted; +} + +/** + * dma_resv_for_each_fence_unlocked - unlocked fence iterator + * @cursor: a struct dma_resv_iter pointer + * @fence: the current fence + * + * Iterate over the fences in a struct dma_resv object without holding the + * &dma_resv.lock and using RCU instead. The cursor needs to be initialized + * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside + * the iterator a reference to the dma_fence is held and the RCU lock dropped. + * When the dma_resv is modified the iteration starts over again. + */ +#define dma_resv_for_each_fence_unlocked(cursor, fence) \ + for (fence = dma_resv_iter_first_unlocked(cursor); \ + fence; fence = dma_resv_iter_next_unlocked(cursor)) + #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base) -- 2.25.1

3 years, 11 months

Re: [Linaro-mm-sig] [RFC PATCH 2/4] DRM: Add support of AI Processor Unit (APU)

by Christian König

Am 23.09.21 um 02:58 schrieb Dave Airlie: > On Sat, 18 Sept 2021 at 07:57, Alexandre Bailon <abailon(a)baylibre.com> wrote: >> Some Mediatek SoC provides hardware accelerator for AI / ML. >> This driver provides the infrastructure to manage memory >> shared between host CPU and the accelerator, and to submit >> jobs to the accelerator. >> The APU itself is managed by remoteproc so this drivers >> relies on remoteproc to found the APU and get some important data >> from it. But, the driver is quite generic and it should possible >> to manage accelerator using another ways. >> This driver doesn't manage itself the data transmitions. >> It must be registered by another driver implementing the transmitions. >> >> Signed-off-by: Alexandre Bailon <abailon(a)baylibre.com> >> [SNIP] >> Please refer to >> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.kerne… >> >> here and below in many places. >> >> There's a lot of missing padding/alignment here. There is also the pahole utility which shows you nicely where you need padding for your IOCTL structures. For example "pahole drivers/gpu/drm/amd/amdgpu/amdgpu.ko -C drm_amdgpu_gem_va" gives you: struct drm_amdgpu_gem_va { __u32 handle; /* 0 4 */ __u32 _pad; /* 4 4 */ __u32 operation; /* 8 4 */ __u32 flags; /* 12 4 */ __u64 va_address; /* 16 8 */ __u64 offset_in_bo; /* 24 8 */ __u64 map_size; /* 32 8 */ /* size: 40, cachelines: 1, members: 7 */ /* last cacheline: 40 bytes */ }; And as you can see we have added the _pad field to our IOCTL parameter structure to properly align the 64-bit members. Regards, Christian. >> >> I'm trying to find the time to review this stack in full, any writeups >> on how this is used from userspace would be useful (not just the code >> repo, but some sort of how do I get at it) it reads as kinda generic >> (calling it apu), but then has some specifics around device binding. >> >> Dave.

3 years, 12 months

Deploying new iterator interface for dma-buf

by Christian König

Hi guys, The version I've sent out yesterday had a rather obvious coding error and I honestly forgot the cover letter. This one here is better tested and will now hopefully not be torn apart by the CI system immediately. I tried to address all review and documentation comments as best as I could, so I'm hoping that we can now consider pushing this. Cheers, Christian.

3 years, 12 months

Re: [Linaro-mm-sig] [PATCH v3 4/9] drm/scheduler: Add fence deadline support

by Rob Clark

On Wed, Sep 22, 2021 at 7:31 AM Andrey Grodzovsky <andrey.grodzovsky(a)amd.com> wrote: > > > On 2021-09-21 11:32 p.m., Rob Clark wrote: > > On Tue, Sep 21, 2021 at 7:18 PM Andrey Grodzovsky > > <andrey.grodzovsky(a)amd.com> wrote: > >> > >> On 2021-09-21 4:47 p.m., Rob Clark wrote: > >>> On Tue, Sep 21, 2021 at 1:09 PM Andrey Grodzovsky > >>> <andrey.grodzovsky(a)amd.com> wrote: > >>>> On 2021-09-03 2:47 p.m., Rob Clark wrote: > >>>> > >>>>> From: Rob Clark <robdclark(a)chromium.org> > >>>>> > >>>>> As the finished fence is the one that is exposed to userspace, and > >>>>> therefore the one that other operations, like atomic update, would > >>>>> block on, we need to propagate the deadline from from the finished > >>>>> fence to the actual hw fence. > >>>>> > >>>>> v2: Split into drm_sched_fence_set_parent() (ckoenig) > >>>>> > >>>>> Signed-off-by: Rob Clark <robdclark(a)chromium.org> > >>>>> --- > >>>>> drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++ > >>>>> drivers/gpu/drm/scheduler/sched_main.c | 2 +- > >>>>> include/drm/gpu_scheduler.h | 8 ++++++ > >>>>> 3 files changed, 43 insertions(+), 1 deletion(-) > >>>>> > >>>>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c > >>>>> index bcea035cf4c6..4fc41a71d1c7 100644 > >>>>> --- a/drivers/gpu/drm/scheduler/sched_fence.c > >>>>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c > >>>>> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f) > >>>>> dma_fence_put(&fence->scheduled); > >>>>> } > >>>>> > >>>>> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f, > >>>>> + ktime_t deadline) > >>>>> +{ > >>>>> + struct drm_sched_fence *fence = to_drm_sched_fence(f); > >>>>> + unsigned long flags; > >>>>> + > >>>>> + spin_lock_irqsave(&fence->lock, flags); > >>>>> + > >>>>> + /* If we already have an earlier deadline, keep it: */ > >>>>> + if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) && > >>>>> + 
ktime_before(fence->deadline, deadline)) { > >>>>> + spin_unlock_irqrestore(&fence->lock, flags); > >>>>> + return; > >>>>> + } > >>>>> + > >>>>> + fence->deadline = deadline; > >>>>> + set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags); > >>>>> + > >>>>> + spin_unlock_irqrestore(&fence->lock, flags); > >>>>> + > >>>>> + if (fence->parent) > >>>>> + dma_fence_set_deadline(fence->parent, deadline); > >>>>> +} > >>>>> + > >>>>> static const struct dma_fence_ops drm_sched_fence_ops_scheduled = { > >>>>> .get_driver_name = drm_sched_fence_get_driver_name, > >>>>> .get_timeline_name = drm_sched_fence_get_timeline_name, > >>>>> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = { > >>>>> .get_driver_name = drm_sched_fence_get_driver_name, > >>>>> .get_timeline_name = drm_sched_fence_get_timeline_name, > >>>>> .release = drm_sched_fence_release_finished, > >>>>> + .set_deadline = drm_sched_fence_set_deadline_finished, > >>>>> }; > >>>>> > >>>>> struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) > >>>>> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) > >>>>> } > >>>>> EXPORT_SYMBOL(to_drm_sched_fence); > >>>>> > >>>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, > >>>>> + struct dma_fence *fence) > >>>>> +{ > >>>>> + s_fence->parent = dma_fence_get(fence); > >>>>> + if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, > >>>>> + &s_fence->finished.flags)) > >>>>> + dma_fence_set_deadline(fence, s_fence->deadline); > >>>> I believe above you should pass be s_fence->finished to > >>>> dma_fence_set_deadline > >>>> instead it fence which is the HW fence itself. > >>> Hmm, unless this has changed recently with some patches I don't have, > >>> s_fence->parent is the one signalled by hw, so it is the one we want > >>> to set the deadline on > >>> > >>> BR, > >>> -R > >> > >> No it didn't change. 
But then when exactly will > >> drm_sched_fence_set_deadline_finished > >> execute such that fence->parent != NULL ? In other words, I am not clear > >> how propagation > >> happens otherwise - if dma_fence_set_deadline is called with the HW > >> fence then the assumption > >> here is that driver provided driver specific > >> dma_fence_ops.dma_fence_set_deadline callback executes > >> but I was under impression that drm_sched_fence_set_deadline_finished is > >> the one that propagates > >> the deadline to the HW fence's callback and for it to execute > >> dma_fence_set_deadline needs to be called > >> with s_fence->finished. > > Assuming I didn't screw up drm/msm conversion to scheduler, > > &s_fence->finished is the one that will be returned to userspace.. and > > later passed back to kernel for atomic commit (or to the compositor). > > So it is the one that fence->set_deadline() will be called on. But > > s_fence->parent is the actual hw fence that needs to know about the > > deadline. Depending on whether or not the job has been written into > > hw ringbuffer or not, there are two cases: > > > > 1) not scheduled yet, s_fence will store the deadline and propagate it > > later once s_fence->parent is known > > > And by later you mean the call to drm_sched_fence_set_parent > after HW fence is returned ? If yes I think i get it now. 
Yup :-) BR, -R > Andrey > > > > 2) already scheduled, in which case s_fence->finished.set_deadline > > will propagate it directly to the real fence > > > > BR, > > -R > > > >> Andrey > >> > >> > >> > >>>> Andrey > >>>> > >>>> > >>>>> +} > >>>>> + > >>>>> struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity, > >>>>> void *owner) > >>>>> { > >>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > >>>>> index 595e47ff7d06..27bf0ac0625f 100644 > >>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c > >>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c > >>>>> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param) > >>>>> drm_sched_fence_scheduled(s_fence); > >>>>> > >>>>> if (!IS_ERR_OR_NULL(fence)) { > >>>>> - s_fence->parent = dma_fence_get(fence); > >>>>> + drm_sched_fence_set_parent(s_fence, fence); > >>>>> r = dma_fence_add_callback(fence, &sched_job->cb, > >>>>> drm_sched_job_done_cb); > >>>>> if (r == -ENOENT) > >>>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > >>>>> index 7f77a455722c..158ddd662469 100644 > >>>>> --- a/include/drm/gpu_scheduler.h > >>>>> +++ b/include/drm/gpu_scheduler.h > >>>>> @@ -238,6 +238,12 @@ struct drm_sched_fence { > >>>>> */ > >>>>> struct dma_fence finished; > >>>>> > >>>>> + /** > >>>>> + * @deadline: deadline set on &drm_sched_fence.finished which > >>>>> + * potentially needs to be propagated to &drm_sched_fence.parent > >>>>> + */ > >>>>> + ktime_t deadline; > >>>>> + > >>>>> /** > >>>>> * @parent: the fence returned by &drm_sched_backend_ops.run_job > >>>>> * when scheduling the job on hardware. 
We signal the > >>>>> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity, > >>>>> enum drm_sched_priority priority); > >>>>> bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); > >>>>> > >>>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, > >>>>> + struct dma_fence *fence); > >>>>> struct drm_sched_fence *drm_sched_fence_alloc( > >>>>> struct drm_sched_entity *s_entity, void *owner); > >>>>> void drm_sched_fence_init(struct drm_sched_fence *fence,

3 years, 12 months

Re: [Linaro-mm-sig] [PATCH v3 4/9] drm/scheduler: Add fence deadline support

by Rob Clark

On Tue, Sep 21, 2021 at 7:18 PM Andrey Grodzovsky <andrey.grodzovsky(a)amd.com> wrote: > > > On 2021-09-21 4:47 p.m., Rob Clark wrote: > > On Tue, Sep 21, 2021 at 1:09 PM Andrey Grodzovsky > > <andrey.grodzovsky(a)amd.com> wrote: > >> On 2021-09-03 2:47 p.m., Rob Clark wrote: > >> > >>> From: Rob Clark <robdclark(a)chromium.org> > >>> > >>> As the finished fence is the one that is exposed to userspace, and > >>> therefore the one that other operations, like atomic update, would > >>> block on, we need to propagate the deadline from from the finished > >>> fence to the actual hw fence. > >>> > >>> v2: Split into drm_sched_fence_set_parent() (ckoenig) > >>> > >>> Signed-off-by: Rob Clark <robdclark(a)chromium.org> > >>> --- > >>> drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++ > >>> drivers/gpu/drm/scheduler/sched_main.c | 2 +- > >>> include/drm/gpu_scheduler.h | 8 ++++++ > >>> 3 files changed, 43 insertions(+), 1 deletion(-) > >>> > >>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c > >>> index bcea035cf4c6..4fc41a71d1c7 100644 > >>> --- a/drivers/gpu/drm/scheduler/sched_fence.c > >>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c > >>> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f) > >>> dma_fence_put(&fence->scheduled); > >>> } > >>> > >>> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f, > >>> + ktime_t deadline) > >>> +{ > >>> + struct drm_sched_fence *fence = to_drm_sched_fence(f); > >>> + unsigned long flags; > >>> + > >>> + spin_lock_irqsave(&fence->lock, flags); > >>> + > >>> + /* If we already have an earlier deadline, keep it: */ > >>> + if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) && > >>> + ktime_before(fence->deadline, deadline)) { > >>> + spin_unlock_irqrestore(&fence->lock, flags); > >>> + return; > >>> + } > >>> + > >>> + fence->deadline = deadline; > >>> + set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags); 
> >>> + > >>> + spin_unlock_irqrestore(&fence->lock, flags); > >>> + > >>> + if (fence->parent) > >>> + dma_fence_set_deadline(fence->parent, deadline); > >>> +} > >>> + > >>> static const struct dma_fence_ops drm_sched_fence_ops_scheduled = { > >>> .get_driver_name = drm_sched_fence_get_driver_name, > >>> .get_timeline_name = drm_sched_fence_get_timeline_name, > >>> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = { > >>> .get_driver_name = drm_sched_fence_get_driver_name, > >>> .get_timeline_name = drm_sched_fence_get_timeline_name, > >>> .release = drm_sched_fence_release_finished, > >>> + .set_deadline = drm_sched_fence_set_deadline_finished, > >>> }; > >>> > >>> struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) > >>> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) > >>> } > >>> EXPORT_SYMBOL(to_drm_sched_fence); > >>> > >>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, > >>> + struct dma_fence *fence) > >>> +{ > >>> + s_fence->parent = dma_fence_get(fence); > >>> + if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, > >>> + &s_fence->finished.flags)) > >>> + dma_fence_set_deadline(fence, s_fence->deadline); > >> > >> I believe above you should pass be s_fence->finished to > >> dma_fence_set_deadline > >> instead it fence which is the HW fence itself. > > Hmm, unless this has changed recently with some patches I don't have, > > s_fence->parent is the one signalled by hw, so it is the one we want > > to set the deadline on > > > > BR, > > -R > > > No it didn't change. But then when exactly will > drm_sched_fence_set_deadline_finished > execute such that fence->parent != NULL ? 
In other words, I am not clear > how propagation > happens otherwise - if dma_fence_set_deadline is called with the HW > fence then the assumption > here is that driver provided driver specific > dma_fence_ops.dma_fence_set_deadline callback executes > but I was under impression that drm_sched_fence_set_deadline_finished is > the one that propagates > the deadline to the HW fence's callback and for it to execute > dma_fence_set_deadline needs to be called > with s_fence->finished. Assuming I didn't screw up drm/msm conversion to scheduler, &s_fence->finished is the one that will be returned to userspace.. and later passed back to kernel for atomic commit (or to the compositor). So it is the one that fence->set_deadline() will be called on. But s_fence->parent is the actual hw fence that needs to know about the deadline. Depending on whether or not the job has been written into hw ringbuffer or not, there are two cases: 1) not scheduled yet, s_fence will store the deadline and propagate it later once s_fence->parent is known 2) already scheduled, in which case s_fence->finished.set_deadline will propagate it directly to the real fence BR, -R > Andrey > > > > > > >> Andrey > >> > >> > >>> +} > >>> + > >>> struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity, > >>> void *owner) > >>> { > >>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > >>> index 595e47ff7d06..27bf0ac0625f 100644 > >>> --- a/drivers/gpu/drm/scheduler/sched_main.c > >>> +++ b/drivers/gpu/drm/scheduler/sched_main.c > >>> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param) > >>> drm_sched_fence_scheduled(s_fence); > >>> > >>> if (!IS_ERR_OR_NULL(fence)) { > >>> - s_fence->parent = dma_fence_get(fence); > >>> + drm_sched_fence_set_parent(s_fence, fence); > >>> r = dma_fence_add_callback(fence, &sched_job->cb, > >>> drm_sched_job_done_cb); > >>> if (r == -ENOENT) > >>> diff --git a/include/drm/gpu_scheduler.h 
b/include/drm/gpu_scheduler.h > >>> index 7f77a455722c..158ddd662469 100644 > >>> --- a/include/drm/gpu_scheduler.h > >>> +++ b/include/drm/gpu_scheduler.h > >>> @@ -238,6 +238,12 @@ struct drm_sched_fence { > >>> */ > >>> struct dma_fence finished; > >>> > >>> + /** > >>> + * @deadline: deadline set on &drm_sched_fence.finished which > >>> + * potentially needs to be propagated to &drm_sched_fence.parent > >>> + */ > >>> + ktime_t deadline; > >>> + > >>> /** > >>> * @parent: the fence returned by &drm_sched_backend_ops.run_job > >>> * when scheduling the job on hardware. We signal the > >>> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity, > >>> enum drm_sched_priority priority); > >>> bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); > >>> > >>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, > >>> + struct dma_fence *fence); > >>> struct drm_sched_fence *drm_sched_fence_alloc( > >>> struct drm_sched_entity *s_entity, void *owner); > >>> void drm_sched_fence_init(struct drm_sched_fence *fence,

3 years, 12 months

Re: [Linaro-mm-sig] [PATCH v3 4/9] drm/scheduler: Add fence deadline support

by Rob Clark

On Tue, Sep 21, 2021 at 1:09 PM Andrey Grodzovsky <andrey.grodzovsky(a)amd.com> wrote: > > On 2021-09-03 2:47 p.m., Rob Clark wrote: > > > From: Rob Clark <robdclark(a)chromium.org> > > > > As the finished fence is the one that is exposed to userspace, and > > therefore the one that other operations, like atomic update, would > > block on, we need to propagate the deadline from from the finished > > fence to the actual hw fence. > > > > v2: Split into drm_sched_fence_set_parent() (ckoenig) > > > > Signed-off-by: Rob Clark <robdclark(a)chromium.org> > > --- > > drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++ > > drivers/gpu/drm/scheduler/sched_main.c | 2 +- > > include/drm/gpu_scheduler.h | 8 ++++++ > > 3 files changed, 43 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c > > index bcea035cf4c6..4fc41a71d1c7 100644 > > --- a/drivers/gpu/drm/scheduler/sched_fence.c > > +++ b/drivers/gpu/drm/scheduler/sched_fence.c > > @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f) > > dma_fence_put(&fence->scheduled); > > } > > > > +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f, > > + ktime_t deadline) > > +{ > > + struct drm_sched_fence *fence = to_drm_sched_fence(f); > > + unsigned long flags; > > + > > + spin_lock_irqsave(&fence->lock, flags); > > + > > + /* If we already have an earlier deadline, keep it: */ > > + if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) && > > + ktime_before(fence->deadline, deadline)) { > > + spin_unlock_irqrestore(&fence->lock, flags); > > + return; > > + } > > + > > + fence->deadline = deadline; > > + set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags); > > + > > + spin_unlock_irqrestore(&fence->lock, flags); > > + > > + if (fence->parent) > > + dma_fence_set_deadline(fence->parent, deadline); > > +} > > + > > static const struct dma_fence_ops drm_sched_fence_ops_scheduled = { 
> > .get_driver_name = drm_sched_fence_get_driver_name, > > .get_timeline_name = drm_sched_fence_get_timeline_name, > > @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = { > > .get_driver_name = drm_sched_fence_get_driver_name, > > .get_timeline_name = drm_sched_fence_get_timeline_name, > > .release = drm_sched_fence_release_finished, > > + .set_deadline = drm_sched_fence_set_deadline_finished, > > }; > > > > struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) > > @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) > > } > > EXPORT_SYMBOL(to_drm_sched_fence); > > > > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, > > + struct dma_fence *fence) > > +{ > > + s_fence->parent = dma_fence_get(fence); > > + if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, > > + &s_fence->finished.flags)) > > + dma_fence_set_deadline(fence, s_fence->deadline); > > > I believe above you should pass be s_fence->finished to > dma_fence_set_deadline > instead it fence which is the HW fence itself. 
Hmm, unless this has changed recently with some patches I don't have, s_fence->parent is the one signalled by hw, so it is the one we want to set the deadline on BR, -R > Andrey > > > > +} > > + > > struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity, > > void *owner) > > { > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > > index 595e47ff7d06..27bf0ac0625f 100644 > > --- a/drivers/gpu/drm/scheduler/sched_main.c > > +++ b/drivers/gpu/drm/scheduler/sched_main.c > > @@ -978,7 +978,7 @@ static int drm_sched_main(void *param) > > drm_sched_fence_scheduled(s_fence); > > > > if (!IS_ERR_OR_NULL(fence)) { > > - s_fence->parent = dma_fence_get(fence); > > + drm_sched_fence_set_parent(s_fence, fence); > > r = dma_fence_add_callback(fence, &sched_job->cb, > > drm_sched_job_done_cb); > > if (r == -ENOENT) > > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > > index 7f77a455722c..158ddd662469 100644 > > --- a/include/drm/gpu_scheduler.h > > +++ b/include/drm/gpu_scheduler.h > > @@ -238,6 +238,12 @@ struct drm_sched_fence { > > */ > > struct dma_fence finished; > > > > + /** > > + * @deadline: deadline set on &drm_sched_fence.finished which > > + * potentially needs to be propagated to &drm_sched_fence.parent > > + */ > > + ktime_t deadline; > > + > > /** > > * @parent: the fence returned by &drm_sched_backend_ops.run_job > > * when scheduling the job on hardware. We signal the > > @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity, > > enum drm_sched_priority priority); > > bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); > > > > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, > > + struct dma_fence *fence); > > struct drm_sched_fence *drm_sched_fence_alloc( > > struct drm_sched_entity *s_entity, void *owner); > > void drm_sched_fence_init(struct drm_sched_fence *fence,

3 years, 12 months

[PATCH 01/26] dma-buf: add dma_resv_for_each_fence_unlocked v3

by Christian König

Abstract the complexity of iterating over all the fences in a dma_resv object. The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one. v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end v3: restructure the code, move rcu_read_lock()/unlock() into the iterator, add dma_resv_iter_is_restarted() Signed-off-by: Christian König <christian.koenig(a)amd.com> --- drivers/dma-buf/dma-resv.c | 98 ++++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 95 ++++++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 84fbe60629e3..11b5399f4bd3 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,104 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence); +/** + * dma_resv_iter_restart_unlocked - restart the unlocked iterator + * @cursor: The dma_resv_iter object to restart + * + * Restart the unlocked iteration by initializing the cursor object. + */ +static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) +{ + cursor->seq = read_seqcount_begin(&cursor->obj->seq); + cursor->index = -1; + if (cursor->all_fences) { + rcu_read_lock(); + cursor->fences = dma_resv_shared_list(cursor->obj); + rcu_read_unlock(); + } else { + cursor->fences = NULL; + } + cursor->is_restarted = true; +} + +/** + * dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj + * @cursor: cursor to record the current position + * + * Return all the fences in the dma_resv object which are not yet signaled. + * The returned fence has an extra local reference so will stay alive. + * If a concurrent modify is detected the whole iteration is started over again. 
+ */ +static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) +{ + struct dma_resv *obj = cursor->obj; + + do { + /* Drop the reference from the previous round */ + dma_fence_put(cursor->fence); + + if (cursor->index++ == -1) { + cursor->fence = dma_resv_excl_fence(obj); + cursor->fence = dma_fence_get_rcu(cursor->fence); + + } else if (!cursor->fences || + cursor->index >= cursor->fences->shared_count) { + cursor->fence = NULL; + + } else { + struct dma_resv_list *fences = cursor->fences; + unsigned int idx = cursor->index; + + cursor->fence = rcu_dereference(fences->shared[idx]); + cursor->fence = dma_fence_get_rcu(cursor->fence); + } + } while (cursor->fence && dma_fence_is_signaled(cursor->fence)); +} + +/** + * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj. + * @cursor: the cursor with the current position + * + * Returns the first fence from an unlocked dma_resv obj. + */ +struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor) +{ + rcu_read_lock(); + do { + dma_resv_iter_restart_unlocked(cursor); + dma_resv_iter_walk_unlocked(cursor); + } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + rcu_read_unlock(); + + return cursor->fence; +} +EXPORT_SYMBOL(dma_resv_iter_first_unlocked); + +/** + * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj. + * @cursor: the cursor with the current position + * + * Returns the next fence from an unlocked dma_resv obj. 
+ */ +struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor) +{ + bool restart; + + rcu_read_lock(); + cursor->is_restarted = false; + restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq); + do { + if (restart) + dma_resv_iter_restart_unlocked(cursor); + dma_resv_iter_walk_unlocked(cursor); + restart = true; + } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + rcu_read_unlock(); + + return cursor->fence; +} +EXPORT_SYMBOL(dma_resv_iter_next_unlocked); + /** * dma_resv_copy_fences - Copy all fences from src to dst. * @dst: the destination reservation object diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 9100dd3dc21f..baf77a542392 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -149,6 +149,101 @@ struct dma_resv { struct dma_resv_list __rcu *fence; }; +/** + * struct dma_resv_iter - current position into the dma_resv fences + * + * Don't touch this directly in the driver, use the accessor function instead. 
+ */ +struct dma_resv_iter { + /** @obj: The dma_resv object we iterate over */ + struct dma_resv *obj; + + /** @all_fences: If all fences should be returned */ + bool all_fences; + + /** @fence: the currently handled fence */ + struct dma_fence *fence; + + /** @seq: sequence number to check for modifications */ + unsigned int seq; + + /** @index: index into the shared fences */ + unsigned int index; + + /** @fences: the shared fences */ + struct dma_resv_list *fences; + + /** @is_restarted: true if this is the first returned fence */ + bool is_restarted; +}; + +struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor); +struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor); + +/** + * dma_resv_iter_begin - initialize a dma_resv_iter object + * @cursor: The dma_resv_iter object to initialize + * @obj: The dma_resv object which we want to iterate over + * @all_fences: If all fences should be returned or just the exclusive one + */ +static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor, + struct dma_resv *obj, + bool all_fences) +{ + cursor->obj = obj; + cursor->all_fences = all_fences; + cursor->fence = NULL; +} + +/** + * dma_resv_iter_end - cleanup a dma_resv_iter object + * @cursor: the dma_resv_iter object which should be cleaned up + * + * Make sure that the reference to the fence in the cursor is properly + * dropped. + */ +static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) +{ + dma_fence_put(cursor->fence); +} + +/** + * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one + * @cursor: the cursor of the current position + * + * Returns true if the currently returned fence is the exclusive one. 
+ */ +static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +{ + return cursor->index == -1; +} + +/** + * dma_resv_iter_is_restarted - test if this is the first fence after a restart + * @cursor: the cursor with the current position + * + * Return true if this is the first fence in an iteration after a restart. + */ +static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) +{ + return cursor->is_restarted; +} + +/** + * dma_resv_for_each_fence_unlocked - unlocked fence iterator + * @cursor: a struct dma_resv_iter pointer + * @fence: the current fence + * + * Iterate over the fences in a struct dma_resv object without holding the + * &dma_resv.lock and using RCU instead. The cursor needs to be initialized + * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside + * the iterator a reference to the dma_fence is held and the RCU lock dropped. + * When the dma_resv is modified the iteration starts over again. + */ +#define dma_resv_for_each_fence_unlocked(cursor, fence) \ + for (fence = dma_resv_iter_first_unlocked(cursor); \ + fence; fence = dma_resv_iter_next_unlocked(cursor)) + #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base) -- 2.25.1

3 years, 12 months

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

Linaro-mm-sig September 2021