[OpenWrt-Devel] [PATCH] ar71xx: check for stuck DMA on AR724x & fix sirq storm after recovery

Conn O'Griofa connogriofa at gmail.com
Sat Jan 9 23:20:27 EST 2016


Hi Felix,

On 08/01/16 23:34, Conn O'Griofa wrote:
> I tried replacing netif_start_queue(dev) with ag71xx_hw_start(ag) in ag71xx_hw_enable. With this change, when the DMA stuck issue occurs, there's no longer any tx timeouts logged, but the interface stops responding. Perhaps it's also necessary to call ag71xx_fast_reset (which only gets called during a link adjust)?

It appears that my hunch was correct, and both are necessary. The following patch works 100% fine; when the dma stuck condition occurs, recovery now completes without any link adjust or any other output from the kernel log*. The tx timeout condition is also avoided thanks to checking for stuck dma, internet connectivity remains stable, and the sirq storm never triggers. It looks like your patch, with my additions, solves the timeout issue perfectly on my device.

One issue I'd like to bring to your attention: with my changes, ag71xx_fast_reset is now called for every chipset as part of the ag71xx_hw_enable function. Prior to this, it was only called during a link adjust on the ar724x chipsets, but now it's going to be called by the ar91xx series, too.

If ag71xx_fast_reset is not suitable for use on the ar91xx series, the patch is going to need changes (and isolating ag71xx_fast_reset to ar724x in ag71xx_hw_enable may not be sufficient for the new ag71xx_restart_work_func to operate properly on ar91xx chips). But if it is suitable for all chipsets, perhaps it would be a good idea to call ag71xx_fast_reset for all chipsets in ag71xx_link_adjust, too?

Conn

*When testing on my device, I added some debug to log the dma stuck condition, just to ensure the problem was actually being triggered during testing.

diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
index 31b38d7..855b348 100644
--- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
+++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
@@ -631,34 +631,63 @@ void ag71xx_link_adjust(struct ag71xx *ag)
  		ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL));
  }
  
+static int ag71xx_hw_enable(struct ag71xx *ag)
+{
+	int ret;
+
+	ret = ag71xx_rings_init(ag);
+	if (ret)
+		return ret;
+
+	napi_enable(&ag->napi);
+
+	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+	ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
+	ag71xx_fast_reset(ag);
+	ag71xx_hw_start(ag);
+
+	return 0;
+}
+
+static void ag71xx_hw_disable(struct ag71xx *ag)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ag->lock, flags);
+
+	netif_stop_queue(ag->dev);
+
+	ag71xx_hw_stop(ag);
+	ag71xx_dma_reset(ag);
+
+	napi_disable(&ag->napi);
+	del_timer_sync(&ag->oom_timer);
+
+	spin_unlock_irqrestore(&ag->lock, flags);
+
+	ag71xx_rings_cleanup(ag);
+}
+
  static int ag71xx_open(struct net_device *dev)
  {
  	struct ag71xx *ag = netdev_priv(dev);
  	unsigned int max_frame_len;
  	int ret;
  
+	netif_carrier_off(dev);
  	max_frame_len = ag71xx_max_frame_len(dev->mtu);
  	ag->rx_buf_size = max_frame_len + NET_SKB_PAD + NET_IP_ALIGN;
  
  	/* setup max frame length */
  	ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len);
+	ag71xx_hw_set_macaddr(ag, dev->dev_addr);
  
-	ret = ag71xx_rings_init(ag);
+	ret = ag71xx_hw_enable(ag);
  	if (ret)
  		goto err;
  
-	napi_enable(&ag->napi);
-
-	netif_carrier_off(dev);
  	ag71xx_phy_start(ag);
  
-	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
-	ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
-
-	ag71xx_hw_set_macaddr(ag, dev->dev_addr);
-
-	netif_start_queue(dev);
-
  	return 0;
  
  err:
@@ -669,24 +698,10 @@ err:
  static int ag71xx_stop(struct net_device *dev)
  {
  	struct ag71xx *ag = netdev_priv(dev);
-	unsigned long flags;
  
  	netif_carrier_off(dev);
  	ag71xx_phy_stop(ag);
-
-	spin_lock_irqsave(&ag->lock, flags);
-
-	netif_stop_queue(dev);
-
-	ag71xx_hw_stop(ag);
-	ag71xx_dma_reset(ag);
-
-	napi_disable(&ag->napi);
-	del_timer_sync(&ag->oom_timer);
-
-	spin_unlock_irqrestore(&ag->lock, flags);
-
-	ag71xx_rings_cleanup(ag);
+	ag71xx_hw_disable(ag);
  
  	return 0;
  }
@@ -870,14 +885,10 @@ static void ag71xx_restart_work_func(struct work_struct *work)
  {
  	struct ag71xx *ag = container_of(work, struct ag71xx, restart_work);
  
-	if (ag71xx_get_pdata(ag)->is_ar724x) {
-		ag->link = 0;
-		ag71xx_link_adjust(ag);
-		return;
-	}
-
-	ag71xx_stop(ag->dev);
-	ag71xx_open(ag->dev);
+	rtnl_lock();
+	ag71xx_hw_disable(ag);
+	ag71xx_hw_enable(ag);
+	rtnl_unlock();
  }
  
  static bool ag71xx_check_dma_stuck(struct ag71xx *ag, unsigned long timestamp)
@@ -919,7 +930,7 @@ static int ag71xx_tx_packets(struct ag71xx *ag, bool flush)
  		struct sk_buff *skb = ring->buf[i].skb;
  
  		if (!flush && !ag71xx_desc_empty(desc)) {
-			if (pdata->is_ar7240 &&
+			if (pdata->is_ar724x &&
  			    ag71xx_check_dma_stuck(ag, ring->buf[i].timestamp))
  				schedule_work(&ag->restart_work);
  			break;
_______________________________________________
openwrt-devel mailing list
openwrt-devel at lists.openwrt.org
https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel



More information about the openwrt-devel mailing list