[OpenWrt-Devel] [PATCH] ar71xx: check for stuck DMA on AR724x & fix sirq storm after recovery

Felix Fietkau nbd at openwrt.org
Fri Jan 8 11:49:38 EST 2016


On 2016-01-08 06:26, Conn O'Griofa wrote:
> Hi,
> 
> I'm proposing the following patch to resolve ticket #18922 fully.
> 
> With the current master revision, when a tx timeout condition occurs,
> the interface recovers successfully, but a soft irq storm occurs
> (causing ksoftirqd to peg the CPU, due to this goto being called
> without end:
> https://github.com/openwrt-mirror/openwrt/blob/master/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c#L1073
> ). Forcing the tx and rx rings to be cleared and re-inited in
> ag71xx_restart_work_func seems to avoid the sirq storm, but I'd
> appreciate feedback on whether there's a more effective workaround.
> 
> Additionally, ag71xx_check_dma_stuck *does* successfully detect the
> stuck DMA condition on AR7241 (TR-WL842ND v1), so enabling the check
> for this chipset series ensures a link adjust occurs *before* an
> actual tx timeout is detected. This avoids the brief network
> interruption that normally occurs during the DMA stuck -> tx timeout
> -> link adjust condition.
> 
> Conn
> 
> P.S. The sirq storm also occurs when ag71xx_check_dma_stuck is utilized on this chipset to avoid the tx timeout condition, so it appears that both changes are necessary (or at least, a better way to solve the sirq storm needs to be discovered).
Thanks for investigating this further. Please try this patch:
---
--- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
+++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
@@ -631,34 +631,61 @@ void ag71xx_link_adjust(struct ag71xx *ag)
 		ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL));
 }
 
+static int ag71xx_hw_enable(struct ag71xx *ag)
+{
+	int ret;
+
+	ret = ag71xx_rings_init(ag);
+	if (ret)
+		return ret;
+
+	napi_enable(&ag->napi);
+	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+	ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
+	netif_start_queue(ag->dev);
+
+	return 0;
+}
+
+static void ag71xx_hw_disable(struct ag71xx *ag)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ag->lock, flags);
+
+	netif_stop_queue(ag->dev);
+
+	ag71xx_hw_stop(ag);
+	ag71xx_dma_reset(ag);
+
+	napi_disable(&ag->napi);
+	del_timer_sync(&ag->oom_timer);
+
+	spin_unlock_irqrestore(&ag->lock, flags);
+
+	ag71xx_rings_cleanup(ag);
+}
+
 static int ag71xx_open(struct net_device *dev)
 {
 	struct ag71xx *ag = netdev_priv(dev);
 	unsigned int max_frame_len;
 	int ret;
 
+	netif_carrier_off(dev);
 	max_frame_len = ag71xx_max_frame_len(dev->mtu);
 	ag->rx_buf_size = max_frame_len + NET_SKB_PAD + NET_IP_ALIGN;
 
 	/* setup max frame length */
 	ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len);
+	ag71xx_hw_set_macaddr(ag, dev->dev_addr);
 
-	ret = ag71xx_rings_init(ag);
+	ret = ag71xx_hw_enable(ag);
 	if (ret)
 		goto err;
 
-	napi_enable(&ag->napi);
-
-	netif_carrier_off(dev);
 	ag71xx_phy_start(ag);
 
-	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
-	ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
-
-	ag71xx_hw_set_macaddr(ag, dev->dev_addr);
-
-	netif_start_queue(dev);
-
 	return 0;
 
 err:
@@ -669,24 +696,10 @@ err:
 static int ag71xx_stop(struct net_device *dev)
 {
 	struct ag71xx *ag = netdev_priv(dev);
-	unsigned long flags;
 
 	netif_carrier_off(dev);
 	ag71xx_phy_stop(ag);
-
-	spin_lock_irqsave(&ag->lock, flags);
-
-	netif_stop_queue(dev);
-
-	ag71xx_hw_stop(ag);
-	ag71xx_dma_reset(ag);
-
-	napi_disable(&ag->napi);
-	del_timer_sync(&ag->oom_timer);
-
-	spin_unlock_irqrestore(&ag->lock, flags);
-
-	ag71xx_rings_cleanup(ag);
+	ag71xx_hw_disable(ag);
 
 	return 0;
 }
@@ -870,14 +883,10 @@ static void ag71xx_restart_work_func(struct work_struct *work)
 {
 	struct ag71xx *ag = container_of(work, struct ag71xx, restart_work);
 
-	if (ag71xx_get_pdata(ag)->is_ar724x) {
-		ag->link = 0;
-		ag71xx_link_adjust(ag);
-		return;
-	}
-
-	ag71xx_stop(ag->dev);
-	ag71xx_open(ag->dev);
+	rtnl_lock();
+	ag71xx_hw_disable(ag);
+	ag71xx_hw_enable(ag);
+	rtnl_unlock();
 }
 
 static bool ag71xx_check_dma_stuck(struct ag71xx *ag, unsigned long timestamp)
@@ -919,7 +928,7 @@ static int ag71xx_tx_packets(struct ag71xx *ag, bool flush)
 		struct sk_buff *skb = ring->buf[i].skb;
 
 		if (!flush && !ag71xx_desc_empty(desc)) {
-			if (pdata->is_ar7240 &&
+			if (pdata->is_ar724x &&
 			    ag71xx_check_dma_stuck(ag, ring->buf[i].timestamp))
 				schedule_work(&ag->restart_work);
 			break;
_______________________________________________
openwrt-devel mailing list
openwrt-devel at lists.openwrt.org
https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel



More information about the openwrt-devel mailing list