58 files changed, 50144 insertions, 0 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
new file mode 100644
index 00000000..ef378b5b
--- /dev/null
+++ b/drivers/dma/Kconfig
@@ -0,0 +1,297 @@
+#
+# DMA engine configuration
+#
+
+menuconfig DMADEVICES
+	bool "DMA Engine support"
+	depends on HAS_DMA
+	help
+	  DMA engines can do asynchronous data transfers without
+	  involving the host CPU.  Currently, this framework can be
+	  used to offload memory copies in the network stack and
+	  RAID operations in the MD driver.  This menu only presents
+	  DMA Device drivers supported by the configured arch, it may
+	  be empty in some cases.
+
+config DMADEVICES_DEBUG
+        bool "DMA Engine debugging"
+        depends on DMADEVICES != n
+        help
+          This is an option for use by developers; most people should
+          say N here.  This enables DMA engine core and driver debugging.
+
+config DMADEVICES_VDEBUG
+        bool "DMA Engine verbose debugging"
+        depends on DMADEVICES_DEBUG != n
+        help
+          This is an option for use by developers; most people should
+          say N here.  This enables deeper (more verbose) debugging of
+          the DMA engine core and drivers.
+
+
+if DMADEVICES
+
+comment "DMA Devices"
+
+config INTEL_MID_DMAC
+	tristate "Intel MID DMA support for Peripheral DMA controllers"
+	depends on PCI && X86
+	select DMA_ENGINE
+	default n
+	help
+	  Enable support for the Intel(R) MID DMA engine present
+	  in Intel MID chipsets.
+
+	  Say Y here if you have such a chipset.
+
+	  If unsure, say N.
+
+config ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	bool
+
+config AMBA_PL08X
+	bool "ARM PrimeCell PL080 or PL081 support"
+	depends on ARM_AMBA && EXPERIMENTAL
+	select DMA_ENGINE
+	help
+	  Platform has a PL08x DMAC device
+	  which can provide DMA engine support
+
+config INTEL_IOATDMA
+	tristate "Intel I/OAT DMA support"
+	depends on PCI && X86
+	select DMA_ENGINE
+	select DCA
+	select ASYNC_TX_DISABLE_PQ_VAL_DMA
+	select ASYNC_TX_DISABLE_XOR_VAL_DMA
+	help
+	  Enable support for the Intel(R) I/OAT DMA engine present
+	  in recent Intel Xeon chipsets.
+
+	  Say Y here if you have such a chipset.
+
+	  If unsure, say N.
+
+config INTEL_IOP_ADMA
+	tristate "Intel IOP ADMA support"
+	depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
+	select DMA_ENGINE
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	help
+	  Enable support for the Intel(R) IOP Series RAID engines.
+
+config DW_DMAC
+	tristate "Synopsys DesignWare AHB DMA support"
+	depends on HAVE_CLK
+	select DMA_ENGINE
+	default y if CPU_AT32AP7000
+	help
+	  Support the Synopsys DesignWare AHB DMA controller.  This
+	  can be integrated in chips such as the Atmel AT32ap7000.
+
+config AT_HDMAC
+	tristate "Atmel AHB DMA support"
+	depends on ARCH_AT91
+	select DMA_ENGINE
+	help
+	  Support the Atmel AHB DMA controller.
+
+config FSL_DMA
+	tristate "Freescale Elo and Elo Plus DMA support"
+	depends on FSL_SOC
+	select DMA_ENGINE
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	---help---
+	  Enable support for the Freescale Elo and Elo Plus DMA controllers.
+	  The Elo is the DMA controller on some 82xx and 83xx parts, and the
+	  Elo Plus is the DMA controller on 85xx and 86xx parts.
+
+config MPC512X_DMA
+	tristate "Freescale MPC512x built-in DMA engine support"
+	depends on PPC_MPC512x || PPC_MPC831x
+	select DMA_ENGINE
+	---help---
+	  Enable support for the Freescale MPC512x built-in DMA engine.
+
+config MV_XOR
+	bool "Marvell XOR engine support"
+	depends on PLAT_ORION
+	select DMA_ENGINE
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	---help---
+	  Enable support for the Marvell XOR engine.
+
+config MX3_IPU
+	bool "MX3x Image Processing Unit support"
+	depends on ARCH_MXC
+	select DMA_ENGINE
+	default y
+	help
+	  If you plan to use the Image Processing unit in the i.MX3x, say
+	  Y here. If unsure, select Y.
+
+config MX3_IPU_IRQS
+	int "Number of dynamically mapped interrupts for IPU"
+	depends on MX3_IPU
+	range 2 137
+	default 4
+	help
+	  Out of 137 interrupt sources on i.MX31 IPU only very few are used.
+	  To avoid bloating the irq_desc[] array we allocate a sufficient
+	  number of IRQ slots and map them dynamically to specific sources.
+
+config TXX9_DMAC
+	tristate "Toshiba TXx9 SoC DMA support"
+	depends on MACH_TX49XX || MACH_TX39XX
+	select DMA_ENGINE
+	help
+	  Support the TXx9 SoC internal DMA controller.  This can be
+	  integrated in chips such as the Toshiba TX4927/38/39.
+
+config SH_DMAE
+	tristate "Renesas SuperH DMAC support"
+	depends on (SUPERH && SH_DMA) || (ARM && ARCH_SHMOBILE)
+	depends on !SH_DMA_API
+	select DMA_ENGINE
+	help
+	  Enable support for the Renesas SuperH DMA controllers.
+
+config COH901318
+	bool "ST-Ericsson COH901318 DMA support"
+	select DMA_ENGINE
+	depends on ARCH_U300
+	help
+	  Enable support for ST-Ericsson COH 901 318 DMA.
+
+config STE_DMA40
+	bool "ST-Ericsson DMA40 support"
+	depends on ARCH_U8500
+	select DMA_ENGINE
+	help
+	  Support for ST-Ericsson DMA40 controller
+
+config AMCC_PPC440SPE_ADMA
+	tristate "AMCC PPC440SPe ADMA support"
+	depends on 440SPe || 440SP
+	select DMA_ENGINE
+	select ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	help
+	  Enable support for the AMCC PPC440SPe RAID engines.
+
+config TIMB_DMA
+	tristate "Timberdale FPGA DMA support"
+	depends on MFD_TIMBERDALE || HAS_IOMEM
+	select DMA_ENGINE
+	help
+	  Enable support for the Timberdale FPGA DMA engine.
+
+config SIRF_DMA
+	tristate "CSR SiRFprimaII DMA support"
+	depends on ARCH_PRIMA2
+	select DMA_ENGINE
+	help
+	  Enable support for the CSR SiRFprimaII DMA engine.
+
+config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+	bool
+
+config PL330_DMA
+	tristate "DMA API Driver for PL330"
+	select DMA_ENGINE
+	depends on ARM_AMBA
+	help
+	  Select if your platform has one or more PL330 DMACs.
+	  You need to provide platform specific settings via
+	  platform_data for a dma-pl330 device.
+
+config PCH_DMA
+	tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA"
+	depends on PCI && X86
+	select DMA_ENGINE
+	help
+	  Enable support for Intel EG20T PCH DMA engine.
+
+	  This driver also can be used for LAPIS Semiconductor IOH(Input/
+	  Output Hub), ML7213, ML7223 and ML7831.
+	  ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is
+	  for MP(Media Phone) use and ML7831 IOH is for general purpose use.
+	  ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+	  ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH.
+
+config IMX_SDMA
+	tristate "i.MX SDMA support"
+	depends on ARCH_MXC
+	select DMA_ENGINE
+	help
+	  Support the i.MX SDMA engine. This engine is integrated into
+	  Freescale i.MX25/31/35/51/53 chips.
+
+config IMX_DMA
+	tristate "i.MX DMA support"
+	depends on ARCH_MXC
+	select DMA_ENGINE
+	help
+	  Support the i.MX DMA engine. This engine is integrated into
+	  Freescale i.MX1/21/27 chips.
+
+config MXS_DMA
+	bool "MXS DMA support"
+	depends on SOC_IMX23 || SOC_IMX28
+	select DMA_ENGINE
+	help
+	  Support the MXS DMA engine. This engine including APBH-DMA
+	  and APBX-DMA is integrated into Freescale i.MX23/28 chips.
+
+config EP93XX_DMA
+	bool "Cirrus Logic EP93xx DMA support"
+	depends on ARCH_EP93XX
+	select DMA_ENGINE
+	help
+	  Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller.
+
+config DMA_SA11X0
+	tristate "SA-11x0 DMA support"
+	depends on ARCH_SA1100
+	select DMA_ENGINE
+	help
+	  Support the DMA engine found on Intel StrongARM SA-1100 and
+	  SA-1110 SoCs.  This DMA engine can only be used with on-chip
+	  devices.
+
+config DMA_ENGINE
+	bool
+
+comment "DMA Clients"
+	depends on DMA_ENGINE
+
+config NET_DMA
+	bool "Network: TCP receive copy offload"
+	depends on DMA_ENGINE && NET
+	default (INTEL_IOATDMA || FSL_DMA)
+	help
+	  This enables the use of DMA engines in the network stack to
+	  offload receive copy-to-user operations, freeing CPU cycles.
+
+	  Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise
+	  say N.
+
+config ASYNC_TX_DMA
+	bool "Async_tx: Offload support for the async_tx api"
+	depends on DMA_ENGINE
+	help
+	  This allows the async_tx api to take advantage of offload engines for
+	  memcpy, memset, xor, and raid6 p+q operations.  If your platform has
+	  a dma engine that can perform raid operations and you have enabled
+	  MD_RAID456 say Y.
+
+	  If unsure, say N.
+
+config DMATEST
+	tristate "DMA Test client"
+	depends on DMA_ENGINE
+	help
+	  Simple DMA test client. Say N unless you're debugging a
+	  DMA Device driver.
+
+endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
new file mode 100644
index 00000000..86b795ba
--- /dev/null
+++ b/drivers/dma/Makefile
@@ -0,0 +1,30 @@
+ccflags-$(CONFIG_DMADEVICES_DEBUG)  := -DDEBUG
+ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG
+
+obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+obj-$(CONFIG_NET_DMA) += iovlock.o
+obj-$(CONFIG_INTEL_MID_DMAC) += intel_mid_dma.o
+obj-$(CONFIG_DMATEST) += dmatest.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioat/
+obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
+obj-$(CONFIG_DW_DMAC) += dw_dmac.o
+obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
+obj-$(CONFIG_MX3_IPU) += ipu/
+obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
+obj-$(CONFIG_SH_DMAE) += shdma.o
+obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
+obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
+obj-$(CONFIG_IMX_DMA) += imx-dma.o
+obj-$(CONFIG_MXS_DMA) += mxs-dma.o
+obj-$(CONFIG_TIMB_DMA) += timb_dma.o
+obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
+obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
+obj-$(CONFIG_PL330_DMA) += pl330.o
+obj-$(CONFIG_PCH_DMA) += pch_dma.o
+obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
+obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
+obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
diff --git a/drivers/dma/TODO b/drivers/dma/TODO
new file mode 100644
index 00000000..734ed020
--- /dev/null
+++ b/drivers/dma/TODO
@@ -0,0 +1,13 @@
+TODO for slave dma
+
+1. Move remaining drivers to use new slave interface
+2. Remove old slave pointer machansim
+3. Make issue_pending to start the transaction in below drivers
+	- mpc512x_dma
+	- imx-dma
+	- imx-sdma
+	- mxs-dma.c
+	- dw_dmac
+	- intel_mid_dma
+4. Check other subsystems for dma drivers and merge/move to dmaengine
+5. Remove dma_slave_config's dma direction.
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
new file mode 100644
index 00000000..3d704abd
--- /dev/null
+++ b/drivers/dma/amba-pl08x.c
@@ -0,0 +1,2065 @@
+/*
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is in this distribution in the file
+ * called COPYING.
+ *
+ * Documentation: ARM DDI 0196G == PL080
+ * Documentation: ARM DDI 0218E == PL081
+ *
+ * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any
+ * channel.
+ *
+ * The PL080 has 8 channels available for simultaneous use, and the PL081
+ * has only two channels. So on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * The PL080 has a dual bus master, PL081 has a single master.
+ *
+ * Memory to peripheral transfer may be visualized as
+ *	Get data from memory to DMAC
+ *	Until no data left
+ *		On burst request from peripheral
+ *			Destination burst from DMAC to peripheral
+ *			Clear burst request
+ *	Raise terminal count interrupt
+ *
+ * For peripherals with a FIFO:
+ * Source      burst size == half the depth of the peripheral FIFO
+ * Destination burst size == the depth of the peripheral FIFO
+ *
+ * (Bursts are irrelevant for mem to mem transfers - there are no burst
+ * signals, the DMA controller will simply facilitate its AHB master.)
+ *
+ * ASSUMES default (little) endianness for DMA transfers
+ *
+ * The PL08x has two flow control settings:
+ *  - DMAC flow control: the transfer size defines the number of transfers
+ *    which occur for the current LLI entry, and the DMAC raises TC at the
+ *    end of every LLI entry.  Observed behaviour shows the DMAC listening
+ *    to both the BREQ and SREQ signals (contrary to documented),
+ *    transferring data if either is active.  The LBREQ and LSREQ signals
+ *    are ignored.
+ *
+ *  - Peripheral flow control: the transfer size is ignored (and should be
+ *    zero).  The data is transferred from the current LLI entry, until
+ *    after the final transfer signalled by LBREQ or LSREQ.  The DMAC
+ *    will then move to the next LLI entry.
+ *
+ * Global TODO:
+ * - Break out common code from arch/arm/mach-s3c64xx and share
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/pl08x.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <asm/hardware/pl080.h>
+
+#include "dmaengine.h"
+
+#define DRIVER_NAME	"pl08xdmac"
+
+static struct amba_driver pl08x_amba_driver;
+
+/**
+ * struct vendor_data - vendor-specific config parameters for PL08x derivatives
+ * @channels: the number of channels available in this variant
+ * @dualmaster: whether this version supports dual AHB masters or not.
+ */
+struct vendor_data {
+	u8 channels;
+	bool dualmaster;
+};
+
+/*
+ * PL08X private data structures
+ * An LLI struct - see PL08x TRM.  Note that next uses bit[0] as a bus bit,
+ * start & end do not - their bus bit info is in cctl.  Also note that these
+ * are fixed 32-bit quantities.
+ */
+struct pl08x_lli {
+	u32 src;
+	u32 dst;
+	u32 lli;
+	u32 cctl;
+};
+
+/**
+ * struct pl08x_driver_data - the local state holder for the PL08x
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @base: virtual memory base (remapped) for the PL08x
+ * @adev: the corresponding AMBA (PrimeCell) bus entry
+ * @vd: vendor data for this PL08x variant
+ * @pd: platform data passed in from the platform/machine
+ * @phy_chans: array of data for the physical channels
+ * @pool: a pool for the LLI descriptors
+ * @pool_ctr: counter of LLIs in the pool
+ * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI
+ * fetches
+ * @mem_buses: set to indicate memory transfers on AHB2.
+ * @lock: a spinlock for this struct
+ */
+struct pl08x_driver_data {
+	struct dma_device slave;
+	struct dma_device memcpy;
+	void __iomem *base;
+	struct amba_device *adev;
+	const struct vendor_data *vd;
+	struct pl08x_platform_data *pd;
+	struct pl08x_phy_chan *phy_chans;
+	struct dma_pool *pool;
+	int pool_ctr;
+	u8 lli_buses;
+	u8 mem_buses;
+	spinlock_t lock;
+};
+
+/*
+ * PL08X specific defines
+ */
+
+/* Size (bytes) of each LLI buffer allocated for one transfer */
+# define PL08X_LLI_TSFR_SIZE	0x2000
+
+/* Maximum times we call dma_pool_alloc on this pool without freeing */
+#define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli))
+#define PL08X_ALIGN		8
+
+static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct pl08x_dma_chan, chan);
+}
+
+static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct pl08x_txd, tx);
+}
+
+/*
+ * Physical channel handling
+ */
+
+/* Whether a certain channel is busy or not */
+static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
+{
+	unsigned int val;
+
+	val = readl(ch->base + PL080_CH_CONFIG);
+	return val & PL080_CONFIG_ACTIVE;
+}
+
+/*
+ * Set the initial DMA register values i.e. those for the first LLI
+ * The next LLI pointer and the configuration interrupt bit have
+ * been set when the LLIs were constructed.  Poke them into the hardware
+ * and start the transfer.
+ */
+static void pl08x_start_txd(struct pl08x_dma_chan *plchan,
+	struct pl08x_txd *txd)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_phy_chan *phychan = plchan->phychan;
+	struct pl08x_lli *lli = &txd->llis_va[0];
+	u32 val;
+
+	plchan->at = txd;
+
+	/* Wait for channel inactive */
+	while (pl08x_phy_channel_busy(phychan))
+		cpu_relax();
+
+	dev_vdbg(&pl08x->adev->dev,
+		"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+		"clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n",
+		phychan->id, lli->src, lli->dst, lli->lli, lli->cctl,
+		txd->ccfg);
+
+	writel(lli->src, phychan->base + PL080_CH_SRC_ADDR);
+	writel(lli->dst, phychan->base + PL080_CH_DST_ADDR);
+	writel(lli->lli, phychan->base + PL080_CH_LLI);
+	writel(lli->cctl, phychan->base + PL080_CH_CONTROL);
+	writel(txd->ccfg, phychan->base + PL080_CH_CONFIG);
+
+	/* Enable the DMA channel */
+	/* Do not access config register until channel shows as disabled */
+	while (readl(pl08x->base + PL080_EN_CHAN) & (1 << phychan->id))
+		cpu_relax();
+
+	/* Do not access config register until channel shows as inactive */
+	val = readl(phychan->base + PL080_CH_CONFIG);
+	while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
+		val = readl(phychan->base + PL080_CH_CONFIG);
+
+	writel(val | PL080_CONFIG_ENABLE, phychan->base + PL080_CH_CONFIG);
+}
+
+/*
+ * Pause the channel by setting the HALT bit.
+ *
+ * For M->P transfers, pause the DMAC first and then stop the peripheral -
+ * the FIFO can only drain if the peripheral is still requesting data.
+ * (note: this can still timeout if the DMAC FIFO never drains of data.)
+ *
+ * For P->M transfers, disable the peripheral first to stop it filling
+ * the DMAC FIFO, and then pause the DMAC.
+ */
+static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+	int timeout;
+
+	/* Set the HALT bit and wait for the FIFO to drain */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	val |= PL080_CONFIG_HALT;
+	writel(val, ch->base + PL080_CH_CONFIG);
+
+	/* Wait for channel inactive */
+	for (timeout = 1000; timeout; timeout--) {
+		if (!pl08x_phy_channel_busy(ch))
+			break;
+		udelay(1);
+	}
+	if (pl08x_phy_channel_busy(ch))
+		pr_err("pl08x: channel%u timeout waiting for pause\n", ch->id);
+}
+
+static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/* Clear the HALT bit */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	val &= ~PL080_CONFIG_HALT;
+	writel(val, ch->base + PL080_CH_CONFIG);
+}
+
+/*
+ * pl08x_terminate_phy_chan() stops the channel, clears the FIFO and
+ * clears any pending interrupt status.  This should not be used for
+ * an on-going transfer, but as a method of shutting down a channel
+ * (eg, when it's no longer used) or terminating a transfer.
+ */
+static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
+	struct pl08x_phy_chan *ch)
+{
+	u32 val = readl(ch->base + PL080_CH_CONFIG);
+
+	val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
+	         PL080_CONFIG_TC_IRQ_MASK);
+
+	writel(val, ch->base + PL080_CH_CONFIG);
+
+	writel(1 << ch->id, pl08x->base + PL080_ERR_CLEAR);
+	writel(1 << ch->id, pl08x->base + PL080_TC_CLEAR);
+}
+
+static inline u32 get_bytes_in_cctl(u32 cctl)
+{
+	/* The source width defines the number of bytes */
+	u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+	case PL080_WIDTH_8BIT:
+		break;
+	case PL080_WIDTH_16BIT:
+		bytes *= 2;
+		break;
+	case PL080_WIDTH_32BIT:
+		bytes *= 4;
+		break;
+	}
+	return bytes;
+}
+
+/* The channel should be paused when calling this */
+static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_phy_chan *ch;
+	struct pl08x_txd *txd;
+	unsigned long flags;
+	size_t bytes = 0;
+
+	spin_lock_irqsave(&plchan->lock, flags);
+	ch = plchan->phychan;
+	txd = plchan->at;
+
+	/*
+	 * Follow the LLIs to get the number of remaining
+	 * bytes in the currently active transaction.
+	 */
+	if (ch && txd) {
+		u32 clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
+
+		/* First get the remaining bytes in the active transfer */
+		bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
+
+		if (clli) {
+			struct pl08x_lli *llis_va = txd->llis_va;
+			dma_addr_t llis_bus = txd->llis_bus;
+			int index;
+
+			BUG_ON(clli < llis_bus || clli >= llis_bus +
+				sizeof(struct pl08x_lli) * MAX_NUM_TSFR_LLIS);
+
+			/*
+			 * Locate the next LLI - as this is an array,
+			 * it's simple maths to find.
+			 */
+			index = (clli - llis_bus) / sizeof(struct pl08x_lli);
+
+			for (; index < MAX_NUM_TSFR_LLIS; index++) {
+				bytes += get_bytes_in_cctl(llis_va[index].cctl);
+
+				/*
+				 * A LLI pointer of 0 terminates the LLI list
+				 */
+				if (!llis_va[index].lli)
+					break;
+			}
+		}
+	}
+
+	/* Sum up all queued transactions */
+	if (!list_empty(&plchan->pend_list)) {
+		struct pl08x_txd *txdi;
+		list_for_each_entry(txdi, &plchan->pend_list, node) {
+			struct pl08x_sg *dsg;
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				bytes += dsg->len;
+		}
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	return bytes;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static struct pl08x_phy_chan *
+pl08x_get_phy_channel(struct pl08x_driver_data *pl08x,
+		      struct pl08x_dma_chan *virt_chan)
+{
+	struct pl08x_phy_chan *ch = NULL;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		ch = &pl08x->phy_chans[i];
+
+		spin_lock_irqsave(&ch->lock, flags);
+
+		if (!ch->serving) {
+			ch->serving = virt_chan;
+			ch->signal = -1;
+			spin_unlock_irqrestore(&ch->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	if (i == pl08x->vd->channels) {
+		/* No physical channel available, cope with it */
+		return NULL;
+	}
+
+	pm_runtime_get_sync(&pl08x->adev->dev);
+	return ch;
+}
+
+static inline void pl08x_put_phy_channel(struct pl08x_driver_data *pl08x,
+					 struct pl08x_phy_chan *ch)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ch->lock, flags);
+
+	/* Stop the channel and clear its interrupts */
+	pl08x_terminate_phy_chan(pl08x, ch);
+
+	pm_runtime_put(&pl08x->adev->dev);
+
+	/* Mark it as free */
+	ch->serving = NULL;
+	spin_unlock_irqrestore(&ch->lock, flags);
+}
+
+/*
+ * LLI handling
+ */
+
+static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
+{
+	switch (coded) {
+	case PL080_WIDTH_8BIT:
+		return 1;
+	case PL080_WIDTH_16BIT:
+		return 2;
+	case PL080_WIDTH_32BIT:
+		return 4;
+	default:
+		break;
+	}
+	BUG();
+	return 0;
+}
+
+static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
+				  size_t tsize)
+{
+	u32 retbits = cctl;
+
+	/* Remove all src, dst and transfer size bits */
+	retbits &= ~PL080_CONTROL_DWIDTH_MASK;
+	retbits &= ~PL080_CONTROL_SWIDTH_MASK;
+	retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+	/* Then set the bits according to the parameters */
+	switch (srcwidth) {
+	case 1:
+		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	case 2:
+		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	case 4:
+		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	switch (dstwidth) {
+	case 1:
+		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case 2:
+		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case 4:
+		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
+	return retbits;
+}
+
+struct pl08x_lli_build_data {
+	struct pl08x_txd *txd;
+	struct pl08x_bus_data srcbus;
+	struct pl08x_bus_data dstbus;
+	size_t remainder;
+	u32 lli_bus;
+};
+
+/*
+ * Autoselect a master bus to use for the transfer. Slave will be the chosen as
+ * victim in case src & dest are not similarly aligned. i.e. If after aligning
+ * masters address with width requirements of transfer (by sending few byte by
+ * byte data), slave is still not aligned, then its width will be reduced to
+ * BYTE.
+ * - prefers the destination bus if both available
+ * - prefers bus with fixed address (i.e. peripheral)
+ */
+static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd,
+	struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl)
+{
+	if (!(cctl & PL080_CONTROL_DST_INCR)) {
+		*mbus = &bd->dstbus;
+		*sbus = &bd->srcbus;
+	} else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
+		*mbus = &bd->srcbus;
+		*sbus = &bd->dstbus;
+	} else {
+		if (bd->dstbus.buswidth >= bd->srcbus.buswidth) {
+			*mbus = &bd->dstbus;
+			*sbus = &bd->srcbus;
+		} else {
+			*mbus = &bd->srcbus;
+			*sbus = &bd->dstbus;
+		}
+	}
+}
+
+/*
+ * Fills in one LLI for a certain transfer descriptor and advance the counter
+ */
+static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd,
+	int num_llis, int len, u32 cctl)
+{
+	struct pl08x_lli *llis_va = bd->txd->llis_va;
+	dma_addr_t llis_bus = bd->txd->llis_bus;
+
+	BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS);
+
+	llis_va[num_llis].cctl = cctl;
+	llis_va[num_llis].src = bd->srcbus.addr;
+	llis_va[num_llis].dst = bd->dstbus.addr;
+	llis_va[num_llis].lli = llis_bus + (num_llis + 1) *
+		sizeof(struct pl08x_lli);
+	llis_va[num_llis].lli |= bd->lli_bus;
+
+	if (cctl & PL080_CONTROL_SRC_INCR)
+		bd->srcbus.addr += len;
+	if (cctl & PL080_CONTROL_DST_INCR)
+		bd->dstbus.addr += len;
+
+	BUG_ON(bd->remainder < len);
+
+	bd->remainder -= len;
+}
+
+static inline void prep_byte_width_lli(struct pl08x_lli_build_data *bd,
+		u32 *cctl, u32 len, int num_llis, size_t *total_bytes)
+{
+	*cctl = pl08x_cctl_bits(*cctl, 1, 1, len);
+	pl08x_fill_lli_for_desc(bd, num_llis, len, *cctl);
+	(*total_bytes) += len;
+}
+
+/*
+ * This fills in the table of LLIs for the transfer descriptor
+ * Note that we assume we never have to change the burst sizes
+ * Return 0 for error
+ */
+static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
+			      struct pl08x_txd *txd)
+{
+	struct pl08x_bus_data *mbus, *sbus;
+	struct pl08x_lli_build_data bd;
+	int num_llis = 0;
+	u32 cctl, early_bytes = 0;
+	size_t max_bytes_per_lli, total_bytes;
+	struct pl08x_lli *llis_va;
+	struct pl08x_sg *dsg;
+
+	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus);
+	if (!txd->llis_va) {
+		dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__);
+		return 0;
+	}
+
+	pl08x->pool_ctr++;
+
+	bd.txd = txd;
+	bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0;
+	cctl = txd->cctl;
+
+	/* Find maximum width of the source bus */
+	bd.srcbus.maxwidth =
+		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >>
+				       PL080_CONTROL_SWIDTH_SHIFT);
+
+	/* Find maximum width of the destination bus */
+	bd.dstbus.maxwidth =
+		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >>
+				       PL080_CONTROL_DWIDTH_SHIFT);
+
+	list_for_each_entry(dsg, &txd->dsg_list, node) {
+		total_bytes = 0;
+		cctl = txd->cctl;
+
+		bd.srcbus.addr = dsg->src_addr;
+		bd.dstbus.addr = dsg->dst_addr;
+		bd.remainder = dsg->len;
+		bd.srcbus.buswidth = bd.srcbus.maxwidth;
+		bd.dstbus.buswidth = bd.dstbus.maxwidth;
+
+		pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
+
+		dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu\n",
+			bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "",
+			bd.srcbus.buswidth,
+			bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "",
+			bd.dstbus.buswidth,
+			bd.remainder);
+		dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n",
+			mbus == &bd.srcbus ? "src" : "dst",
+			sbus == &bd.srcbus ? "src" : "dst");
+
+		/*
+		 * Zero length is only allowed if all these requirements are
+		 * met:
+		 * - flow controller is peripheral.
+		 * - src.addr is aligned to src.width
+		 * - dst.addr is aligned to dst.width
+		 *
+		 * sg_len == 1 should be true, as there can be two cases here:
+		 *
+		 * - Memory addresses are contiguous and are not scattered.
+		 *   Here, Only one sg will be passed by user driver, with
+		 *   memory address and zero length. We pass this to controller
+		 *   and after the transfer it will receive the last burst
+		 *   request from peripheral and so transfer finishes.
+		 *
+		 * - Memory addresses are scattered and are not contiguous.
+		 *   Here, Obviously as DMA controller doesn't know when a lli's
+		 *   transfer gets over, it can't load next lli. So in this
+		 *   case, there has to be an assumption that only one lli is
+		 *   supported. Thus, we can't have scattered addresses.
+		 */
+		if (!bd.remainder) {
+			u32 fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
+				PL080_CONFIG_FLOW_CONTROL_SHIFT;
+			if (!((fc >= PL080_FLOW_SRC2DST_DST) &&
+					(fc <= PL080_FLOW_SRC2DST_SRC))) {
+				dev_err(&pl08x->adev->dev, "%s sg len can't be zero",
+					__func__);
+				return 0;
+			}
+
+			if ((bd.srcbus.addr % bd.srcbus.buswidth) ||
+					(bd.dstbus.addr % bd.dstbus.buswidth)) {
+				dev_err(&pl08x->adev->dev,
+					"%s src & dst address must be aligned to src"
+					" & dst width if peripheral is flow controller",
+					__func__);
+				return 0;
+			}
+
+			cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
+					bd.dstbus.buswidth, 0);
+			pl08x_fill_lli_for_desc(&bd, num_llis++, 0, cctl);
+			break;
+		}
+
+		/*
+		 * Send byte by byte for following cases
+		 * - Less than a bus width available
+		 * - until master bus is aligned
+		 */
+		if (bd.remainder < mbus->buswidth)
+			early_bytes = bd.remainder;
+		else if ((mbus->addr) % (mbus->buswidth)) {
+			early_bytes = mbus->buswidth - (mbus->addr) %
+				(mbus->buswidth);
+			if ((bd.remainder - early_bytes) < mbus->buswidth)
+				early_bytes = bd.remainder;
+		}
+
+		if (early_bytes) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%s byte width LLIs (remain 0x%08x)\n",
+				__func__, bd.remainder);
+			prep_byte_width_lli(&bd, &cctl, early_bytes, num_llis++,
+				&total_bytes);
+		}
+
+		if (bd.remainder) {
+			/*
+			 * Master now aligned
+			 * - if slave is not then we must set its width down
+			 */
+			if (sbus->addr % sbus->buswidth) {
+				dev_dbg(&pl08x->adev->dev,
+					"%s set down bus width to one byte\n",
+					__func__);
+
+				sbus->buswidth = 1;
+			}
+
+			/*
+			 * Bytes transferred = tsize * src width, not
+			 * MIN(buswidths)
+			 */
+			max_bytes_per_lli = bd.srcbus.buswidth *
+				PL080_CONTROL_TRANSFER_SIZE_MASK;
+			dev_vdbg(&pl08x->adev->dev,
+				"%s max bytes per lli = %zu\n",
+				__func__, max_bytes_per_lli);
+
+			/*
+			 * Make largest possible LLIs until less than one bus
+			 * width left
+			 */
+			while (bd.remainder > (mbus->buswidth - 1)) {
+				size_t lli_len, tsize, width;
+
+				/*
+				 * If enough left try to send max possible,
+				 * otherwise try to send the remainder
+				 */
+				lli_len = min(bd.remainder, max_bytes_per_lli);
+
+				/*
+				 * Check against maximum bus alignment:
+				 * Calculate actual transfer size in relation to
+				 * bus width an get a maximum remainder of the
+				 * highest bus width - 1
+				 */
+				width = max(mbus->buswidth, sbus->buswidth);
+				lli_len = (lli_len / width) * width;
+				tsize = lli_len / bd.srcbus.buswidth;
+
+				dev_vdbg(&pl08x->adev->dev,
+					"%s fill lli with single lli chunk of "
+					"size 0x%08zx (remainder 0x%08zx)\n",
+					__func__, lli_len, bd.remainder);
+
+				cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
+					bd.dstbus.buswidth, tsize);
+				pl08x_fill_lli_for_desc(&bd, num_llis++,
+						lli_len, cctl);
+				total_bytes += lli_len;
+			}
+
+			/*
+			 * Send any odd bytes
+			 */
+			if (bd.remainder) {
+				dev_vdbg(&pl08x->adev->dev,
+					"%s align with boundary, send odd bytes (remain %zu)\n",
+					__func__, bd.remainder);
+				prep_byte_width_lli(&bd, &cctl, bd.remainder,
+						num_llis++, &total_bytes);
+			}
+		}
+
+		if (total_bytes != dsg->len) {
+			dev_err(&pl08x->adev->dev,
+				"%s size of encoded lli:s don't match total txd, transferred 0x%08zx from size 0x%08zx\n",
+				__func__, total_bytes, dsg->len);
+			return 0;
+		}
+
+		if (num_llis >= MAX_NUM_TSFR_LLIS) {
+			dev_err(&pl08x->adev->dev,
+				"%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n",
+				__func__, (u32) MAX_NUM_TSFR_LLIS);
+			return 0;
+		}
+	}
+
+	llis_va = txd->llis_va;
+	/* The final LLI terminates the LLI. */
+	llis_va[num_llis - 1].lli = 0;
+	/* The final LLI element shall also fire an interrupt. */
+	llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN;
+
+#ifdef VERBOSE_DEBUG
+	{
+		int i;
+
+		dev_vdbg(&pl08x->adev->dev,
+			 "%-3s %-9s  %-10s %-10s %-10s %s\n",
+			 "lli", "", "csrc", "cdst", "clli", "cctl");
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				 "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				 i, &llis_va[i], llis_va[i].src,
+				 llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl
+				);
+		}
+	}
+#endif
+
+	return num_llis;
+}
+
+/* You should call this with the struct pl08x lock held */
+static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
+			   struct pl08x_txd *txd)
+{
+	struct pl08x_sg *dsg, *_dsg;
+
+	/* Free the LLI */
+	if (txd->llis_va)
+		dma_pool_free(pl08x->pool, txd->llis_va, txd->llis_bus);
+
+	pl08x->pool_ctr--;
+
+	list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+		list_del(&dsg->node);
+		kfree(dsg);
+	}
+
+	kfree(txd);
+}
+
+static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x,
+				struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_txd *txdi = NULL;
+	struct pl08x_txd *next;
+
+	if (!list_empty(&plchan->pend_list)) {
+		list_for_each_entry_safe(txdi,
+					 next, &plchan->pend_list, node) {
+			list_del(&txdi->node);
+			pl08x_free_txd(pl08x, txdi);
+		}
+	}
+}
+
+/*
+ * The DMA ENGINE API
+ */
+static int pl08x_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void pl08x_free_chan_resources(struct dma_chan *chan)
+{
+}
+
+/*
+ * This should be called with the channel plchan->lock held
+ */
+static int prep_phy_channel(struct pl08x_dma_chan *plchan,
+			    struct pl08x_txd *txd)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_phy_chan *ch;
+	int ret;
+
+	/* Check if we already have a channel */
+	if (plchan->phychan) {
+		ch = plchan->phychan;
+		goto got_channel;
+	}
+
+	ch = pl08x_get_phy_channel(pl08x, plchan);
+	if (!ch) {
+		/* No physical channel available, cope with it */
+		dev_dbg(&pl08x->adev->dev, "no physical channel available for xfer on %s\n", plchan->name);
+		return -EBUSY;
+	}
+
+	/*
+	 * OK we have a physical channel: for memcpy() this is all we
+	 * need, but for slaves the physical signals may be muxed!
+	 * Can the platform allow us to use this channel?
+	 */
+	if (plchan->slave && pl08x->pd->get_signal) {
+		ret = pl08x->pd->get_signal(plchan);
+		if (ret < 0) {
+			dev_dbg(&pl08x->adev->dev,
+				"unable to use physical channel %d for transfer on %s due to platform restrictions\n",
+				ch->id, plchan->name);
+			/* Release physical channel & return */
+			pl08x_put_phy_channel(pl08x, ch);
+			return -EBUSY;
+		}
+		ch->signal = ret;
+	}
+
+	plchan->phychan = ch;
+	dev_dbg(&pl08x->adev->dev, "allocated physical channel %d and signal %d for xfer on %s\n",
+		 ch->id,
+		 ch->signal,
+		 plchan->name);
+
+got_channel:
+	/* Assign the flow control signal to this channel */
+	if (txd->direction == DMA_MEM_TO_DEV)
+		txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT;
+	else if (txd->direction == DMA_DEV_TO_MEM)
+		txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT;
+
+	plchan->phychan_hold++;
+
+	return 0;
+}
+
+static void release_phy_channel(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+
+	if ((plchan->phychan->signal >= 0) && pl08x->pd->put_signal) {
+		pl08x->pd->put_signal(plchan);
+		plchan->phychan->signal = -1;
+	}
+	pl08x_put_phy_channel(pl08x, plchan->phychan);
+	plchan->phychan = NULL;
+}
+
+static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan);
+	struct pl08x_txd *txd = to_pl08x_txd(tx);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&plchan->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	/* Put this onto the pending list */
+	list_add_tail(&txd->node, &plchan->pend_list);
+
+	/*
+	 * If there was no physical channel available for this memcpy,
+	 * stack the request up and indicate that the channel is waiting
+	 * for a free physical channel.
+	 */
+	if (!plchan->slave && !plchan->phychan) {
+		/* Do this memcpy whenever there is a channel ready */
+		plchan->state = PL08X_CHAN_WAITING;
+		plchan->waiting = txd;
+	} else {
+		plchan->phychan_hold--;
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	return cookie;
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt(
+		struct dma_chan *chan, unsigned long flags)
+{
+	struct dma_async_tx_descriptor *retval = NULL;
+
+	return retval;
+}
+
+/*
+ * Code accessing dma_async_is_complete() in a tight loop may give problems.
+ * If slaves are relying on interrupts to signal completion this function
+ * must not be called with interrupts disabled.
+ */
+static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	/*
+	 * This cookie not complete yet
+	 * Get number of bytes left in the active transactions and queue
+	 */
+	dma_set_residue(txstate, pl08x_getbytes_chan(plchan));
+
+	if (plchan->state == PL08X_CHAN_PAUSED)
+		return DMA_PAUSED;
+
+	/* Whether waiting or running, we're in progress */
+	return DMA_IN_PROGRESS;
+}
+
+/* PrimeCell DMA extension */
+struct burst_table {
+	u32 burstwords;
+	u32 reg;
+};
+
+static const struct burst_table burst_sizes[] = {
+	{
+		.burstwords = 256,
+		.reg = PL080_BSIZE_256,
+	},
+	{
+		.burstwords = 128,
+		.reg = PL080_BSIZE_128,
+	},
+	{
+		.burstwords = 64,
+		.reg = PL080_BSIZE_64,
+	},
+	{
+		.burstwords = 32,
+		.reg = PL080_BSIZE_32,
+	},
+	{
+		.burstwords = 16,
+		.reg = PL080_BSIZE_16,
+	},
+	{
+		.burstwords = 8,
+		.reg = PL080_BSIZE_8,
+	},
+	{
+		.burstwords = 4,
+		.reg = PL080_BSIZE_4,
+	},
+	{
+		.burstwords = 0,
+		.reg = PL080_BSIZE_1,
+	},
+};
+
+/*
+ * Given the source and destination available bus masks, select which
+ * will be routed to each port.  We try to have source and destination
+ * on separate ports, but always respect the allowable settings.
+ */
+static u32 pl08x_select_bus(u8 src, u8 dst)
+{
+	u32 cctl = 0;
+
+	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
+		cctl |= PL080_CONTROL_DST_AHB2;
+	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
+		cctl |= PL080_CONTROL_SRC_AHB2;
+
+	return cctl;
+}
+
+static u32 pl08x_cctl(u32 cctl)
+{
+	cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 |
+		  PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR |
+		  PL080_CONTROL_PROT_MASK);
+
+	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
+	return cctl | PL080_CONTROL_PROT_SYS;
+}
+
+static u32 pl08x_width(enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		return PL080_WIDTH_8BIT;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return PL080_WIDTH_16BIT;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return PL080_WIDTH_32BIT;
+	default:
+		return ~0;
+	}
+}
+
+static u32 pl08x_burst(u32 maxburst)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
+		if (burst_sizes[i].burstwords <= maxburst)
+			break;
+
+	return burst_sizes[i].reg;
+}
+
+static int dma_set_runtime_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	enum dma_slave_buswidth addr_width;
+	u32 width, burst, maxburst;
+	u32 cctl = 0;
+
+	if (!plchan->slave)
+		return -EINVAL;
+
+	/* Transfer direction */
+	plchan->runtime_direction = config->direction;
+	if (config->direction == DMA_MEM_TO_DEV) {
+		addr_width = config->dst_addr_width;
+		maxburst = config->dst_maxburst;
+	} else if (config->direction == DMA_DEV_TO_MEM) {
+		addr_width = config->src_addr_width;
+		maxburst = config->src_maxburst;
+	} else {
+		dev_err(&pl08x->adev->dev,
+			"bad runtime_config: alien transfer direction\n");
+		return -EINVAL;
+	}
+
+	width = pl08x_width(addr_width);
+	if (width == ~0) {
+		dev_err(&pl08x->adev->dev,
+			"bad runtime_config: alien address width\n");
+		return -EINVAL;
+	}
+
+	cctl |= width << PL080_CONTROL_SWIDTH_SHIFT;
+	cctl |= width << PL080_CONTROL_DWIDTH_SHIFT;
+
+	/*
+	 * If this channel will only request single transfers, set this
+	 * down to ONE element.  Also select one element if no maxburst
+	 * is specified.
+	 */
+	if (plchan->cd->single)
+		maxburst = 1;
+
+	burst = pl08x_burst(maxburst);
+	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
+	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
+
+	plchan->device_fc = config->device_fc;
+
+	if (plchan->runtime_direction == DMA_DEV_TO_MEM) {
+		plchan->src_addr = config->src_addr;
+		plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
+			pl08x_select_bus(plchan->cd->periph_buses,
+					 pl08x->mem_buses);
+	} else {
+		plchan->dst_addr = config->dst_addr;
+		plchan->dst_cctl = pl08x_cctl(cctl) | PL080_CONTROL_SRC_INCR |
+			pl08x_select_bus(pl08x->mem_buses,
+					 plchan->cd->periph_buses);
+	}
+
+	dev_dbg(&pl08x->adev->dev,
+		"configured channel %s (%s) for %s, data width %d, "
+		"maxburst %d words, LE, CCTL=0x%08x\n",
+		dma_chan_name(chan), plchan->name,
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
+		addr_width,
+		maxburst,
+		cctl);
+
+	return 0;
+}
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void pl08x_issue_pending(struct dma_chan *chan)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&plchan->lock, flags);
+	/* Something is already active, or we're waiting for a channel... */
+	if (plchan->at || plchan->state == PL08X_CHAN_WAITING) {
+		spin_unlock_irqrestore(&plchan->lock, flags);
+		return;
+	}
+
+	/* Take the first element in the queue and execute it */
+	if (!list_empty(&plchan->pend_list)) {
+		struct pl08x_txd *next;
+
+		next = list_first_entry(&plchan->pend_list,
+					struct pl08x_txd,
+					node);
+		list_del(&next->node);
+		plchan->state = PL08X_CHAN_RUNNING;
+
+		pl08x_start_txd(plchan, next);
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+}
+
+static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan,
+					struct pl08x_txd *txd)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	unsigned long flags;
+	int num_llis, ret;
+
+	num_llis = pl08x_fill_llis_for_desc(pl08x, txd);
+	if (!num_llis) {
+		spin_lock_irqsave(&plchan->lock, flags);
+		pl08x_free_txd(pl08x, txd);
+		spin_unlock_irqrestore(&plchan->lock, flags);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&plchan->lock, flags);
+
+	/*
+	 * See if we already have a physical channel allocated,
+	 * else this is the time to try to get one.
+	 */
+	ret = prep_phy_channel(plchan, txd);
+	if (ret) {
+		/*
+		 * No physical channel was available.
+		 *
+		 * memcpy transfers can be sorted out at submission time.
+		 *
+		 * Slave transfers may have been denied due to platform
+		 * channel muxing restrictions.  Since there is no guarantee
+		 * that this will ever be resolved, and the signal must be
+		 * acquired AFTER acquiring the physical channel, we will let
+		 * them be NACK:ed with -EBUSY here. The drivers can retry
+		 * the prep() call if they are eager on doing this using DMA.
+		 */
+		if (plchan->slave) {
+			pl08x_free_txd_list(pl08x, plchan);
+			pl08x_free_txd(pl08x, txd);
+			spin_unlock_irqrestore(&plchan->lock, flags);
+			return -EBUSY;
+		}
+	} else
+		/*
+		 * Else we're all set, paused and ready to roll, status
+		 * will switch to PL08X_CHAN_RUNNING when we call
+		 * issue_pending(). If there is something running on the
+		 * channel already we don't change its state.
+		 */
+		if (plchan->state == PL08X_CHAN_IDLE)
+			plchan->state = PL08X_CHAN_PAUSED;
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	return 0;
+}
+
+static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan,
+	unsigned long flags)
+{
+	struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+	if (txd) {
+		dma_async_tx_descriptor_init(&txd->tx, &plchan->chan);
+		txd->tx.flags = flags;
+		txd->tx.tx_submit = pl08x_tx_submit;
+		INIT_LIST_HEAD(&txd->node);
+		INIT_LIST_HEAD(&txd->dsg_list);
+
+		/* Always enable error and terminal interrupts */
+		txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+			    PL080_CONFIG_TC_IRQ_MASK;
+	}
+	return txd;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	struct pl08x_sg *dsg;
+	int ret;
+
+	txd = pl08x_get_txd(plchan, flags);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev,
+			"%s no memory for descriptor\n", __func__);
+		return NULL;
+	}
+
+	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+	if (!dsg) {
+		pl08x_free_txd(pl08x, txd);
+		dev_err(&pl08x->adev->dev, "%s no memory for pl080 sg\n",
+				__func__);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	txd->direction = DMA_NONE;
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+
+	/* Set platform data for m2m */
+	txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+	txd->cctl = pl08x->pd->memcpy_channel.cctl &
+			~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2);
+
+	/* Both to be incremented or the code will break */
+	txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+
+	if (pl08x->vd->dualmaster)
+		txd->cctl |= pl08x_select_bus(pl08x->mem_buses,
+					      pl08x->mem_buses);
+
+	ret = pl08x_prep_channel_resources(plchan, txd);
+	if (ret)
+		return NULL;
+
+	return &txd->tx;
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	struct pl08x_sg *dsg;
+	struct scatterlist *sg;
+	dma_addr_t slave_addr;
+	int ret, tmp;
+
+	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
+			__func__, sgl->length, plchan->name);
+
+	txd = pl08x_get_txd(plchan, flags);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev, "%s no txd\n", __func__);
+		return NULL;
+	}
+
+	if (direction != plchan->runtime_direction)
+		dev_err(&pl08x->adev->dev, "%s DMA setup does not match "
+			"the direction configured for the PrimeCell\n",
+			__func__);
+
+	/*
+	 * Set up addresses, the PrimeCell configured address
+	 * will take precedence since this may configure the
+	 * channel target address dynamically at runtime.
+	 */
+	txd->direction = direction;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->cctl = plchan->dst_cctl;
+		slave_addr = plchan->dst_addr;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		txd->cctl = plchan->src_cctl;
+		slave_addr = plchan->src_addr;
+	} else {
+		pl08x_free_txd(pl08x, txd);
+		dev_err(&pl08x->adev->dev,
+			"%s direction unsupported\n", __func__);
+		return NULL;
+	}
+
+	if (plchan->device_fc)
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
+			PL080_FLOW_PER2MEM_PER;
+	else
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
+			PL080_FLOW_PER2MEM;
+
+	txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+
+	for_each_sg(sgl, sg, sg_len, tmp) {
+		dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+		if (!dsg) {
+			pl08x_free_txd(pl08x, txd);
+			dev_err(&pl08x->adev->dev, "%s no mem for pl080 sg\n",
+					__func__);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = sg_dma_len(sg);
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = sg_phys(sg);
+			dsg->dst_addr = slave_addr;
+		} else {
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = sg_phys(sg);
+		}
+	}
+
+	ret = pl08x_prep_channel_resources(plchan, txd);
+	if (ret)
+		return NULL;
+
+	return &txd->tx;
+}
+
+static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			 unsigned long arg)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	unsigned long flags;
+	int ret = 0;
+
+	/* Controls applicable to inactive channels */
+	if (cmd == DMA_SLAVE_CONFIG) {
+		return dma_set_runtime_config(chan,
+					      (struct dma_slave_config *)arg);
+	}
+
+	/*
+	 * Anything succeeds on channels with no physical allocation and
+	 * no queued transfers.
+	 */
+	spin_lock_irqsave(&plchan->lock, flags);
+	if (!plchan->phychan && !plchan->at) {
+		spin_unlock_irqrestore(&plchan->lock, flags);
+		return 0;
+	}
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		plchan->state = PL08X_CHAN_IDLE;
+
+		if (plchan->phychan) {
+			pl08x_terminate_phy_chan(pl08x, plchan->phychan);
+
+			/*
+			 * Mark physical channel as free and free any slave
+			 * signal
+			 */
+			release_phy_channel(plchan);
+			plchan->phychan_hold = 0;
+		}
+		/* Dequeue jobs and free LLIs */
+		if (plchan->at) {
+			pl08x_free_txd(pl08x, plchan->at);
+			plchan->at = NULL;
+		}
+		/* Dequeue jobs not yet fired as well */
+		pl08x_free_txd_list(pl08x, plchan);
+		break;
+	case DMA_PAUSE:
+		pl08x_pause_phy_chan(plchan->phychan);
+		plchan->state = PL08X_CHAN_PAUSED;
+		break;
+	case DMA_RESUME:
+		pl08x_resume_phy_chan(plchan->phychan);
+		plchan->state = PL08X_CHAN_RUNNING;
+		break;
+	default:
+		/* Unknown command */
+		ret = -ENXIO;
+		break;
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	return ret;
+}
+
+bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	struct pl08x_dma_chan *plchan;
+	char *name = chan_id;
+
+	/* Reject channels for devices not bound to this driver */
+	if (chan->device->dev->driver != &pl08x_amba_driver.drv)
+		return false;
+
+	plchan = to_pl08x_chan(chan);
+
+	/* Check that the channel is not taken! */
+	if (!strcmp(plchan->name, name))
+		return true;
+
+	return false;
+}
+
+/*
+ * Just check that the device is there and active
+ * TODO: turn this bit on/off depending on the number of physical channels
+ * actually used, if it is zero... well shut it off. That will save some
+ * power. Cut the clock at the same time.
+ */
+static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
+{
+	writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG);
+}
+
+static void pl08x_unmap_buffers(struct pl08x_txd *txd)
+{
+	struct device *dev = txd->tx.chan->device->dev;
+	struct pl08x_sg *dsg;
+
+	if (!(txd->tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		if (txd->tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_single(dev, dsg->src_addr, dsg->len,
+						DMA_TO_DEVICE);
+		else {
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_page(dev, dsg->src_addr, dsg->len,
+						DMA_TO_DEVICE);
+		}
+	}
+	if (!(txd->tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		if (txd->tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_single(dev, dsg->dst_addr, dsg->len,
+						DMA_FROM_DEVICE);
+		else
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_page(dev, dsg->dst_addr, dsg->len,
+						DMA_FROM_DEVICE);
+	}
+}
+
+static void pl08x_tasklet(unsigned long data)
+{
+	struct pl08x_dma_chan *plchan = (struct pl08x_dma_chan *) data;
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&plchan->lock, flags);
+
+	txd = plchan->at;
+	plchan->at = NULL;
+
+	if (txd) {
+		/* Update last completed */
+		dma_cookie_complete(&txd->tx);
+	}
+
+	/* If a new descriptor is queued, set it up plchan->at is NULL here */
+	if (!list_empty(&plchan->pend_list)) {
+		struct pl08x_txd *next;
+
+		next = list_first_entry(&plchan->pend_list,
+					struct pl08x_txd,
+					node);
+		list_del(&next->node);
+
+		pl08x_start_txd(plchan, next);
+	} else if (plchan->phychan_hold) {
+		/*
+		 * This channel is still in use - we have a new txd being
+		 * prepared and will soon be queued.  Don't give up the
+		 * physical channel.
+		 */
+	} else {
+		struct pl08x_dma_chan *waiting = NULL;
+
+		/*
+		 * No more jobs, so free up the physical channel
+		 * Free any allocated signal on slave transfers too
+		 */
+		release_phy_channel(plchan);
+		plchan->state = PL08X_CHAN_IDLE;
+
+		/*
+		 * And NOW before anyone else can grab that free:d up
+		 * physical channel, see if there is some memcpy pending
+		 * that seriously needs to start because of being stacked
+		 * up while we were choking the physical channels with data.
+		 */
+		list_for_each_entry(waiting, &pl08x->memcpy.channels,
+				    chan.device_node) {
+			if (waiting->state == PL08X_CHAN_WAITING &&
+				waiting->waiting != NULL) {
+				int ret;
+
+				/* This should REALLY not fail now */
+				ret = prep_phy_channel(waiting,
+						       waiting->waiting);
+				BUG_ON(ret);
+				waiting->phychan_hold--;
+				waiting->state = PL08X_CHAN_RUNNING;
+				waiting->waiting = NULL;
+				pl08x_issue_pending(&waiting->chan);
+				break;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	if (txd) {
+		dma_async_tx_callback callback = txd->tx.callback;
+		void *callback_param = txd->tx.callback_param;
+
+		/* Don't try to unmap buffers on slave channels */
+		if (!plchan->slave)
+			pl08x_unmap_buffers(txd);
+
+		/* Free the descriptor */
+		spin_lock_irqsave(&plchan->lock, flags);
+		pl08x_free_txd(pl08x, txd);
+		spin_unlock_irqrestore(&plchan->lock, flags);
+
+		/* Callback to signal completion */
+		if (callback)
+			callback(callback_param);
+	}
+}
+
+static irqreturn_t pl08x_irq(int irq, void *dev)
+{
+	struct pl08x_driver_data *pl08x = dev;
+	u32 mask = 0, err, tc, i;
+
+	/* check & clear - ERR & TC interrupts */
+	err = readl(pl08x->base + PL080_ERR_STATUS);
+	if (err) {
+		dev_err(&pl08x->adev->dev, "%s error interrupt, register value 0x%08x\n",
+			__func__, err);
+		writel(err, pl08x->base + PL080_ERR_CLEAR);
+	}
+	tc = readl(pl08x->base + PL080_INT_STATUS);
+	if (tc)
+		writel(tc, pl08x->base + PL080_TC_CLEAR);
+
+	if (!err && !tc)
+		return IRQ_NONE;
+
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		if (((1 << i) & err) || ((1 << i) & tc)) {
+			/* Locate physical channel */
+			struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i];
+			struct pl08x_dma_chan *plchan = phychan->serving;
+
+			if (!plchan) {
+				dev_err(&pl08x->adev->dev,
+					"%s Error TC interrupt on unused channel: 0x%08x\n",
+					__func__, i);
+				continue;
+			}
+
+			/* Schedule tasklet on this channel */
+			tasklet_schedule(&plchan->tasklet);
+			mask |= (1 << i);
+		}
+	}
+
+	return mask ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan)
+{
+	u32 cctl = pl08x_cctl(chan->cd->cctl);
+
+	chan->slave = true;
+	chan->name = chan->cd->bus_id;
+	chan->src_addr = chan->cd->addr;
+	chan->dst_addr = chan->cd->addr;
+	chan->src_cctl = cctl | PL080_CONTROL_DST_INCR |
+		pl08x_select_bus(chan->cd->periph_buses, chan->host->mem_buses);
+	chan->dst_cctl = cctl | PL080_CONTROL_SRC_INCR |
+		pl08x_select_bus(chan->host->mem_buses, chan->cd->periph_buses);
+}
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
+		struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+	struct pl08x_dma_chan *chan;
+	int i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+
+	/*
+	 * Register as many many memcpy as we have physical channels,
+	 * we won't always be able to use all but the code will have
+	 * to cope with that situation.
+	 */
+	for (i = 0; i < channels; i++) {
+		chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+		if (!chan) {
+			dev_err(&pl08x->adev->dev,
+				"%s no memory for channel\n", __func__);
+			return -ENOMEM;
+		}
+
+		chan->host = pl08x;
+		chan->state = PL08X_CHAN_IDLE;
+
+		if (slave) {
+			chan->cd = &pl08x->pd->slave_channels[i];
+			pl08x_dma_slave_init(chan);
+		} else {
+			chan->cd = &pl08x->pd->memcpy_channel;
+			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+			if (!chan->name) {
+				kfree(chan);
+				return -ENOMEM;
+			}
+		}
+		if (chan->cd->circular_buffer) {
+			dev_err(&pl08x->adev->dev,
+				"channel %s: circular buffers not supported\n",
+				chan->name);
+			kfree(chan);
+			continue;
+		}
+		dev_dbg(&pl08x->adev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 chan->name);
+
+		chan->chan.device = dmadev;
+		dma_cookie_init(&chan->chan);
+
+		spin_lock_init(&chan->lock);
+		INIT_LIST_HEAD(&chan->pend_list);
+		tasklet_init(&chan->tasklet, pl08x_tasklet,
+			     (unsigned long) chan);
+
+		list_add_tail(&chan->chan.device_node, &dmadev->channels);
+	}
+	dev_info(&pl08x->adev->dev, "initialized %d virtual %s channels\n",
+		 i, slave ? "slave" : "memcpy");
+	return i;
+}
+
+static void pl08x_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct pl08x_dma_chan *chan = NULL;
+	struct pl08x_dma_chan *next;
+
+	list_for_each_entry_safe(chan,
+				 next, &dmadev->channels, chan.device_node) {
+		list_del(&chan->chan.device_node);
+		kfree(chan);
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+static const char *pl08x_state_str(enum pl08x_dma_chan_state state)
+{
+	switch (state) {
+	case PL08X_CHAN_IDLE:
+		return "idle";
+	case PL08X_CHAN_RUNNING:
+		return "running";
+	case PL08X_CHAN_PAUSED:
+		return "paused";
+	case PL08X_CHAN_WAITING:
+		return "waiting";
+	default:
+		break;
+	}
+	return "UNKNOWN STATE";
+}
+
+static int pl08x_debugfs_show(struct seq_file *s, void *data)
+{
+	struct pl08x_driver_data *pl08x = s->private;
+	struct pl08x_dma_chan *chan;
+	struct pl08x_phy_chan *ch;
+	unsigned long flags;
+	int i;
+
+	seq_printf(s, "PL08x physical channels:\n");
+	seq_printf(s, "CHANNEL:\tUSER:\n");
+	seq_printf(s, "--------\t-----\n");
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		struct pl08x_dma_chan *virt_chan;
+
+		ch = &pl08x->phy_chans[i];
+
+		spin_lock_irqsave(&ch->lock, flags);
+		virt_chan = ch->serving;
+
+		seq_printf(s, "%d\t\t%s\n",
+			   ch->id, virt_chan ? virt_chan->name : "(none)");
+
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	seq_printf(s, "\nPL08x virtual memcpy channels:\n");
+	seq_printf(s, "CHANNEL:\tSTATE:\n");
+	seq_printf(s, "--------\t------\n");
+	list_for_each_entry(chan, &pl08x->memcpy.channels, chan.device_node) {
+		seq_printf(s, "%s\t\t%s\n", chan->name,
+			   pl08x_state_str(chan->state));
+	}
+
+	seq_printf(s, "\nPL08x virtual slave channels:\n");
+	seq_printf(s, "CHANNEL:\tSTATE:\n");
+	seq_printf(s, "--------\t------\n");
+	list_for_each_entry(chan, &pl08x->slave.channels, chan.device_node) {
+		seq_printf(s, "%s\t\t%s\n", chan->name,
+			   pl08x_state_str(chan->state));
+	}
+
+	return 0;
+}
+
+static int pl08x_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pl08x_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations pl08x_debugfs_operations = {
+	.open		= pl08x_debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+	/* Expose a simple debugfs interface to view all clocks */
+	(void) debugfs_create_file(dev_name(&pl08x->adev->dev),
+			S_IFREG | S_IRUGO, NULL, pl08x,
+			&pl08x_debugfs_operations);
+}
+
+#else
+static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+}
+#endif
+
+static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	struct pl08x_driver_data *pl08x;
+	const struct vendor_data *vd = id->data;
+	int ret = 0;
+	int i;
+
+	ret = amba_request_regions(adev, NULL);
+	if (ret)
+		return ret;
+
+	/* Create the driver state holder */
+	pl08x = kzalloc(sizeof(*pl08x), GFP_KERNEL);
+	if (!pl08x) {
+		ret = -ENOMEM;
+		goto out_no_pl08x;
+	}
+
+	pm_runtime_set_active(&adev->dev);
+	pm_runtime_enable(&adev->dev);
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
+	pl08x->memcpy.dev = &adev->dev;
+	pl08x->memcpy.device_alloc_chan_resources = pl08x_alloc_chan_resources;
+	pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources;
+	pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy;
+	pl08x->memcpy.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
+	pl08x->memcpy.device_tx_status = pl08x_dma_tx_status;
+	pl08x->memcpy.device_issue_pending = pl08x_issue_pending;
+	pl08x->memcpy.device_control = pl08x_control;
+
+	/* Initialize slave engine */
+	dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+	pl08x->slave.dev = &adev->dev;
+	pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources;
+	pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
+	pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
+	pl08x->slave.device_tx_status = pl08x_dma_tx_status;
+	pl08x->slave.device_issue_pending = pl08x_issue_pending;
+	pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+	pl08x->slave.device_control = pl08x_control;
+
+	/* Get the platform data */
+	pl08x->pd = dev_get_platdata(&adev->dev);
+	if (!pl08x->pd) {
+		dev_err(&adev->dev, "no platform data supplied\n");
+		goto out_no_platdata;
+	}
+
+	/* Assign useful pointers to the driver state */
+	pl08x->adev = adev;
+	pl08x->vd = vd;
+
+	/* By default, AHB1 only.  If dualmaster, from platform */
+	pl08x->lli_buses = PL08X_AHB1;
+	pl08x->mem_buses = PL08X_AHB1;
+	if (pl08x->vd->dualmaster) {
+		pl08x->lli_buses = pl08x->pd->lli_buses;
+		pl08x->mem_buses = pl08x->pd->mem_buses;
+	}
+
+	/* A DMA memory pool for LLIs, align on 1-byte boundary */
+	pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev,
+			PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0);
+	if (!pl08x->pool) {
+		ret = -ENOMEM;
+		goto out_no_lli_pool;
+	}
+
+	spin_lock_init(&pl08x->lock);
+
+	pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
+	if (!pl08x->base) {
+		ret = -ENOMEM;
+		goto out_no_ioremap;
+	}
+
+	/* Turn on the PL08x */
+	pl08x_ensure_on(pl08x);
+
+	/* Attach the interrupt handler */
+	writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
+
+	ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED,
+			  DRIVER_NAME, pl08x);
+	if (ret) {
+		dev_err(&adev->dev, "%s failed to request interrupt %d\n",
+			__func__, adev->irq[0]);
+		goto out_no_irq;
+	}
+
+	/* Initialize physical channels */
+	pl08x->phy_chans = kmalloc((vd->channels * sizeof(*pl08x->phy_chans)),
+			GFP_KERNEL);
+	if (!pl08x->phy_chans) {
+		dev_err(&adev->dev, "%s failed to allocate "
+			"physical channel holders\n",
+			__func__);
+		goto out_no_phychans;
+	}
+
+	for (i = 0; i < vd->channels; i++) {
+		struct pl08x_phy_chan *ch = &pl08x->phy_chans[i];
+
+		ch->id = i;
+		ch->base = pl08x->base + PL080_Cx_BASE(i);
+		spin_lock_init(&ch->lock);
+		ch->serving = NULL;
+		ch->signal = -1;
+		dev_dbg(&adev->dev, "physical channel %d is %s\n",
+			i, pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE");
+	}
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->memcpy,
+					      pl08x->vd->channels, false);
+	if (ret <= 0) {
+		dev_warn(&pl08x->adev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		goto out_no_memcpy;
+	}
+	pl08x->memcpy.chancnt = ret;
+
+	/* Register slave channels */
+	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
+			pl08x->pd->num_slave_channels, true);
+	if (ret <= 0) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to enumerate slave channels - %d\n",
+				__func__, ret);
+		goto out_no_slave;
+	}
+	pl08x->slave.chancnt = ret;
+
+	ret = dma_async_device_register(&pl08x->memcpy);
+	if (ret) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto out_no_memcpy_reg;
+	}
+
+	ret = dma_async_device_register(&pl08x->slave);
+	if (ret) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+		goto out_no_slave_reg;
+	}
+
+	amba_set_drvdata(adev, pl08x);
+	init_pl08x_debugfs(pl08x);
+	dev_info(&pl08x->adev->dev, "DMA: PL%03x rev%u at 0x%08llx irq %d\n",
+		 amba_part(adev), amba_rev(adev),
+		 (unsigned long long)adev->res.start, adev->irq[0]);
+
+	pm_runtime_put(&adev->dev);
+	return 0;
+
+out_no_slave_reg:
+	dma_async_device_unregister(&pl08x->memcpy);
+out_no_memcpy_reg:
+	pl08x_free_virtual_channels(&pl08x->slave);
+out_no_slave:
+	pl08x_free_virtual_channels(&pl08x->memcpy);
+out_no_memcpy:
+	kfree(pl08x->phy_chans);
+out_no_phychans:
+	free_irq(adev->irq[0], pl08x);
+out_no_irq:
+	iounmap(pl08x->base);
+out_no_ioremap:
+	dma_pool_destroy(pl08x->pool);
+out_no_lli_pool:
+out_no_platdata:
+	pm_runtime_put(&adev->dev);
+	pm_runtime_disable(&adev->dev);
+
+	kfree(pl08x);
+out_no_pl08x:
+	amba_release_regions(adev);
+	return ret;
+}
+
+/* PL080 has 8 channels and the PL080 have just 2 */
+static struct vendor_data vendor_pl080 = {
+	.channels = 8,
+	.dualmaster = true,
+};
+
+static struct vendor_data vendor_pl081 = {
+	.channels = 2,
+	.dualmaster = false,
+};
+
+static struct amba_id pl08x_ids[] = {
+	/* PL080 */
+	{
+		.id	= 0x00041080,
+		.mask	= 0x000fffff,
+		.data	= &vendor_pl080,
+	},
+	/* PL081 */
+	{
+		.id	= 0x00041081,
+		.mask	= 0x000fffff,
+		.data	= &vendor_pl081,
+	},
+	/* Nomadik 8815 PL080 variant */
+	{
+		.id	= 0x00280880,
+		.mask	= 0x00ffffff,
+		.data	= &vendor_pl080,
+	},
+	{ 0, 0 },
+};
+
+MODULE_DEVICE_TABLE(amba, pl08x_ids);
+
+static struct amba_driver pl08x_amba_driver = {
+	.drv.name	= DRIVER_NAME,
+	.id_table	= pl08x_ids,
+	.probe		= pl08x_probe,
+};
+
+static int __init pl08x_init(void)
+{
+	int retval;
+	retval = amba_driver_register(&pl08x_amba_driver);
+	if (retval)
+		printk(KERN_WARNING DRIVER_NAME
+		       "failed to register as an AMBA device (%d)\n",
+		       retval);
+	return retval;
+}
+subsys_initcall(pl08x_init);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
new file mode 100644
index 00000000..bf0d7e4e
--- /dev/null
+++ b/drivers/dma/at_hdmac.c
@@ -0,0 +1,1583 @@
+/*
+ * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems)
+ *
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * This supports the Atmel AHB DMA Controller,
+ *
+ * The driver has currently been tested with the Atmel AT91SAM9RL
+ * and AT91SAM9G45 series.
+ */
+
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "at_hdmac_regs.h"
+#include "dmaengine.h"
+
+/*
+ * Glossary
+ * --------
+ *
+ * at_hdmac		: Name of the ATmel AHB DMA Controller
+ * at_dma_ / atdma	: ATmel DMA controller entity related
+ * atc_	/ atchan	: ATmel DMA Channel entity related
+ */
+
+#define	ATC_DEFAULT_CFG		(ATC_FIFOCFG_HALFFIFO)
+#define	ATC_DEFAULT_CTRLA	(0)
+#define	ATC_DEFAULT_CTRLB	(ATC_SIF(AT_DMA_MEM_IF) \
+				|ATC_DIF(AT_DMA_MEM_IF))
+
+/*
+ * Initial number of descriptors to allocate for each channel. This could
+ * be increased during dma usage.
+ */
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+		 "initial descriptors per channel (default: 64)");
+
+
+/* prototypes */
+static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx);
+
+
+/*----------------------------------------------------------------------*/
+
+static struct at_desc *atc_first_active(struct at_dma_chan *atchan)
+{
+	return list_first_entry(&atchan->active_list,
+				struct at_desc, desc_node);
+}
+
+static struct at_desc *atc_first_queued(struct at_dma_chan *atchan)
+{
+	return list_first_entry(&atchan->queue,
+				struct at_desc, desc_node);
+}
+
+/**
+ * atc_alloc_descriptor - allocate and return an initialized descriptor
+ * @chan: the channel to allocate descriptors for
+ * @gfp_flags: GFP allocation flags
+ *
+ * Note: The ack-bit is positioned in the descriptor flag at creation time
+ *       to make initial allocation more convenient. This bit will be cleared
+ *       and control will be given to client at usage time (during
+ *       preparation functions).
+ */
+static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
+					    gfp_t gfp_flags)
+{
+	struct at_desc	*desc = NULL;
+	struct at_dma	*atdma = to_at_dma(chan->device);
+	dma_addr_t phys;
+
+	desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
+	if (desc) {
+		memset(desc, 0, sizeof(struct at_desc));
+		INIT_LIST_HEAD(&desc->tx_list);
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		/* txd.flags will be overwritten in prep functions */
+		desc->txd.flags = DMA_CTRL_ACK;
+		desc->txd.tx_submit = atc_tx_submit;
+		desc->txd.phys = phys;
+	}
+
+	return desc;
+}
+
+/**
+ * atc_desc_get - get an unused descriptor from free_list
+ * @atchan: channel we want a new descriptor for
+ */
+static struct at_desc *atc_desc_get(struct at_dma_chan *atchan)
+{
+	struct at_desc *desc, *_desc;
+	struct at_desc *ret = NULL;
+	unsigned long flags;
+	unsigned int i = 0;
+	LIST_HEAD(tmp_list);
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
+		i++;
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+		dev_dbg(chan2dev(&atchan->chan_common),
+				"desc %p not ACKed\n", desc);
+	}
+	spin_unlock_irqrestore(&atchan->lock, flags);
+	dev_vdbg(chan2dev(&atchan->chan_common),
+		"scanned %u descriptors on freelist\n", i);
+
+	/* no more descriptor available in initial pool: create one more */
+	if (!ret) {
+		ret = atc_alloc_descriptor(&atchan->chan_common, GFP_ATOMIC);
+		if (ret) {
+			spin_lock_irqsave(&atchan->lock, flags);
+			atchan->descs_allocated++;
+			spin_unlock_irqrestore(&atchan->lock, flags);
+		} else {
+			dev_err(chan2dev(&atchan->chan_common),
+					"not enough descriptors available\n");
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * atc_desc_put - move a descriptor, including any children, to the free list
+ * @atchan: channel we work on
+ * @desc: descriptor, at the head of a chain, to move to free list
+ */
+static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+	if (desc) {
+		struct at_desc *child;
+		unsigned long flags;
+
+		spin_lock_irqsave(&atchan->lock, flags);
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			dev_vdbg(chan2dev(&atchan->chan_common),
+					"moving child desc %p to freelist\n",
+					child);
+		list_splice_init(&desc->tx_list, &atchan->free_list);
+		dev_vdbg(chan2dev(&atchan->chan_common),
+			 "moving desc %p to freelist\n", desc);
+		list_add(&desc->desc_node, &atchan->free_list);
+		spin_unlock_irqrestore(&atchan->lock, flags);
+	}
+}
+
+/**
+ * atc_desc_chain - build chain adding a descripor
+ * @first: address of first descripor of the chain
+ * @prev: address of previous descripor of the chain
+ * @desc: descriptor to queue
+ *
+ * Called from prep_* functions
+ */
+static void atc_desc_chain(struct at_desc **first, struct at_desc **prev,
+			   struct at_desc *desc)
+{
+	if (!(*first)) {
+		*first = desc;
+	} else {
+		/* inform the HW lli about chaining */
+		(*prev)->lli.dscr = desc->txd.phys;
+		/* insert the link descriptor to the LD ring */
+		list_add_tail(&desc->desc_node,
+				&(*first)->tx_list);
+	}
+	*prev = desc;
+}
+
+/**
+ * atc_dostart - starts the DMA engine for real
+ * @atchan: the channel we want to start
+ * @first: first descriptor in the list we want to begin with
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	/* ASSERT:  channel is idle */
+	if (atc_chan_is_enabled(atchan)) {
+		dev_err(chan2dev(&atchan->chan_common),
+			"BUG: Attempted to start non-idle channel\n");
+		dev_err(chan2dev(&atchan->chan_common),
+			"  channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
+			channel_readl(atchan, SADDR),
+			channel_readl(atchan, DADDR),
+			channel_readl(atchan, CTRLA),
+			channel_readl(atchan, CTRLB),
+			channel_readl(atchan, DSCR));
+
+		/* The tasklet will hopefully advance the queue... */
+		return;
+	}
+
+	vdbg_dump_regs(atchan);
+
+	channel_writel(atchan, SADDR, 0);
+	channel_writel(atchan, DADDR, 0);
+	channel_writel(atchan, CTRLA, 0);
+	channel_writel(atchan, CTRLB, 0);
+	channel_writel(atchan, DSCR, first->txd.phys);
+	dma_writel(atdma, CHER, atchan->mask);
+
+	vdbg_dump_regs(atchan);
+}
+
+/**
+ * atc_chain_complete - finish work for one transaction chain
+ * @atchan: channel we work on
+ * @desc: descriptor at the head of the chain we want do complete
+ *
+ * Called with atchan->lock held and bh disabled */
+static void
+atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+	struct dma_async_tx_descriptor	*txd = &desc->txd;
+
+	dev_vdbg(chan2dev(&atchan->chan_common),
+		"descriptor %u complete\n", txd->cookie);
+
+	/* mark the descriptor as complete for non cyclic cases only */
+	if (!atc_chan_is_cyclic(atchan))
+		dma_cookie_complete(txd);
+
+	/* move children to free_list */
+	list_splice_init(&desc->tx_list, &atchan->free_list);
+	/* move myself to free_list */
+	list_move(&desc->desc_node, &atchan->free_list);
+
+	/* unmap dma addresses (not on slave channels) */
+	if (!atchan->chan_common.private) {
+		struct device *parent = chan2parent(&atchan->chan_common);
+		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+				dma_unmap_single(parent,
+						desc->lli.daddr,
+						desc->len, DMA_FROM_DEVICE);
+			else
+				dma_unmap_page(parent,
+						desc->lli.daddr,
+						desc->len, DMA_FROM_DEVICE);
+		}
+		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+				dma_unmap_single(parent,
+						desc->lli.saddr,
+						desc->len, DMA_TO_DEVICE);
+			else
+				dma_unmap_page(parent,
+						desc->lli.saddr,
+						desc->len, DMA_TO_DEVICE);
+		}
+	}
+
+	/* for cyclic transfers,
+	 * no need to replay callback function while stopping */
+	if (!atc_chan_is_cyclic(atchan)) {
+		dma_async_tx_callback	callback = txd->callback;
+		void			*param = txd->callback_param;
+
+		/*
+		 * The API requires that no submissions are done from a
+		 * callback, so we don't need to drop the lock here
+		 */
+		if (callback)
+			callback(param);
+	}
+
+	dma_run_dependencies(txd);
+}
+
+/**
+ * atc_complete_all - finish work for all transactions
+ * @atchan: channel to complete transactions for
+ *
+ * Eventually submit queued descriptors if any
+ *
+ * Assume channel is idle while calling this function
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_complete_all(struct at_dma_chan *atchan)
+{
+	struct at_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n");
+
+	BUG_ON(atc_chan_is_enabled(atchan));
+
+	/*
+	 * Submit queued descriptors ASAP, i.e. before we go through
+	 * the completed ones.
+	 */
+	if (!list_empty(&atchan->queue))
+		atc_dostart(atchan, atc_first_queued(atchan));
+	/* empty active_list now it is completed */
+	list_splice_init(&atchan->active_list, &list);
+	/* empty queue list by moving descriptors (if any) to active_list */
+	list_splice_init(&atchan->queue, &atchan->active_list);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		atc_chain_complete(atchan, desc);
+}
+
+/**
+ * atc_cleanup_descriptors - cleanup up finished descriptors in active_list
+ * @atchan: channel to be cleaned up
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_cleanup_descriptors(struct at_dma_chan *atchan)
+{
+	struct at_desc	*desc, *_desc;
+	struct at_desc	*child;
+
+	dev_vdbg(chan2dev(&atchan->chan_common), "cleanup descriptors\n");
+
+	list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) {
+		if (!(desc->lli.ctrla & ATC_DONE))
+			/* This one is currently in progress */
+			return;
+
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			if (!(child->lli.ctrla & ATC_DONE))
+				/* Currently in progress */
+				return;
+
+		/*
+		 * No descriptors so far seem to be in progress, i.e.
+		 * this chain must be done.
+		 */
+		atc_chain_complete(atchan, desc);
+	}
+}
+
+/**
+ * atc_advance_work - at the end of a transaction, move forward
+ * @atchan: channel where the transaction ended
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_advance_work(struct at_dma_chan *atchan)
+{
+	dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n");
+
+	if (list_empty(&atchan->active_list) ||
+	    list_is_singular(&atchan->active_list)) {
+		atc_complete_all(atchan);
+	} else {
+		atc_chain_complete(atchan, atc_first_active(atchan));
+		/* advance work */
+		atc_dostart(atchan, atc_first_active(atchan));
+	}
+}
+
+
+/**
+ * atc_handle_error - handle errors reported by DMA controller
+ * @atchan: channel where error occurs
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_handle_error(struct at_dma_chan *atchan)
+{
+	struct at_desc *bad_desc;
+	struct at_desc *child;
+
+	/*
+	 * The descriptor currently at the head of the active list is
+	 * broked. Since we don't have any way to report errors, we'll
+	 * just have to scream loudly and try to carry on.
+	 */
+	bad_desc = atc_first_active(atchan);
+	list_del_init(&bad_desc->desc_node);
+
+	/* As we are stopped, take advantage to push queued descriptors
+	 * in active_list */
+	list_splice_init(&atchan->queue, atchan->active_list.prev);
+
+	/* Try to restart the controller */
+	if (!list_empty(&atchan->active_list))
+		atc_dostart(atchan, atc_first_active(atchan));
+
+	/*
+	 * KERN_CRITICAL may seem harsh, but since this only happens
+	 * when someone submits a bad physical address in a
+	 * descriptor, we should consider ourselves lucky that the
+	 * controller flagged an error instead of scribbling over
+	 * random memory locations.
+	 */
+	dev_crit(chan2dev(&atchan->chan_common),
+			"Bad descriptor submitted for DMA!\n");
+	dev_crit(chan2dev(&atchan->chan_common),
+			"  cookie: %d\n", bad_desc->txd.cookie);
+	atc_dump_lli(atchan, &bad_desc->lli);
+	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+		atc_dump_lli(atchan, &child->lli);
+
+	/* Pretend the descriptor completed successfully */
+	atc_chain_complete(atchan, bad_desc);
+}
+
+/**
+ * atc_handle_cyclic - at the end of a period, run callback function
+ * @atchan: channel used for cyclic operations
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_handle_cyclic(struct at_dma_chan *atchan)
+{
+	struct at_desc			*first = atc_first_active(atchan);
+	struct dma_async_tx_descriptor	*txd = &first->txd;
+	dma_async_tx_callback		callback = txd->callback;
+	void				*param = txd->callback_param;
+
+	dev_vdbg(chan2dev(&atchan->chan_common),
+			"new cyclic period llp 0x%08x\n",
+			channel_readl(atchan, DSCR));
+
+	if (callback)
+		callback(param);
+}
+
+/*--  IRQ & Tasklet  ---------------------------------------------------*/
+
+static void atc_tasklet(unsigned long data)
+{
+	struct at_dma_chan *atchan = (struct at_dma_chan *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status))
+		atc_handle_error(atchan);
+	else if (atc_chan_is_cyclic(atchan))
+		atc_handle_cyclic(atchan);
+	else
+		atc_advance_work(atchan);
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+}
+
+static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
+{
+	struct at_dma		*atdma = (struct at_dma *)dev_id;
+	struct at_dma_chan	*atchan;
+	int			i;
+	u32			status, pending, imr;
+	int			ret = IRQ_NONE;
+
+	do {
+		imr = dma_readl(atdma, EBCIMR);
+		status = dma_readl(atdma, EBCISR);
+		pending = status & imr;
+
+		if (!pending)
+			break;
+
+		dev_vdbg(atdma->dma_common.dev,
+			"interrupt: status = 0x%08x, 0x%08x, 0x%08x\n",
+			 status, imr, pending);
+
+		for (i = 0; i < atdma->dma_common.chancnt; i++) {
+			atchan = &atdma->chan[i];
+			if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) {
+				if (pending & AT_DMA_ERR(i)) {
+					/* Disable channel on AHB error */
+					dma_writel(atdma, CHDR,
+						AT_DMA_RES(i) | atchan->mask);
+					/* Give information to tasklet */
+					set_bit(ATC_IS_ERROR, &atchan->status);
+				}
+				tasklet_schedule(&atchan->tasklet);
+				ret = IRQ_HANDLED;
+			}
+		}
+
+	} while (pending);
+
+	return ret;
+}
+
+
+/*--  DMA Engine API  --------------------------------------------------*/
+
+/**
+ * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine
+ * @desc: descriptor at the head of the transaction chain
+ *
+ * Queue chain if DMA engine is working already
+ *
+ * Cookie increment and adding to active_list or queue must be atomic
+ */
+static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct at_desc		*desc = txd_to_at_desc(tx);
+	struct at_dma_chan	*atchan = to_at_dma_chan(tx->chan);
+	dma_cookie_t		cookie;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	if (list_empty(&atchan->active_list)) {
+		dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
+				desc->txd.cookie);
+		atc_dostart(atchan, desc);
+		list_add_tail(&desc->desc_node, &atchan->active_list);
+	} else {
+		dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
+				desc->txd.cookie);
+		list_add_tail(&desc->desc_node, &atchan->queue);
+	}
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return cookie;
+}
+
+/**
+ * atc_prep_dma_memcpy - prepare a memcpy operation
+ * @chan: the channel to prepare operation on
+ * @dest: operation virtual destination address
+ * @src: operation virtual source address
+ * @len: operation length
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_desc		*desc = NULL;
+	struct at_desc		*first = NULL;
+	struct at_desc		*prev = NULL;
+	size_t			xfer_count;
+	size_t			offset;
+	unsigned int		src_width;
+	unsigned int		dst_width;
+	u32			ctrla;
+	u32			ctrlb;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n",
+			dest, src, len, flags);
+
+	if (unlikely(!len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+		return NULL;
+	}
+
+	ctrla =   ATC_DEFAULT_CTRLA;
+	ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
+		| ATC_SRC_ADDR_MODE_INCR
+		| ATC_DST_ADDR_MODE_INCR
+		| ATC_FC_MEM2MEM;
+
+	/*
+	 * We can be a lot more clever here, but this should take care
+	 * of the most common optimization.
+	 */
+	if (!((src | dest  | len) & 3)) {
+		ctrla |= ATC_SRC_WIDTH_WORD | ATC_DST_WIDTH_WORD;
+		src_width = dst_width = 2;
+	} else if (!((src | dest | len) & 1)) {
+		ctrla |= ATC_SRC_WIDTH_HALFWORD | ATC_DST_WIDTH_HALFWORD;
+		src_width = dst_width = 1;
+	} else {
+		ctrla |= ATC_SRC_WIDTH_BYTE | ATC_DST_WIDTH_BYTE;
+		src_width = dst_width = 0;
+	}
+
+	for (offset = 0; offset < len; offset += xfer_count << src_width) {
+		xfer_count = min_t(size_t, (len - offset) >> src_width,
+				ATC_BTSIZE_MAX);
+
+		desc = atc_desc_get(atchan);
+		if (!desc)
+			goto err_desc_get;
+
+		desc->lli.saddr = src + offset;
+		desc->lli.daddr = dest + offset;
+		desc->lli.ctrla = ctrla | xfer_count;
+		desc->lli.ctrlb = ctrlb;
+
+		desc->txd.cookie = 0;
+
+		atc_desc_chain(&first, &prev, desc);
+	}
+
+	/* First descriptor of the chain embedds additional information */
+	first->txd.cookie = -EBUSY;
+	first->len = len;
+
+	/* set end-of-link to the last link descriptor of list*/
+	set_desc_eol(desc);
+
+	first->txd.flags = flags; /* client is in control of this ack */
+
+	return &first->txd;
+
+err_desc_get:
+	atc_desc_put(atchan, first);
+	return NULL;
+}
+
+
+/**
+ * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
+ * @flags: tx descriptor status flags
+ * @context: transaction context (ignored)
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
+	struct at_desc		*first = NULL;
+	struct at_desc		*prev = NULL;
+	u32			ctrla;
+	u32			ctrlb;
+	dma_addr_t		reg;
+	unsigned int		reg_width;
+	unsigned int		mem_width;
+	unsigned int		i;
+	struct scatterlist	*sg;
+	size_t			total_len = 0;
+
+	dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n",
+			sg_len,
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
+			flags);
+
+	if (unlikely(!atslave || !sg_len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+		return NULL;
+	}
+
+	ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
+	ctrlb = ATC_IEN;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		reg_width = convert_buswidth(sconfig->dst_addr_width);
+		ctrla |=  ATC_DST_WIDTH(reg_width);
+		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
+			| ATC_SRC_ADDR_MODE_INCR
+			| ATC_FC_MEM2PER
+			| ATC_SIF(AT_DMA_MEM_IF) | ATC_DIF(AT_DMA_PER_IF);
+		reg = sconfig->dst_addr;
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct at_desc	*desc;
+			u32		len;
+			u32		mem;
+
+			desc = atc_desc_get(atchan);
+			if (!desc)
+				goto err_desc_get;
+
+			mem = sg_dma_address(sg);
+			len = sg_dma_len(sg);
+			mem_width = 2;
+			if (unlikely(mem & 3 || len & 3))
+				mem_width = 0;
+
+			desc->lli.saddr = mem;
+			desc->lli.daddr = reg;
+			desc->lli.ctrla = ctrla
+					| ATC_SRC_WIDTH(mem_width)
+					| len >> mem_width;
+			desc->lli.ctrlb = ctrlb;
+
+			atc_desc_chain(&first, &prev, desc);
+			total_len += len;
+		}
+		break;
+	case DMA_DEV_TO_MEM:
+		reg_width = convert_buswidth(sconfig->src_addr_width);
+		ctrla |=  ATC_SRC_WIDTH(reg_width);
+		ctrlb |=  ATC_DST_ADDR_MODE_INCR
+			| ATC_SRC_ADDR_MODE_FIXED
+			| ATC_FC_PER2MEM
+			| ATC_SIF(AT_DMA_PER_IF) | ATC_DIF(AT_DMA_MEM_IF);
+
+		reg = sconfig->src_addr;
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct at_desc	*desc;
+			u32		len;
+			u32		mem;
+
+			desc = atc_desc_get(atchan);
+			if (!desc)
+				goto err_desc_get;
+
+			mem = sg_dma_address(sg);
+			len = sg_dma_len(sg);
+			mem_width = 2;
+			if (unlikely(mem & 3 || len & 3))
+				mem_width = 0;
+
+			desc->lli.saddr = reg;
+			desc->lli.daddr = mem;
+			desc->lli.ctrla = ctrla
+					| ATC_DST_WIDTH(mem_width)
+					| len >> reg_width;
+			desc->lli.ctrlb = ctrlb;
+
+			atc_desc_chain(&first, &prev, desc);
+			total_len += len;
+		}
+		break;
+	default:
+		return NULL;
+	}
+
+	/* set end-of-link to the last link descriptor of list*/
+	set_desc_eol(prev);
+
+	/* First descriptor of the chain embedds additional information */
+	first->txd.cookie = -EBUSY;
+	first->len = total_len;
+
+	/* first link descriptor of list is responsible of flags */
+	first->txd.flags = flags; /* client is in control of this ack */
+
+	return &first->txd;
+
+err_desc_get:
+	dev_err(chan2dev(chan), "not enough descriptors available\n");
+	atc_desc_put(atchan, first);
+	return NULL;
+}
+
+/**
+ * atc_dma_cyclic_check_values
+ * Check for too big/unaligned periods and unaligned DMA buffer
+ */
+static int
+atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
+		size_t period_len, enum dma_transfer_direction direction)
+{
+	if (period_len > (ATC_BTSIZE_MAX << reg_width))
+		goto err_out;
+	if (unlikely(period_len & ((1 << reg_width) - 1)))
+		goto err_out;
+	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
+		goto err_out;
+	if (unlikely(!(direction & (DMA_DEV_TO_MEM | DMA_MEM_TO_DEV))))
+		goto err_out;
+
+	return 0;
+
+err_out:
+	return -EINVAL;
+}
+
+/**
+ * atc_dma_cyclic_fill_desc - Fill one period decriptor
+ */
+static int
+atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
+		unsigned int period_index, dma_addr_t buf_addr,
+		unsigned int reg_width, size_t period_len,
+		enum dma_transfer_direction direction)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
+	u32			ctrla;
+
+	/* prepare common CRTLA value */
+	ctrla =   ATC_DEFAULT_CTRLA | atslave->ctrla
+		| ATC_DST_WIDTH(reg_width)
+		| ATC_SRC_WIDTH(reg_width)
+		| period_len >> reg_width;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		desc->lli.saddr = buf_addr + (period_len * period_index);
+		desc->lli.daddr = sconfig->dst_addr;
+		desc->lli.ctrla = ctrla;
+		desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED
+				| ATC_SRC_ADDR_MODE_INCR
+				| ATC_FC_MEM2PER
+				| ATC_SIF(AT_DMA_MEM_IF)
+				| ATC_DIF(AT_DMA_PER_IF);
+		break;
+
+	case DMA_DEV_TO_MEM:
+		desc->lli.saddr = sconfig->src_addr;
+		desc->lli.daddr = buf_addr + (period_len * period_index);
+		desc->lli.ctrla = ctrla;
+		desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR
+				| ATC_SRC_ADDR_MODE_FIXED
+				| ATC_FC_PER2MEM
+				| ATC_SIF(AT_DMA_PER_IF)
+				| ATC_DIF(AT_DMA_MEM_IF);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * atc_prep_dma_cyclic - prepare the cyclic DMA transfer
+ * @chan: the DMA channel to prepare
+ * @buf_addr: physical DMA address where the buffer starts
+ * @buf_len: total number of bytes for the entire buffer
+ * @period_len: number of bytes for each period
+ * @direction: transfer direction, to or from device
+ * @context: transfer context (ignored)
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
+	struct at_desc		*first = NULL;
+	struct at_desc		*prev = NULL;
+	unsigned long		was_cyclic;
+	unsigned int		reg_width;
+	unsigned int		periods = buf_len / period_len;
+	unsigned int		i;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n",
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
+			buf_addr,
+			periods, buf_len, period_len);
+
+	if (unlikely(!atslave || !buf_len || !period_len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_cyclic: length is zero!\n");
+		return NULL;
+	}
+
+	was_cyclic = test_and_set_bit(ATC_IS_CYCLIC, &atchan->status);
+	if (was_cyclic) {
+		dev_dbg(chan2dev(chan), "prep_dma_cyclic: channel in use!\n");
+		return NULL;
+	}
+
+	if (sconfig->direction == DMA_MEM_TO_DEV)
+		reg_width = convert_buswidth(sconfig->dst_addr_width);
+	else
+		reg_width = convert_buswidth(sconfig->src_addr_width);
+
+	/* Check for too big/unaligned periods and unaligned DMA buffer */
+	if (atc_dma_cyclic_check_values(reg_width, buf_addr,
+					period_len, direction))
+		goto err_out;
+
+	/* build cyclic linked list */
+	for (i = 0; i < periods; i++) {
+		struct at_desc	*desc;
+
+		desc = atc_desc_get(atchan);
+		if (!desc)
+			goto err_desc_get;
+
+		if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr,
+					     reg_width, period_len, direction))
+			goto err_desc_get;
+
+		atc_desc_chain(&first, &prev, desc);
+	}
+
+	/* lets make a cyclic list */
+	prev->lli.dscr = first->txd.phys;
+
+	/* First descriptor of the chain embedds additional information */
+	first->txd.cookie = -EBUSY;
+	first->len = buf_len;
+
+	return &first->txd;
+
+err_desc_get:
+	dev_err(chan2dev(chan), "not enough descriptors available\n");
+	atc_desc_put(atchan, first);
+err_out:
+	clear_bit(ATC_IS_CYCLIC, &atchan->status);
+	return NULL;
+}
+
+static int set_runtime_config(struct dma_chan *chan,
+			      struct dma_slave_config *sconfig)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+
+	/* Check if it is chan is configured for slave transfers */
+	if (!chan->private)
+		return -EINVAL;
+
+	memcpy(&atchan->dma_sconfig, sconfig, sizeof(*sconfig));
+
+	convert_burst(&atchan->dma_sconfig.src_maxburst);
+	convert_burst(&atchan->dma_sconfig.dst_maxburst);
+
+	return 0;
+}
+
+
+static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		       unsigned long arg)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	int			chan_id = atchan->chan_common.chan_id;
+	unsigned long		flags;
+
+	LIST_HEAD(list);
+
+	dev_vdbg(chan2dev(chan), "atc_control (%d)\n", cmd);
+
+	if (cmd == DMA_PAUSE) {
+		spin_lock_irqsave(&atchan->lock, flags);
+
+		dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id));
+		set_bit(ATC_IS_PAUSED, &atchan->status);
+
+		spin_unlock_irqrestore(&atchan->lock, flags);
+	} else if (cmd == DMA_RESUME) {
+		if (!atc_chan_is_paused(atchan))
+			return 0;
+
+		spin_lock_irqsave(&atchan->lock, flags);
+
+		dma_writel(atdma, CHDR, AT_DMA_RES(chan_id));
+		clear_bit(ATC_IS_PAUSED, &atchan->status);
+
+		spin_unlock_irqrestore(&atchan->lock, flags);
+	} else if (cmd == DMA_TERMINATE_ALL) {
+		struct at_desc	*desc, *_desc;
+		/*
+		 * This is only called when something went wrong elsewhere, so
+		 * we don't really care about the data. Just disable the
+		 * channel. We still have to poll the channel enable bit due
+		 * to AHB/HSB limitations.
+		 */
+		spin_lock_irqsave(&atchan->lock, flags);
+
+		/* disabling channel: must also remove suspend state */
+		dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask);
+
+		/* confirm that this channel is disabled */
+		while (dma_readl(atdma, CHSR) & atchan->mask)
+			cpu_relax();
+
+		/* active_list entries will end up before queued entries */
+		list_splice_init(&atchan->queue, &list);
+		list_splice_init(&atchan->active_list, &list);
+
+		/* Flush all pending and queued descriptors */
+		list_for_each_entry_safe(desc, _desc, &list, desc_node)
+			atc_chain_complete(atchan, desc);
+
+		clear_bit(ATC_IS_PAUSED, &atchan->status);
+		/* if channel dedicated to cyclic operations, free it */
+		clear_bit(ATC_IS_CYCLIC, &atchan->status);
+
+		spin_unlock_irqrestore(&atchan->lock, flags);
+	} else if (cmd == DMA_SLAVE_CONFIG) {
+		return set_runtime_config(chan, (struct dma_slave_config *)arg);
+	} else {
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+/**
+ * atc_tx_status - poll for transaction completion
+ * @chan: DMA channel
+ * @cookie: transaction identifier to check status of
+ * @txstate: if not %NULL updated with transaction state
+ *
+ * If @txstate is passed in, upon return it reflect the driver
+ * internal state and can be used with dma_async_is_complete() to check
+ * the status of multiple cookies without re-checking hardware state.
+ */
+static enum dma_status
+atc_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	dma_cookie_t		last_used;
+	dma_cookie_t		last_complete;
+	unsigned long		flags;
+	enum dma_status		ret;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_SUCCESS) {
+		atc_cleanup_descriptors(atchan);
+
+		ret = dma_cookie_status(chan, cookie, txstate);
+	}
+
+	last_complete = chan->completed_cookie;
+	last_used = chan->cookie;
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	if (ret != DMA_SUCCESS)
+		dma_set_residue(txstate, atc_first_active(atchan)->len);
+
+	if (atc_chan_is_paused(atchan))
+		ret = DMA_PAUSED;
+
+	dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d (d%d, u%d)\n",
+		 ret, cookie, last_complete ? last_complete : 0,
+		 last_used ? last_used : 0);
+
+	return ret;
+}
+
+/**
+ * atc_issue_pending - try to finish work
+ * @chan: target DMA channel
+ */
+static void atc_issue_pending(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	unsigned long		flags;
+
+	dev_vdbg(chan2dev(chan), "issue_pending\n");
+
+	/* Not needed for cyclic transfers */
+	if (atc_chan_is_cyclic(atchan))
+		return;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	if (!atc_chan_is_enabled(atchan)) {
+		atc_advance_work(atchan);
+	}
+	spin_unlock_irqrestore(&atchan->lock, flags);
+}
+
+/**
+ * atc_alloc_chan_resources - allocate resources for DMA channel
+ * @chan: allocate descriptor resources for this channel
+ * @client: current client requesting the channel be ready for requests
+ *
+ * return - the number of allocated descriptors
+ */
+static int atc_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	struct at_desc		*desc;
+	struct at_dma_slave	*atslave;
+	unsigned long		flags;
+	int			i;
+	u32			cfg;
+	LIST_HEAD(tmp_list);
+
+	dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+	/* ASSERT:  channel is idle */
+	if (atc_chan_is_enabled(atchan)) {
+		dev_dbg(chan2dev(chan), "DMA channel not idle ?\n");
+		return -EIO;
+	}
+
+	cfg = ATC_DEFAULT_CFG;
+
+	atslave = chan->private;
+	if (atslave) {
+		/*
+		 * We need controller-specific data to set up slave
+		 * transfers.
+		 */
+		BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev);
+
+		/* if cfg configuration specified take it instad of default */
+		if (atslave->cfg)
+			cfg = atslave->cfg;
+	}
+
+	/* have we already been set up?
+	 * reconfigure channel but no need to reallocate descriptors */
+	if (!list_empty(&atchan->free_list))
+		return atchan->descs_allocated;
+
+	/* Allocate initial pool of descriptors */
+	for (i = 0; i < init_nr_desc_per_channel; i++) {
+		desc = atc_alloc_descriptor(chan, GFP_KERNEL);
+		if (!desc) {
+			dev_err(atdma->dma_common.dev,
+				"Only %d initial descriptors\n", i);
+			break;
+		}
+		list_add_tail(&desc->desc_node, &tmp_list);
+	}
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	atchan->descs_allocated = i;
+	list_splice(&tmp_list, &atchan->free_list);
+	dma_cookie_init(chan);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	/* channel parameters */
+	channel_writel(atchan, CFG, cfg);
+
+	dev_dbg(chan2dev(chan),
+		"alloc_chan_resources: allocated %d descriptors\n",
+		atchan->descs_allocated);
+
+	return atchan->descs_allocated;
+}
+
+/**
+ * atc_free_chan_resources - free all channel resources
+ * @chan: DMA channel
+ */
+static void atc_free_chan_resources(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	struct at_desc		*desc, *_desc;
+	LIST_HEAD(list);
+
+	dev_dbg(chan2dev(chan), "free_chan_resources: (descs allocated=%u)\n",
+		atchan->descs_allocated);
+
+	/* ASSERT:  channel is idle */
+	BUG_ON(!list_empty(&atchan->active_list));
+	BUG_ON(!list_empty(&atchan->queue));
+	BUG_ON(atc_chan_is_enabled(atchan));
+
+	list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
+		dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
+		list_del(&desc->desc_node);
+		/* free link descriptor */
+		dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys);
+	}
+	list_splice_init(&atchan->free_list, &list);
+	atchan->descs_allocated = 0;
+	atchan->status = 0;
+
+	dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
+}
+
+
+/*--  Module Management  -----------------------------------------------*/
+
+/* cap_mask is a multi-u32 bitfield, fill it with proper C code. */
+static struct at_dma_platform_data at91sam9rl_config = {
+	.nr_channels = 2,
+};
+static struct at_dma_platform_data at91sam9g45_config = {
+	.nr_channels = 8,
+};
+
+#if defined(CONFIG_OF)
+static const struct of_device_id atmel_dma_dt_ids[] = {
+	{
+		.compatible = "atmel,at91sam9rl-dma",
+		.data = &at91sam9rl_config,
+	}, {
+		.compatible = "atmel,at91sam9g45-dma",
+		.data = &at91sam9g45_config,
+	}, {
+		/* sentinel */
+	}
+};
+
+MODULE_DEVICE_TABLE(of, atmel_dma_dt_ids);
+#endif
+
+static const struct platform_device_id atdma_devtypes[] = {
+	{
+		.name = "at91sam9rl_dma",
+		.driver_data = (unsigned long) &at91sam9rl_config,
+	}, {
+		.name = "at91sam9g45_dma",
+		.driver_data = (unsigned long) &at91sam9g45_config,
+	}, {
+		/* sentinel */
+	}
+};
+
+static inline struct at_dma_platform_data * __init at_dma_get_driver_data(
+						struct platform_device *pdev)
+{
+	if (pdev->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_node(atmel_dma_dt_ids, pdev->dev.of_node);
+		if (match == NULL)
+			return NULL;
+		return match->data;
+	}
+	return (struct at_dma_platform_data *)
+			platform_get_device_id(pdev)->driver_data;
+}
+
+/**
+ * at_dma_off - disable DMA controller
+ * @atdma: the Atmel HDAMC device
+ */
+static void at_dma_off(struct at_dma *atdma)
+{
+	dma_writel(atdma, EN, 0);
+
+	/* disable all interrupts */
+	dma_writel(atdma, EBCIDR, -1L);
+
+	/* confirm that all channels are disabled */
+	while (dma_readl(atdma, CHSR) & atdma->all_chan_mask)
+		cpu_relax();
+}
+
+static int __init at_dma_probe(struct platform_device *pdev)
+{
+	struct resource		*io;
+	struct at_dma		*atdma;
+	size_t			size;
+	int			irq;
+	int			err;
+	int			i;
+	struct at_dma_platform_data *plat_dat;
+
+	/* setup platform data for each SoC */
+	dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask);
+	dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask);
+	dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask);
+
+	/* get DMA parameters from controller type */
+	plat_dat = at_dma_get_driver_data(pdev);
+	if (!plat_dat)
+		return -ENODEV;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!io)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	size = sizeof(struct at_dma);
+	size += plat_dat->nr_channels * sizeof(struct at_dma_chan);
+	atdma = kzalloc(size, GFP_KERNEL);
+	if (!atdma)
+		return -ENOMEM;
+
+	/* discover transaction capabilities */
+	atdma->dma_common.cap_mask = plat_dat->cap_mask;
+	atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;
+
+	size = resource_size(io);
+	if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
+		err = -EBUSY;
+		goto err_kfree;
+	}
+
+	atdma->regs = ioremap(io->start, size);
+	if (!atdma->regs) {
+		err = -ENOMEM;
+		goto err_release_r;
+	}
+
+	atdma->clk = clk_get(&pdev->dev, "dma_clk");
+	if (IS_ERR(atdma->clk)) {
+		err = PTR_ERR(atdma->clk);
+		goto err_clk;
+	}
+	clk_enable(atdma->clk);
+
+	/* force dma off, just in case */
+	at_dma_off(atdma);
+
+	err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma);
+	if (err)
+		goto err_irq;
+
+	platform_set_drvdata(pdev, atdma);
+
+	/* create a pool of consistent memory blocks for hardware descriptors */
+	atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool",
+			&pdev->dev, sizeof(struct at_desc),
+			4 /* word alignment */, 0);
+	if (!atdma->dma_desc_pool) {
+		dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
+		err = -ENOMEM;
+		goto err_pool_create;
+	}
+
+	/* clear any pending interrupt */
+	while (dma_readl(atdma, EBCISR))
+		cpu_relax();
+
+	/* initialize channels related values */
+	INIT_LIST_HEAD(&atdma->dma_common.channels);
+	for (i = 0; i < plat_dat->nr_channels; i++) {
+		struct at_dma_chan	*atchan = &atdma->chan[i];
+
+		atchan->chan_common.device = &atdma->dma_common;
+		dma_cookie_init(&atchan->chan_common);
+		list_add_tail(&atchan->chan_common.device_node,
+				&atdma->dma_common.channels);
+
+		atchan->ch_regs = atdma->regs + ch_regs(i);
+		spin_lock_init(&atchan->lock);
+		atchan->mask = 1 << i;
+
+		INIT_LIST_HEAD(&atchan->active_list);
+		INIT_LIST_HEAD(&atchan->queue);
+		INIT_LIST_HEAD(&atchan->free_list);
+
+		tasklet_init(&atchan->tasklet, atc_tasklet,
+				(unsigned long)atchan);
+		atc_enable_chan_irq(atdma, i);
+	}
+
+	/* set base routines */
+	atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources;
+	atdma->dma_common.device_free_chan_resources = atc_free_chan_resources;
+	atdma->dma_common.device_tx_status = atc_tx_status;
+	atdma->dma_common.device_issue_pending = atc_issue_pending;
+	atdma->dma_common.dev = &pdev->dev;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask))
+		atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy;
+
+	if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) {
+		atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg;
+		/* controller can do slave DMA: can trigger cyclic transfers */
+		dma_cap_set(DMA_CYCLIC, atdma->dma_common.cap_mask);
+		atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic;
+		atdma->dma_common.device_control = atc_control;
+	}
+
+	dma_writel(atdma, EN, AT_DMA_ENABLE);
+
+	dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n",
+	  dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
+	  dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)  ? "slave " : "",
+	  plat_dat->nr_channels);
+
+	dma_async_device_register(&atdma->dma_common);
+
+	return 0;
+
+err_pool_create:
+	platform_set_drvdata(pdev, NULL);
+	free_irq(platform_get_irq(pdev, 0), atdma);
+err_irq:
+	clk_disable(atdma->clk);
+	clk_put(atdma->clk);
+err_clk:
+	iounmap(atdma->regs);
+	atdma->regs = NULL;
+err_release_r:
+	release_mem_region(io->start, size);
+err_kfree:
+	kfree(atdma);
+	return err;
+}
+
+static int __exit at_dma_remove(struct platform_device *pdev)
+{
+	struct at_dma		*atdma = platform_get_drvdata(pdev);
+	struct dma_chan		*chan, *_chan;
+	struct resource		*io;
+
+	at_dma_off(atdma);
+	dma_async_device_unregister(&atdma->dma_common);
+
+	dma_pool_destroy(atdma->dma_desc_pool);
+	platform_set_drvdata(pdev, NULL);
+	free_irq(platform_get_irq(pdev, 0), atdma);
+
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+			device_node) {
+		struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+
+		/* Disable interrupts */
+		atc_disable_chan_irq(atdma, chan->chan_id);
+		tasklet_disable(&atchan->tasklet);
+
+		tasklet_kill(&atchan->tasklet);
+		list_del(&chan->device_node);
+	}
+
+	clk_disable(atdma->clk);
+	clk_put(atdma->clk);
+
+	iounmap(atdma->regs);
+	atdma->regs = NULL;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(io->start, resource_size(io));
+
+	kfree(atdma);
+
+	return 0;
+}
+
+static void at_dma_shutdown(struct platform_device *pdev)
+{
+	struct at_dma	*atdma = platform_get_drvdata(pdev);
+
+	at_dma_off(platform_get_drvdata(pdev));
+	clk_disable(atdma->clk);
+}
+
+static int at_dma_prepare(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct at_dma *atdma = platform_get_drvdata(pdev);
+	struct dma_chan *chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+			device_node) {
+		struct at_dma_chan *atchan = to_at_dma_chan(chan);
+		/* wait for transaction completion (except in cyclic case) */
+		if (atc_chan_is_enabled(atchan) && !atc_chan_is_cyclic(atchan))
+			return -EAGAIN;
+	}
+	return 0;
+}
+
+static void atc_suspend_cyclic(struct at_dma_chan *atchan)
+{
+	struct dma_chan	*chan = &atchan->chan_common;
+
+	/* Channel should be paused by user
+	 * do it anyway even if it is not done already */
+	if (!atc_chan_is_paused(atchan)) {
+		dev_warn(chan2dev(chan),
+		"cyclic channel not paused, should be done by channel user\n");
+		atc_control(chan, DMA_PAUSE, 0);
+	}
+
+	/* now preserve additional data for cyclic operations */
+	/* next descriptor address in the cyclic list */
+	atchan->save_dscr = channel_readl(atchan, DSCR);
+
+	vdbg_dump_regs(atchan);
+}
+
+static int at_dma_suspend_noirq(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct at_dma *atdma = platform_get_drvdata(pdev);
+	struct dma_chan *chan, *_chan;
+
+	/* preserve data */
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+			device_node) {
+		struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+		if (atc_chan_is_cyclic(atchan))
+			atc_suspend_cyclic(atchan);
+		atchan->save_cfg = channel_readl(atchan, CFG);
+	}
+	atdma->save_imr = dma_readl(atdma, EBCIMR);
+
+	/* disable DMA controller */
+	at_dma_off(atdma);
+	clk_disable(atdma->clk);
+	return 0;
+}
+
+static void atc_resume_cyclic(struct at_dma_chan *atchan)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	/* restore channel status for cyclic descriptors list:
+	 * next descriptor in the cyclic list at the time of suspend */
+	channel_writel(atchan, SADDR, 0);
+	channel_writel(atchan, DADDR, 0);
+	channel_writel(atchan, CTRLA, 0);
+	channel_writel(atchan, CTRLB, 0);
+	channel_writel(atchan, DSCR, atchan->save_dscr);
+	dma_writel(atdma, CHER, atchan->mask);
+
+	/* channel pause status should be removed by channel user
+	 * We cannot take the initiative to do it here */
+
+	vdbg_dump_regs(atchan);
+}
+
+static int at_dma_resume_noirq(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct at_dma *atdma = platform_get_drvdata(pdev);
+	struct dma_chan *chan, *_chan;
+
+	/* bring back DMA controller */
+	clk_enable(atdma->clk);
+	dma_writel(atdma, EN, AT_DMA_ENABLE);
+
+	/* clear any pending interrupt */
+	while (dma_readl(atdma, EBCISR))
+		cpu_relax();
+
+	/* restore saved data */
+	dma_writel(atdma, EBCIER, atdma->save_imr);
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+			device_node) {
+		struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+		channel_writel(atchan, CFG, atchan->save_cfg);
+		if (atc_chan_is_cyclic(atchan))
+			atc_resume_cyclic(atchan);
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops at_dma_dev_pm_ops = {
+	.prepare = at_dma_prepare,
+	.suspend_noirq = at_dma_suspend_noirq,
+	.resume_noirq = at_dma_resume_noirq,
+};
+
+static struct platform_driver at_dma_driver = {
+	.remove		= __exit_p(at_dma_remove),
+	.shutdown	= at_dma_shutdown,
+	.id_table	= atdma_devtypes,
+	.driver = {
+		.name	= "at_hdmac",
+		.pm	= &at_dma_dev_pm_ops,
+		.of_match_table	= of_match_ptr(atmel_dma_dt_ids),
+	},
+};
+
+static int __init at_dma_init(void)
+{
+	return platform_driver_probe(&at_dma_driver, at_dma_probe);
+}
+subsys_initcall(at_dma_init);
+
+static void __exit at_dma_exit(void)
+{
+	platform_driver_unregister(&at_dma_driver);
+}
+module_exit(at_dma_exit);
+
+MODULE_DESCRIPTION("Atmel AHB DMA Controller driver");
+MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:at_hdmac");
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
new file mode 100644
index 00000000..897a8bca
--- /dev/null
+++ b/drivers/dma/at_hdmac_regs.h
@@ -0,0 +1,428 @@
+/*
+ * Header file for the Atmel AHB DMA Controller driver
+ *
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef AT_HDMAC_REGS_H
+#define	AT_HDMAC_REGS_H
+
+#include <mach/at_hdmac.h>
+
+#define	AT_DMA_MAX_NR_CHANNELS	8
+
+
+#define	AT_DMA_GCFG	0x00	/* Global Configuration Register */
+#define		AT_DMA_IF_BIGEND(i)	(0x1 << (i))	/* AHB-Lite Interface i in Big-endian mode */
+#define		AT_DMA_ARB_CFG	(0x1 << 4)	/* Arbiter mode. */
+#define			AT_DMA_ARB_CFG_FIXED		(0x0 << 4)
+#define			AT_DMA_ARB_CFG_ROUND_ROBIN	(0x1 << 4)
+
+#define	AT_DMA_EN	0x04	/* Controller Enable Register */
+#define		AT_DMA_ENABLE	(0x1 << 0)
+
+#define	AT_DMA_SREQ	0x08	/* Software Single Request Register */
+#define		AT_DMA_SSREQ(x)	(0x1 << ((x) << 1))		/* Request a source single transfer on channel x */
+#define		AT_DMA_DSREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination single transfer on channel x */
+
+#define	AT_DMA_CREQ	0x0C	/* Software Chunk Transfer Request Register */
+#define		AT_DMA_SCREQ(x)	(0x1 << ((x) << 1))		/* Request a source chunk transfer on channel x */
+#define		AT_DMA_DCREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination chunk transfer on channel x */
+
+#define	AT_DMA_LAST	0x10	/* Software Last Transfer Flag Register */
+#define		AT_DMA_SLAST(x)	(0x1 << ((x) << 1))		/* This src rq is last tx of buffer on channel x */
+#define		AT_DMA_DLAST(x)	(0x1 << (1 + ((x) << 1)))	/* This dst rq is last tx of buffer on channel x */
+
+#define	AT_DMA_SYNC	0x14	/* Request Synchronization Register */
+#define		AT_DMA_SYR(h)	(0x1 << (h))			/* Synchronize handshake line h */
+
+/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */
+#define	AT_DMA_EBCIER	0x18	/* Enable register */
+#define	AT_DMA_EBCIDR	0x1C	/* Disable register */
+#define	AT_DMA_EBCIMR	0x20	/* Mask Register */
+#define	AT_DMA_EBCISR	0x24	/* Status Register */
+#define		AT_DMA_CBTC_OFFSET	8
+#define		AT_DMA_ERR_OFFSET	16
+#define		AT_DMA_BTC(x)	(0x1 << (x))
+#define		AT_DMA_CBTC(x)	(0x1 << (AT_DMA_CBTC_OFFSET + (x)))
+#define		AT_DMA_ERR(x)	(0x1 << (AT_DMA_ERR_OFFSET + (x)))
+
+#define	AT_DMA_CHER	0x28	/* Channel Handler Enable Register */
+#define		AT_DMA_ENA(x)	(0x1 << (x))
+#define		AT_DMA_SUSP(x)	(0x1 << ( 8 + (x)))
+#define		AT_DMA_KEEP(x)	(0x1 << (24 + (x)))
+
+#define	AT_DMA_CHDR	0x2C	/* Channel Handler Disable Register */
+#define		AT_DMA_DIS(x)	(0x1 << (x))
+#define		AT_DMA_RES(x)	(0x1 << ( 8 + (x)))
+
+#define	AT_DMA_CHSR	0x30	/* Channel Handler Status Register */
+#define		AT_DMA_EMPT(x)	(0x1 << (16 + (x)))
+#define		AT_DMA_STAL(x)	(0x1 << (24 + (x)))
+
+
+#define	AT_DMA_CH_REGS_BASE	0x3C	/* Channel registers base address */
+#define	ch_regs(x)	(AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */
+
+/* Hardware register offset for each channel */
+#define	ATC_SADDR_OFFSET	0x00	/* Source Address Register */
+#define	ATC_DADDR_OFFSET	0x04	/* Destination Address Register */
+#define	ATC_DSCR_OFFSET		0x08	/* Descriptor Address Register */
+#define	ATC_CTRLA_OFFSET	0x0C	/* Control A Register */
+#define	ATC_CTRLB_OFFSET	0x10	/* Control B Register */
+#define	ATC_CFG_OFFSET		0x14	/* Configuration Register */
+#define	ATC_SPIP_OFFSET		0x18	/* Src PIP Configuration Register */
+#define	ATC_DPIP_OFFSET		0x1C	/* Dst PIP Configuration Register */
+
+
+/* Bitfield definitions */
+
+/* Bitfields in DSCR */
+#define	ATC_DSCR_IF(i)		(0x3 & (i))	/* Dsc feched via AHB-Lite Interface i */
+
+/* Bitfields in CTRLA */
+#define	ATC_BTSIZE_MAX		0xFFFFUL	/* Maximum Buffer Transfer Size */
+#define	ATC_BTSIZE(x)		(ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */
+/* Chunck Tranfer size definitions are in at_hdmac.h */
+#define	ATC_SRC_WIDTH_MASK	(0x3 << 24)	/* Source Single Transfer Size */
+#define		ATC_SRC_WIDTH(x)	((x) << 24)
+#define		ATC_SRC_WIDTH_BYTE	(0x0 << 24)
+#define		ATC_SRC_WIDTH_HALFWORD	(0x1 << 24)
+#define		ATC_SRC_WIDTH_WORD	(0x2 << 24)
+#define	ATC_DST_WIDTH_MASK	(0x3 << 28)	/* Destination Single Transfer Size */
+#define		ATC_DST_WIDTH(x)	((x) << 28)
+#define		ATC_DST_WIDTH_BYTE	(0x0 << 28)
+#define		ATC_DST_WIDTH_HALFWORD	(0x1 << 28)
+#define		ATC_DST_WIDTH_WORD	(0x2 << 28)
+#define	ATC_DONE		(0x1 << 31)	/* Tx Done (only written back in descriptor) */
+
+/* Bitfields in CTRLB */
+#define	ATC_SIF(i)		(0x3 & (i))	/* Src tx done via AHB-Lite Interface i */
+#define	ATC_DIF(i)		((0x3 & (i)) <<  4)	/* Dst tx done via AHB-Lite Interface i */
+				  /* Specify AHB interfaces */
+#define AT_DMA_MEM_IF		0 /* interface 0 as memory interface */
+#define AT_DMA_PER_IF		1 /* interface 1 as peripheral interface */
+
+#define	ATC_SRC_PIP		(0x1 <<  8)	/* Source Picture-in-Picture enabled */
+#define	ATC_DST_PIP		(0x1 << 12)	/* Destination Picture-in-Picture enabled */
+#define	ATC_SRC_DSCR_DIS	(0x1 << 16)	/* Src Descriptor fetch disable */
+#define	ATC_DST_DSCR_DIS	(0x1 << 20)	/* Dst Descriptor fetch disable */
+#define	ATC_FC_MASK		(0x7 << 21)	/* Choose Flow Controller */
+#define		ATC_FC_MEM2MEM		(0x0 << 21)	/* Mem-to-Mem (DMA) */
+#define		ATC_FC_MEM2PER		(0x1 << 21)	/* Mem-to-Periph (DMA) */
+#define		ATC_FC_PER2MEM		(0x2 << 21)	/* Periph-to-Mem (DMA) */
+#define		ATC_FC_PER2PER		(0x3 << 21)	/* Periph-to-Periph (DMA) */
+#define		ATC_FC_PER2MEM_PER	(0x4 << 21)	/* Periph-to-Mem (Peripheral) */
+#define		ATC_FC_MEM2PER_PER	(0x5 << 21)	/* Mem-to-Periph (Peripheral) */
+#define		ATC_FC_PER2PER_SRCPER	(0x6 << 21)	/* Periph-to-Periph (Src Peripheral) */
+#define		ATC_FC_PER2PER_DSTPER	(0x7 << 21)	/* Periph-to-Periph (Dst Peripheral) */
+#define	ATC_SRC_ADDR_MODE_MASK	(0x3 << 24)
+#define		ATC_SRC_ADDR_MODE_INCR	(0x0 << 24)	/* Incrementing Mode */
+#define		ATC_SRC_ADDR_MODE_DECR	(0x1 << 24)	/* Decrementing Mode */
+#define		ATC_SRC_ADDR_MODE_FIXED	(0x2 << 24)	/* Fixed Mode */
+#define	ATC_DST_ADDR_MODE_MASK	(0x3 << 28)
+#define		ATC_DST_ADDR_MODE_INCR	(0x0 << 28)	/* Incrementing Mode */
+#define		ATC_DST_ADDR_MODE_DECR	(0x1 << 28)	/* Decrementing Mode */
+#define		ATC_DST_ADDR_MODE_FIXED	(0x2 << 28)	/* Fixed Mode */
+#define	ATC_IEN			(0x1 << 30)	/* BTC interrupt enable (active low) */
+#define	ATC_AUTO		(0x1 << 31)	/* Auto multiple buffer tx enable */
+
+/* Bitfields in CFG */
+/* are in at_hdmac.h */
+
+/* Bitfields in SPIP */
+#define	ATC_SPIP_HOLE(x)	(0xFFFFU & (x))
+#define	ATC_SPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
+
+/* Bitfields in DPIP */
+#define	ATC_DPIP_HOLE(x)	(0xFFFFU & (x))
+#define	ATC_DPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
+
+
+/*--  descriptors  -----------------------------------------------------*/
+
+/* LLI == Linked List Item; aka DMA buffer descriptor */
+struct at_lli {
+	/* values that are not changed by hardware */
+	dma_addr_t	saddr;
+	dma_addr_t	daddr;
+	/* value that may get written back: */
+	u32		ctrla;
+	/* more values that are not changed by hardware */
+	u32		ctrlb;
+	dma_addr_t	dscr;	/* chain to next lli */
+};
+
+/**
+ * struct at_desc - software descriptor
+ * @at_lli: hardware lli structure
+ * @txd: support for the async_tx api
+ * @desc_node: node on the channed descriptors list
+ * @len: total transaction bytecount
+ */
+struct at_desc {
+	/* FIRST values the hardware uses */
+	struct at_lli			lli;
+
+	/* THEN values for driver housekeeping */
+	struct list_head		tx_list;
+	struct dma_async_tx_descriptor	txd;
+	struct list_head		desc_node;
+	size_t				len;
+};
+
+static inline struct at_desc *
+txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct at_desc, txd);
+}
+
+
+/*--  Channels  --------------------------------------------------------*/
+
+/**
+ * atc_status - information bits stored in channel status flag
+ *
+ * Manipulated with atomic operations.
+ */
+enum atc_status {
+	ATC_IS_ERROR = 0,
+	ATC_IS_PAUSED = 1,
+	ATC_IS_CYCLIC = 24,
+};
+
+/**
+ * struct at_dma_chan - internal representation of an Atmel HDMAC channel
+ * @chan_common: common dmaengine channel object members
+ * @device: parent device
+ * @ch_regs: memory mapped register base
+ * @mask: channel index in a mask
+ * @status: transmit status information from irq/prep* functions
+ *                to tasklet (use atomic operations)
+ * @tasklet: bottom half to finish transaction work
+ * @save_cfg: configuration register that is saved on suspend/resume cycle
+ * @save_dscr: for cyclic operations, preserve next descriptor address in
+ *             the cyclic list on suspend/resume cycle
+ * @dma_sconfig: configuration for slave transfers, passed via DMA_SLAVE_CONFIG
+ * @lock: serializes enqueue/dequeue operations to descriptors lists
+ * @active_list: list of descriptors dmaengine is being running on
+ * @queue: list of descriptors ready to be submitted to engine
+ * @free_list: list of descriptors usable by the channel
+ * @descs_allocated: records the actual size of the descriptor pool
+ */
+struct at_dma_chan {
+	struct dma_chan		chan_common;
+	struct at_dma		*device;
+	void __iomem		*ch_regs;
+	u8			mask;
+	unsigned long		status;
+	struct tasklet_struct	tasklet;
+	u32			save_cfg;
+	u32			save_dscr;
+	struct dma_slave_config dma_sconfig;
+
+	spinlock_t		lock;
+
+	/* these other elements are all protected by lock */
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	unsigned int		descs_allocated;
+};
+
+#define	channel_readl(atchan, name) \
+	__raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET)
+
+#define	channel_writel(atchan, name, val) \
+	__raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET)
+
+static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
+{
+	return container_of(dchan, struct at_dma_chan, chan_common);
+}
+
+/*
+ * Fix sconfig's burst size according to at_hdmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7.
+ *
+ * This can be done by finding most significant bit set.
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+	if (*maxburst > 1)
+		*maxburst = fls(*maxburst) - 2;
+	else
+		*maxburst = 0;
+}
+
+/*
+ * Fix sconfig's bus width according to at_hdmac.
+ * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2.
+ */
+static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+	switch (addr_width) {
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return 1;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return 2;
+	default:
+		/* For 1 byte width or fallback */
+		return 0;
+	}
+}
+
+/*--  Controller  ------------------------------------------------------*/
+
+/**
+ * struct at_dma - internal representation of an Atmel HDMA Controller
+ * @chan_common: common dmaengine dma_device object members
+ * @atdma_devtype: identifier of DMA controller compatibility
+ * @ch_regs: memory mapped register base
+ * @clk: dma controller clock
+ * @save_imr: interrupt mask register that is saved on suspend/resume cycle
+ * @all_chan_mask: all channels availlable in a mask
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @chan: channels table to store at_dma_chan structures
+ */
+struct at_dma {
+	struct dma_device	dma_common;
+	void __iomem		*regs;
+	struct clk		*clk;
+	u32			save_imr;
+
+	u8			all_chan_mask;
+
+	struct dma_pool		*dma_desc_pool;
+	/* AT THE END channels table */
+	struct at_dma_chan	chan[0];
+};
+
+#define	dma_readl(atdma, name) \
+	__raw_readl((atdma)->regs + AT_DMA_##name)
+#define	dma_writel(atdma, name, val) \
+	__raw_writel((val), (atdma)->regs + AT_DMA_##name)
+
+static inline struct at_dma *to_at_dma(struct dma_device *ddev)
+{
+	return container_of(ddev, struct at_dma, dma_common);
+}
+
+
+/*--  Helper functions  ------------------------------------------------*/
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+static struct device *chan2parent(struct dma_chan *chan)
+{
+	return chan->dev->device.parent;
+}
+
+#if defined(VERBOSE_DEBUG)
+static void vdbg_dump_regs(struct at_dma_chan *atchan)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	dev_err(chan2dev(&atchan->chan_common),
+		"  channel %d : imr = 0x%x, chsr = 0x%x\n",
+		atchan->chan_common.chan_id,
+		dma_readl(atdma, EBCIMR),
+		dma_readl(atdma, CHSR));
+
+	dev_err(chan2dev(&atchan->chan_common),
+		"  channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n",
+		channel_readl(atchan, SADDR),
+		channel_readl(atchan, DADDR),
+		channel_readl(atchan, CTRLA),
+		channel_readl(atchan, CTRLB),
+		channel_readl(atchan, CFG),
+		channel_readl(atchan, DSCR));
+}
+#else
+static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
+#endif
+
+static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
+{
+	dev_printk(KERN_CRIT, chan2dev(&atchan->chan_common),
+			"  desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
+			lli->saddr, lli->daddr,
+			lli->ctrla, lli->ctrlb, lli->dscr);
+}
+
+
+static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on)
+{
+	u32 ebci;
+
+	/* enable interrupts on buffer transfer completion & error */
+	ebci =    AT_DMA_BTC(chan_id)
+		| AT_DMA_ERR(chan_id);
+	if (on)
+		dma_writel(atdma, EBCIER, ebci);
+	else
+		dma_writel(atdma, EBCIDR, ebci);
+}
+
+static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id)
+{
+	atc_setup_irq(atdma, chan_id, 1);
+}
+
+static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id)
+{
+	atc_setup_irq(atdma, chan_id, 0);
+}
+
+
+/**
+ * atc_chan_is_enabled - test if given channel is enabled
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_enabled(struct at_dma_chan *atchan)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	return !!(dma_readl(atdma, CHSR) & atchan->mask);
+}
+
+/**
+ * atc_chan_is_paused - test channel pause/resume status
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_paused(struct at_dma_chan *atchan)
+{
+	return test_bit(ATC_IS_PAUSED, &atchan->status);
+}
+
+/**
+ * atc_chan_is_cyclic - test if given channel has cyclic property set
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan)
+{
+	return test_bit(ATC_IS_CYCLIC, &atchan->status);
+}
+
+/**
+ * set_desc_eol - set end-of-link to descriptor so it will end transfer
+ * @desc: descriptor, signle or at the end of a chain, to end chain on
+ */
+static void set_desc_eol(struct at_desc *desc)
+{
+	u32 ctrlb = desc->lli.ctrlb;
+
+	ctrlb &= ~ATC_IEN;
+	ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
+
+	desc->lli.ctrlb = ctrlb;
+	desc->lli.dscr = 0;
+}
+
+#endif /* AT_HDMAC_REGS_H */
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
new file mode 100644
index 00000000..750925f9
--- /dev/null
+++ b/drivers/dma/coh901318.c
@@ -0,0 +1,1602 @@
+/*
+ * driver/dma/coh901318.c
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * DMA driver for COH 901 318
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h> /* printk() */
+#include <linux/fs.h> /* everything... */
+#include <linux/scatterlist.h>
+#include <linux/slab.h> /* kmalloc() */
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/irqreturn.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <mach/coh901318.h>
+
+#include "coh901318_lli.h"
+#include "dmaengine.h"
+
+#define COHC_2_DEV(cohc) (&cohc->chan.dev->device)
+
+#ifdef VERBOSE_DEBUG
+#define COH_DBG(x) ({ if (1) x; 0; })
+#else
+#define COH_DBG(x) ({ if (0) x; 0; })
+#endif
+
+struct coh901318_desc {
+	struct dma_async_tx_descriptor desc;
+	struct list_head node;
+	struct scatterlist *sg;
+	unsigned int sg_len;
+	struct coh901318_lli *lli;
+	enum dma_transfer_direction dir;
+	unsigned long flags;
+	u32 head_config;
+	u32 head_ctrl;
+};
+
+struct coh901318_base {
+	struct device *dev;
+	void __iomem *virtbase;
+	struct coh901318_pool pool;
+	struct powersave pm;
+	struct dma_device dma_slave;
+	struct dma_device dma_memcpy;
+	struct coh901318_chan *chans;
+	struct coh901318_platform *platform;
+};
+
+struct coh901318_chan {
+	spinlock_t lock;
+	int allocated;
+	int id;
+	int stopped;
+
+	struct work_struct free_work;
+	struct dma_chan chan;
+
+	struct tasklet_struct tasklet;
+
+	struct list_head active;
+	struct list_head queue;
+	struct list_head free;
+
+	unsigned long nbr_active_done;
+	unsigned long busy;
+
+	u32 runtime_addr;
+	u32 runtime_ctrl;
+
+	struct coh901318_base *base;
+};
+
+static void coh901318_list_print(struct coh901318_chan *cohc,
+				 struct coh901318_lli *lli)
+{
+	struct coh901318_lli *l = lli;
+	int i = 0;
+
+	while (l) {
+		dev_vdbg(COHC_2_DEV(cohc), "i %d, lli %p, ctrl 0x%x, src 0x%x"
+			 ", dst 0x%x, link 0x%x virt_link_addr 0x%p\n",
+			 i, l, l->control, l->src_addr, l->dst_addr,
+			 l->link_addr, l->virt_link_addr);
+		i++;
+		l = l->virt_link_addr;
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define COH901318_DEBUGFS_ASSIGN(x, y) (x = y)
+
+static struct coh901318_base *debugfs_dma_base;
+static struct dentry *dma_dentry;
+
+static int coh901318_debugfs_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *f_pos)
+{
+	u64 started_channels = debugfs_dma_base->pm.started_channels;
+	int pool_count = debugfs_dma_base->pool.debugfs_pool_counter;
+	int i;
+	int ret = 0;
+	char *dev_buf;
+	char *tmp;
+	int dev_size;
+
+	dev_buf = kmalloc(4*1024, GFP_KERNEL);
+	if (dev_buf == NULL)
+		goto err_kmalloc;
+	tmp = dev_buf;
+
+	tmp += sprintf(tmp, "DMA -- enabled dma channels\n");
+
+	for (i = 0; i < debugfs_dma_base->platform->max_channels; i++)
+		if (started_channels & (1 << i))
+			tmp += sprintf(tmp, "channel %d\n", i);
+
+	tmp += sprintf(tmp, "Pool alloc nbr %d\n", pool_count);
+	dev_size = tmp  - dev_buf;
+
+	/* No more to read if offset != 0 */
+	if (*f_pos > dev_size)
+		goto out;
+
+	if (count > dev_size - *f_pos)
+		count = dev_size - *f_pos;
+
+	if (copy_to_user(buf, dev_buf + *f_pos, count))
+		ret = -EINVAL;
+	ret = count;
+	*f_pos += count;
+
+ out:
+	kfree(dev_buf);
+	return ret;
+
+ err_kmalloc:
+	return 0;
+}
+
+static const struct file_operations coh901318_debugfs_status_operations = {
+	.owner		= THIS_MODULE,
+	.open		= simple_open,
+	.read		= coh901318_debugfs_read,
+	.llseek		= default_llseek,
+};
+
+
+static int __init init_coh901318_debugfs(void)
+{
+
+	dma_dentry = debugfs_create_dir("dma", NULL);
+
+	(void) debugfs_create_file("status",
+				   S_IFREG | S_IRUGO,
+				   dma_dentry, NULL,
+				   &coh901318_debugfs_status_operations);
+	return 0;
+}
+
+static void __exit exit_coh901318_debugfs(void)
+{
+	debugfs_remove_recursive(dma_dentry);
+}
+
+module_init(init_coh901318_debugfs);
+module_exit(exit_coh901318_debugfs);
+#else
+
+#define COH901318_DEBUGFS_ASSIGN(x, y)
+
+#endif /* CONFIG_DEBUG_FS */
+
+static inline struct coh901318_chan *to_coh901318_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct coh901318_chan, chan);
+}
+
+static inline dma_addr_t
+cohc_dev_addr(struct coh901318_chan *cohc)
+{
+	/* Runtime supplied address will take precedence */
+	if (cohc->runtime_addr)
+		return cohc->runtime_addr;
+	return cohc->base->platform->chan_conf[cohc->id].dev_addr;
+}
+
+static inline const struct coh901318_params *
+cohc_chan_param(struct coh901318_chan *cohc)
+{
+	return &cohc->base->platform->chan_conf[cohc->id].param;
+}
+
+static inline const struct coh_dma_channel *
+cohc_chan_conf(struct coh901318_chan *cohc)
+{
+	return &cohc->base->platform->chan_conf[cohc->id];
+}
+
+static void enable_powersave(struct coh901318_chan *cohc)
+{
+	unsigned long flags;
+	struct powersave *pm = &cohc->base->pm;
+
+	spin_lock_irqsave(&pm->lock, flags);
+
+	pm->started_channels &= ~(1ULL << cohc->id);
+
+	if (!pm->started_channels) {
+		/* DMA no longer intends to access memory */
+		cohc->base->platform->access_memory_state(cohc->base->dev,
+							  false);
+	}
+
+	spin_unlock_irqrestore(&pm->lock, flags);
+}
+static void disable_powersave(struct coh901318_chan *cohc)
+{
+	unsigned long flags;
+	struct powersave *pm = &cohc->base->pm;
+
+	spin_lock_irqsave(&pm->lock, flags);
+
+	if (!pm->started_channels) {
+		/* DMA intends to access memory */
+		cohc->base->platform->access_memory_state(cohc->base->dev,
+							  true);
+	}
+
+	pm->started_channels |= (1ULL << cohc->id);
+
+	spin_unlock_irqrestore(&pm->lock, flags);
+}
+
+static inline int coh901318_set_ctrl(struct coh901318_chan *cohc, u32 control)
+{
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	writel(control,
+	       virtbase + COH901318_CX_CTRL +
+	       COH901318_CX_CTRL_SPACING * channel);
+	return 0;
+}
+
+static inline int coh901318_set_conf(struct coh901318_chan *cohc, u32 conf)
+{
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	writel(conf,
+	       virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING*channel);
+	return 0;
+}
+
+
+static int coh901318_start(struct coh901318_chan *cohc)
+{
+	u32 val;
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	disable_powersave(cohc);
+
+	val = readl(virtbase + COH901318_CX_CFG +
+		    COH901318_CX_CFG_SPACING * channel);
+
+	/* Enable channel */
+	val |= COH901318_CX_CFG_CH_ENABLE;
+	writel(val, virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING * channel);
+
+	return 0;
+}
+
+static int coh901318_prep_linked_list(struct coh901318_chan *cohc,
+				      struct coh901318_lli *lli)
+{
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	BUG_ON(readl(virtbase + COH901318_CX_STAT +
+		     COH901318_CX_STAT_SPACING*channel) &
+	       COH901318_CX_STAT_ACTIVE);
+
+	writel(lli->src_addr,
+	       virtbase + COH901318_CX_SRC_ADDR +
+	       COH901318_CX_SRC_ADDR_SPACING * channel);
+
+	writel(lli->dst_addr, virtbase +
+	       COH901318_CX_DST_ADDR +
+	       COH901318_CX_DST_ADDR_SPACING * channel);
+
+	writel(lli->link_addr, virtbase + COH901318_CX_LNK_ADDR +
+	       COH901318_CX_LNK_ADDR_SPACING * channel);
+
+	writel(lli->control, virtbase + COH901318_CX_CTRL +
+	       COH901318_CX_CTRL_SPACING * channel);
+
+	return 0;
+}
+
+static struct coh901318_desc *
+coh901318_desc_get(struct coh901318_chan *cohc)
+{
+	struct coh901318_desc *desc;
+
+	if (list_empty(&cohc->free)) {
+		/* alloc new desc because we're out of used ones
+		 * TODO: alloc a pile of descs instead of just one,
+		 * avoid many small allocations.
+		 */
+		desc = kzalloc(sizeof(struct coh901318_desc), GFP_NOWAIT);
+		if (desc == NULL)
+			goto out;
+		INIT_LIST_HEAD(&desc->node);
+		dma_async_tx_descriptor_init(&desc->desc, &cohc->chan);
+	} else {
+		/* Reuse an old desc. */
+		desc = list_first_entry(&cohc->free,
+					struct coh901318_desc,
+					node);
+		list_del(&desc->node);
+		/* Initialize it a bit so it's not insane */
+		desc->sg = NULL;
+		desc->sg_len = 0;
+		desc->desc.callback = NULL;
+		desc->desc.callback_param = NULL;
+	}
+
+ out:
+	return desc;
+}
+
+static void
+coh901318_desc_free(struct coh901318_chan *cohc, struct coh901318_desc *cohd)
+{
+	list_add_tail(&cohd->node, &cohc->free);
+}
+
+/* call with irq lock held */
+static void
+coh901318_desc_submit(struct coh901318_chan *cohc, struct coh901318_desc *desc)
+{
+	list_add_tail(&desc->node, &cohc->active);
+}
+
+static struct coh901318_desc *
+coh901318_first_active_get(struct coh901318_chan *cohc)
+{
+	struct coh901318_desc *d;
+
+	if (list_empty(&cohc->active))
+		return NULL;
+
+	d = list_first_entry(&cohc->active,
+			     struct coh901318_desc,
+			     node);
+	return d;
+}
+
+static void
+coh901318_desc_remove(struct coh901318_desc *cohd)
+{
+	list_del(&cohd->node);
+}
+
+static void
+coh901318_desc_queue(struct coh901318_chan *cohc, struct coh901318_desc *desc)
+{
+	list_add_tail(&desc->node, &cohc->queue);
+}
+
+static struct coh901318_desc *
+coh901318_first_queued(struct coh901318_chan *cohc)
+{
+	struct coh901318_desc *d;
+
+	if (list_empty(&cohc->queue))
+		return NULL;
+
+	d = list_first_entry(&cohc->queue,
+			     struct coh901318_desc,
+			     node);
+	return d;
+}
+
+static inline u32 coh901318_get_bytes_in_lli(struct coh901318_lli *in_lli)
+{
+	struct coh901318_lli *lli = in_lli;
+	u32 bytes = 0;
+
+	while (lli) {
+		bytes += lli->control & COH901318_CX_CTRL_TC_VALUE_MASK;
+		lli = lli->virt_link_addr;
+	}
+	return bytes;
+}
+
+/*
+ * Get the number of bytes left to transfer on this channel,
+ * it is unwise to call this before stopping the channel for
+ * absolute measures, but for a rough guess you can still call
+ * it.
+ */
+static u32 coh901318_get_bytes_left(struct dma_chan *chan)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	struct coh901318_desc *cohd;
+	struct list_head *pos;
+	unsigned long flags;
+	u32 left = 0;
+	int i = 0;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/*
+	 * If there are many queued jobs, we iterate and add the
+	 * size of them all. We take a special look on the first
+	 * job though, since it is probably active.
+	 */
+	list_for_each(pos, &cohc->active) {
+		/*
+		 * The first job in the list will be working on the
+		 * hardware. The job can be stopped but still active,
+		 * so that the transfer counter is somewhere inside
+		 * the buffer.
+		 */
+		cohd = list_entry(pos, struct coh901318_desc, node);
+
+		if (i == 0) {
+			struct coh901318_lli *lli;
+			dma_addr_t ladd;
+
+			/* Read current transfer count value */
+			left = readl(cohc->base->virtbase +
+				     COH901318_CX_CTRL +
+				     COH901318_CX_CTRL_SPACING * cohc->id) &
+				COH901318_CX_CTRL_TC_VALUE_MASK;
+
+			/* See if the transfer is linked... */
+			ladd = readl(cohc->base->virtbase +
+				     COH901318_CX_LNK_ADDR +
+				     COH901318_CX_LNK_ADDR_SPACING *
+				     cohc->id) &
+				~COH901318_CX_LNK_LINK_IMMEDIATE;
+			/* Single transaction */
+			if (!ladd)
+				continue;
+
+			/*
+			 * Linked transaction, follow the lli, find the
+			 * currently processing lli, and proceed to the next
+			 */
+			lli = cohd->lli;
+			while (lli && lli->link_addr != ladd)
+				lli = lli->virt_link_addr;
+
+			if (lli)
+				lli = lli->virt_link_addr;
+
+			/*
+			 * Follow remaining lli links around to count the total
+			 * number of bytes left
+			 */
+			left += coh901318_get_bytes_in_lli(lli);
+		} else {
+			left += coh901318_get_bytes_in_lli(cohd->lli);
+		}
+		i++;
+	}
+
+	/* Also count bytes in the queued jobs */
+	list_for_each(pos, &cohc->queue) {
+		cohd = list_entry(pos, struct coh901318_desc, node);
+		left += coh901318_get_bytes_in_lli(cohd->lli);
+	}
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return left;
+}
+
+/*
+ * Pauses a transfer without losing data. Enables power save.
+ * Use this function in conjunction with coh901318_resume.
+ */
+static void coh901318_pause(struct dma_chan *chan)
+{
+	u32 val;
+	unsigned long flags;
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/* Disable channel in HW */
+	val = readl(virtbase + COH901318_CX_CFG +
+		    COH901318_CX_CFG_SPACING * channel);
+
+	/* Stopping infinite transfer */
+	if ((val & COH901318_CX_CTRL_TC_ENABLE) == 0 &&
+	    (val & COH901318_CX_CFG_CH_ENABLE))
+		cohc->stopped = 1;
+
+
+	val &= ~COH901318_CX_CFG_CH_ENABLE;
+	/* Enable twice, HW bug work around */
+	writel(val, virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING * channel);
+	writel(val, virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING * channel);
+
+	/* Spin-wait for it to actually go inactive */
+	while (readl(virtbase + COH901318_CX_STAT+COH901318_CX_STAT_SPACING *
+		     channel) & COH901318_CX_STAT_ACTIVE)
+		cpu_relax();
+
+	/* Check if we stopped an active job */
+	if ((readl(virtbase + COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING *
+		   channel) & COH901318_CX_CTRL_TC_VALUE_MASK) > 0)
+		cohc->stopped = 1;
+
+	enable_powersave(cohc);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+}
+
+/* Resumes a transfer that has been stopped via 300_dma_stop(..).
+   Power save is handled.
+*/
+static void coh901318_resume(struct dma_chan *chan)
+{
+	u32 val;
+	unsigned long flags;
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	int channel = cohc->id;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	disable_powersave(cohc);
+
+	if (cohc->stopped) {
+		/* Enable channel in HW */
+		val = readl(cohc->base->virtbase + COH901318_CX_CFG +
+			    COH901318_CX_CFG_SPACING * channel);
+
+		val |= COH901318_CX_CFG_CH_ENABLE;
+
+		writel(val, cohc->base->virtbase + COH901318_CX_CFG +
+		       COH901318_CX_CFG_SPACING*channel);
+
+		cohc->stopped = 0;
+	}
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+}
+
+bool coh901318_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	unsigned int ch_nr = (unsigned int) chan_id;
+
+	if (ch_nr == to_coh901318_chan(chan)->id)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(coh901318_filter_id);
+
+/*
+ * DMA channel allocation
+ */
+static int coh901318_config(struct coh901318_chan *cohc,
+			    struct coh901318_params *param)
+{
+	unsigned long flags;
+	const struct coh901318_params *p;
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	if (param)
+		p = param;
+	else
+		p = &cohc->base->platform->chan_conf[channel].param;
+
+	/* Clear any pending BE or TC interrupt */
+	if (channel < 32) {
+		writel(1 << channel, virtbase + COH901318_BE_INT_CLEAR1);
+		writel(1 << channel, virtbase + COH901318_TC_INT_CLEAR1);
+	} else {
+		writel(1 << (channel - 32), virtbase +
+		       COH901318_BE_INT_CLEAR2);
+		writel(1 << (channel - 32), virtbase +
+		       COH901318_TC_INT_CLEAR2);
+	}
+
+	coh901318_set_conf(cohc, p->config);
+	coh901318_set_ctrl(cohc, p->ctrl_lli_last);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return 0;
+}
+
+/* must lock when calling this function
+ * start queued jobs, if any
+ * TODO: start all queued jobs in one go
+ *
+ * Returns descriptor if queued job is started otherwise NULL.
+ * If the queue is empty NULL is returned.
+ */
+static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc)
+{
+	struct coh901318_desc *cohd;
+
+	/*
+	 * start queued jobs, if any
+	 * TODO: transmit all queued jobs in one go
+	 */
+	cohd = coh901318_first_queued(cohc);
+
+	if (cohd != NULL) {
+		/* Remove from queue */
+		coh901318_desc_remove(cohd);
+		/* initiate DMA job */
+		cohc->busy = 1;
+
+		coh901318_desc_submit(cohc, cohd);
+
+		/* Program the transaction head */
+		coh901318_set_conf(cohc, cohd->head_config);
+		coh901318_set_ctrl(cohc, cohd->head_ctrl);
+		coh901318_prep_linked_list(cohc, cohd->lli);
+
+		/* start dma job on this channel */
+		coh901318_start(cohc);
+
+	}
+
+	return cohd;
+}
+
+/*
+ * This tasklet is called from the interrupt handler to
+ * handle each descriptor (DMA job) that is sent to a channel.
+ */
+static void dma_tasklet(unsigned long data)
+{
+	struct coh901318_chan *cohc = (struct coh901318_chan *) data;
+	struct coh901318_desc *cohd_fin;
+	unsigned long flags;
+	dma_async_tx_callback callback;
+	void *callback_param;
+
+	dev_vdbg(COHC_2_DEV(cohc), "[%s] chan_id %d"
+		 " nbr_active_done %ld\n", __func__,
+		 cohc->id, cohc->nbr_active_done);
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/* get first active descriptor entry from list */
+	cohd_fin = coh901318_first_active_get(cohc);
+
+	if (cohd_fin == NULL)
+		goto err;
+
+	/* locate callback to client */
+	callback = cohd_fin->desc.callback;
+	callback_param = cohd_fin->desc.callback_param;
+
+	/* sign this job as completed on the channel */
+	dma_cookie_complete(&cohd_fin->desc);
+
+	/* release the lli allocation and remove the descriptor */
+	coh901318_lli_free(&cohc->base->pool, &cohd_fin->lli);
+
+	/* return desc to free-list */
+	coh901318_desc_remove(cohd_fin);
+	coh901318_desc_free(cohc, cohd_fin);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	/* Call the callback when we're done */
+	if (callback)
+		callback(callback_param);
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/*
+	 * If another interrupt fired while the tasklet was scheduling,
+	 * we don't get called twice, so we have this number of active
+	 * counter that keep track of the number of IRQs expected to
+	 * be handled for this channel. If there happen to be more than
+	 * one IRQ to be ack:ed, we simply schedule this tasklet again.
+	 */
+	cohc->nbr_active_done--;
+	if (cohc->nbr_active_done) {
+		dev_dbg(COHC_2_DEV(cohc), "scheduling tasklet again, new IRQs "
+			"came in while we were scheduling this tasklet\n");
+		if (cohc_chan_conf(cohc)->priority_high)
+			tasklet_hi_schedule(&cohc->tasklet);
+		else
+			tasklet_schedule(&cohc->tasklet);
+	}
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return;
+
+ err:
+	spin_unlock_irqrestore(&cohc->lock, flags);
+	dev_err(COHC_2_DEV(cohc), "[%s] No active dma desc\n", __func__);
+}
+
+
+/* called from interrupt context */
+static void dma_tc_handle(struct coh901318_chan *cohc)
+{
+	/*
+	 * If the channel is not allocated, then we shouldn't have
+	 * any TC interrupts on it.
+	 */
+	if (!cohc->allocated) {
+		dev_err(COHC_2_DEV(cohc), "spurious interrupt from "
+			"unallocated channel\n");
+		return;
+	}
+
+	spin_lock(&cohc->lock);
+
+	/*
+	 * When we reach this point, at least one queue item
+	 * should have been moved over from cohc->queue to
+	 * cohc->active and run to completion, that is why we're
+	 * getting a terminal count interrupt is it not?
+	 * If you get this BUG() the most probable cause is that
+	 * the individual nodes in the lli chain have IRQ enabled,
+	 * so check your platform config for lli chain ctrl.
+	 */
+	BUG_ON(list_empty(&cohc->active));
+
+	cohc->nbr_active_done++;
+
+	/*
+	 * This attempt to take a job from cohc->queue, put it
+	 * into cohc->active and start it.
+	 */
+	if (coh901318_queue_start(cohc) == NULL)
+		cohc->busy = 0;
+
+	spin_unlock(&cohc->lock);
+
+	/*
+	 * This tasklet will remove items from cohc->active
+	 * and thus terminates them.
+	 */
+	if (cohc_chan_conf(cohc)->priority_high)
+		tasklet_hi_schedule(&cohc->tasklet);
+	else
+		tasklet_schedule(&cohc->tasklet);
+}
+
+
+static irqreturn_t dma_irq_handler(int irq, void *dev_id)
+{
+	u32 status1;
+	u32 status2;
+	int i;
+	int ch;
+	struct coh901318_base *base  = dev_id;
+	struct coh901318_chan *cohc;
+	void __iomem *virtbase = base->virtbase;
+
+	status1 = readl(virtbase + COH901318_INT_STATUS1);
+	status2 = readl(virtbase + COH901318_INT_STATUS2);
+
+	if (unlikely(status1 == 0 && status2 == 0)) {
+		dev_warn(base->dev, "spurious DMA IRQ from no channel!\n");
+		return IRQ_HANDLED;
+	}
+
+	/* TODO: consider handle IRQ in tasklet here to
+	 *       minimize interrupt latency */
+
+	/* Check the first 32 DMA channels for IRQ */
+	while (status1) {
+		/* Find first bit set, return as a number. */
+		i = ffs(status1) - 1;
+		ch = i;
+
+		cohc = &base->chans[ch];
+		spin_lock(&cohc->lock);
+
+		/* Mask off this bit */
+		status1 &= ~(1 << i);
+		/* Check the individual channel bits */
+		if (test_bit(i, virtbase + COH901318_BE_INT_STATUS1)) {
+			dev_crit(COHC_2_DEV(cohc),
+				 "DMA bus error on channel %d!\n", ch);
+			BUG_ON(1);
+			/* Clear BE interrupt */
+			__set_bit(i, virtbase + COH901318_BE_INT_CLEAR1);
+		} else {
+			/* Caused by TC, really? */
+			if (unlikely(!test_bit(i, virtbase +
+					       COH901318_TC_INT_STATUS1))) {
+				dev_warn(COHC_2_DEV(cohc),
+					 "ignoring interrupt not caused by terminal count on channel %d\n", ch);
+				/* Clear TC interrupt */
+				BUG_ON(1);
+				__set_bit(i, virtbase + COH901318_TC_INT_CLEAR1);
+			} else {
+				/* Enable powersave if transfer has finished */
+				if (!(readl(virtbase + COH901318_CX_STAT +
+					    COH901318_CX_STAT_SPACING*ch) &
+				      COH901318_CX_STAT_ENABLED)) {
+					enable_powersave(cohc);
+				}
+
+				/* Must clear TC interrupt before calling
+				 * dma_tc_handle
+				 * in case tc_handle initiate a new dma job
+				 */
+				__set_bit(i, virtbase + COH901318_TC_INT_CLEAR1);
+
+				dma_tc_handle(cohc);
+			}
+		}
+		spin_unlock(&cohc->lock);
+	}
+
+	/* Check the remaining 32 DMA channels for IRQ */
+	while (status2) {
+		/* Find first bit set, return as a number. */
+		i = ffs(status2) - 1;
+		ch = i + 32;
+		cohc = &base->chans[ch];
+		spin_lock(&cohc->lock);
+
+		/* Mask off this bit */
+		status2 &= ~(1 << i);
+		/* Check the individual channel bits */
+		if (test_bit(i, virtbase + COH901318_BE_INT_STATUS2)) {
+			dev_crit(COHC_2_DEV(cohc),
+				 "DMA bus error on channel %d!\n", ch);
+			/* Clear BE interrupt */
+			BUG_ON(1);
+			__set_bit(i, virtbase + COH901318_BE_INT_CLEAR2);
+		} else {
+			/* Caused by TC, really? */
+			if (unlikely(!test_bit(i, virtbase +
+					       COH901318_TC_INT_STATUS2))) {
+				dev_warn(COHC_2_DEV(cohc),
+					 "ignoring interrupt not caused by terminal count on channel %d\n", ch);
+				/* Clear TC interrupt */
+				__set_bit(i, virtbase + COH901318_TC_INT_CLEAR2);
+				BUG_ON(1);
+			} else {
+				/* Enable powersave if transfer has finished */
+				if (!(readl(virtbase + COH901318_CX_STAT +
+					    COH901318_CX_STAT_SPACING*ch) &
+				      COH901318_CX_STAT_ENABLED)) {
+					enable_powersave(cohc);
+				}
+				/* Must clear TC interrupt before calling
+				 * dma_tc_handle
+				 * in case tc_handle initiate a new dma job
+				 */
+				__set_bit(i, virtbase + COH901318_TC_INT_CLEAR2);
+
+				dma_tc_handle(cohc);
+			}
+		}
+		spin_unlock(&cohc->lock);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int coh901318_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct coh901318_chan	*cohc = to_coh901318_chan(chan);
+	unsigned long flags;
+
+	dev_vdbg(COHC_2_DEV(cohc), "[%s] DMA channel %d\n",
+		 __func__, cohc->id);
+
+	if (chan->client_count > 1)
+		return -EBUSY;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	coh901318_config(cohc, NULL);
+
+	cohc->allocated = 1;
+	dma_cookie_init(chan);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return 1;
+}
+
+static void
+coh901318_free_chan_resources(struct dma_chan *chan)
+{
+	struct coh901318_chan	*cohc = to_coh901318_chan(chan);
+	int channel = cohc->id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/* Disable HW */
+	writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING*channel);
+	writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CTRL +
+	       COH901318_CX_CTRL_SPACING*channel);
+
+	cohc->allocated = 0;
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+}
+
+
+static dma_cookie_t
+coh901318_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct coh901318_desc *cohd = container_of(tx, struct coh901318_desc,
+						   desc);
+	struct coh901318_chan *cohc = to_coh901318_chan(tx->chan);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	coh901318_desc_queue(cohc, cohd);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		      size_t size, unsigned long flags)
+{
+	struct coh901318_lli *lli;
+	struct coh901318_desc *cohd;
+	unsigned long flg;
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	int lli_len;
+	u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
+	int ret;
+
+	spin_lock_irqsave(&cohc->lock, flg);
+
+	dev_vdbg(COHC_2_DEV(cohc),
+		 "[%s] channel %d src 0x%x dest 0x%x size %d\n",
+		 __func__, cohc->id, src, dest, size);
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last lli */
+		ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
+
+	lli_len = size >> MAX_DMA_PACKET_SIZE_SHIFT;
+	if ((lli_len << MAX_DMA_PACKET_SIZE_SHIFT) < size)
+		lli_len++;
+
+	lli = coh901318_lli_alloc(&cohc->base->pool, lli_len);
+
+	if (lli == NULL)
+		goto err;
+
+	ret = coh901318_lli_fill_memcpy(
+		&cohc->base->pool, lli, src, size, dest,
+		cohc_chan_param(cohc)->ctrl_lli_chained,
+		ctrl_last);
+	if (ret)
+		goto err;
+
+	COH_DBG(coh901318_list_print(cohc, lli));
+
+	/* Pick a descriptor to handle this transfer */
+	cohd = coh901318_desc_get(cohc);
+	cohd->lli = lli;
+	cohd->flags = flags;
+	cohd->desc.tx_submit = coh901318_tx_submit;
+
+	spin_unlock_irqrestore(&cohc->lock, flg);
+
+	return &cohd->desc;
+ err:
+	spin_unlock_irqrestore(&cohc->lock, flg);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_transfer_direction direction,
+			unsigned long flags, void *context)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	struct coh901318_lli *lli;
+	struct coh901318_desc *cohd;
+	const struct coh901318_params *params;
+	struct scatterlist *sg;
+	int len = 0;
+	int size;
+	int i;
+	u32 ctrl_chained = cohc_chan_param(cohc)->ctrl_lli_chained;
+	u32 ctrl = cohc_chan_param(cohc)->ctrl_lli;
+	u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
+	u32 config;
+	unsigned long flg;
+	int ret;
+
+	if (!sgl)
+		goto out;
+	if (sgl->length == 0)
+		goto out;
+
+	spin_lock_irqsave(&cohc->lock, flg);
+
+	dev_vdbg(COHC_2_DEV(cohc), "[%s] sg_len %d dir %d\n",
+		 __func__, sg_len, direction);
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last lli */
+		ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
+
+	params = cohc_chan_param(cohc);
+	config = params->config;
+	/*
+	 * Add runtime-specific control on top, make
+	 * sure the bits you set per peripheral channel are
+	 * cleared in the default config from the platform.
+	 */
+	ctrl_chained |= cohc->runtime_ctrl;
+	ctrl_last |= cohc->runtime_ctrl;
+	ctrl |= cohc->runtime_ctrl;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
+			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
+
+		config |= COH901318_CX_CFG_RM_MEMORY_TO_PRIMARY;
+		ctrl_chained |= tx_flags;
+		ctrl_last |= tx_flags;
+		ctrl |= tx_flags;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
+			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
+
+		config |= COH901318_CX_CFG_RM_PRIMARY_TO_MEMORY;
+		ctrl_chained |= rx_flags;
+		ctrl_last |= rx_flags;
+		ctrl |= rx_flags;
+	} else
+		goto err_direction;
+
+	/* The dma only supports transmitting packages up to
+	 * MAX_DMA_PACKET_SIZE. Calculate to total number of
+	 * dma elemts required to send the entire sg list
+	 */
+	for_each_sg(sgl, sg, sg_len, i) {
+		unsigned int factor;
+		size = sg_dma_len(sg);
+
+		if (size <= MAX_DMA_PACKET_SIZE) {
+			len++;
+			continue;
+		}
+
+		factor = size >> MAX_DMA_PACKET_SIZE_SHIFT;
+		if ((factor << MAX_DMA_PACKET_SIZE_SHIFT) < size)
+			factor++;
+
+		len += factor;
+	}
+
+	pr_debug("Allocate %d lli:s for this transfer\n", len);
+	lli = coh901318_lli_alloc(&cohc->base->pool, len);
+
+	if (lli == NULL)
+		goto err_dma_alloc;
+
+	/* initiate allocated lli list */
+	ret = coh901318_lli_fill_sg(&cohc->base->pool, lli, sgl, sg_len,
+				    cohc_dev_addr(cohc),
+				    ctrl_chained,
+				    ctrl,
+				    ctrl_last,
+				    direction, COH901318_CX_CTRL_TC_IRQ_ENABLE);
+	if (ret)
+		goto err_lli_fill;
+
+
+	COH_DBG(coh901318_list_print(cohc, lli));
+
+	/* Pick a descriptor to handle this transfer */
+	cohd = coh901318_desc_get(cohc);
+	cohd->head_config = config;
+	/*
+	 * Set the default head ctrl for the channel to the one from the
+	 * lli, things may have changed due to odd buffer alignment
+	 * etc.
+	 */
+	cohd->head_ctrl = lli->control;
+	cohd->dir = direction;
+	cohd->flags = flags;
+	cohd->desc.tx_submit = coh901318_tx_submit;
+	cohd->lli = lli;
+
+	spin_unlock_irqrestore(&cohc->lock, flg);
+
+	return &cohd->desc;
+ err_lli_fill:
+ err_dma_alloc:
+ err_direction:
+	spin_unlock_irqrestore(&cohc->lock, flg);
+ out:
+	return NULL;
+}
+
+static enum dma_status
+coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		 struct dma_tx_state *txstate)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	/* FIXME: should be conditional on ret != DMA_SUCCESS? */
+	dma_set_residue(txstate, coh901318_get_bytes_left(chan));
+
+	if (ret == DMA_IN_PROGRESS && cohc->stopped)
+		ret = DMA_PAUSED;
+
+	return ret;
+}
+
+static void
+coh901318_issue_pending(struct dma_chan *chan)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/*
+	 * Busy means that pending jobs are already being processed,
+	 * and then there is no point in starting the queue: the
+	 * terminal count interrupt on the channel will take the next
+	 * job on the queue and execute it anyway.
+	 */
+	if (!cohc->busy)
+		coh901318_queue_start(cohc);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+}
+
+/*
+ * Here we wrap in the runtime dma control interface
+ */
+struct burst_table {
+	int burst_8bit;
+	int burst_16bit;
+	int burst_32bit;
+	u32 reg;
+};
+
+static const struct burst_table burst_sizes[] = {
+	{
+		.burst_8bit = 64,
+		.burst_16bit = 32,
+		.burst_32bit = 16,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_64_BYTES,
+	},
+	{
+		.burst_8bit = 48,
+		.burst_16bit = 24,
+		.burst_32bit = 12,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_48_BYTES,
+	},
+	{
+		.burst_8bit = 32,
+		.burst_16bit = 16,
+		.burst_32bit = 8,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_32_BYTES,
+	},
+	{
+		.burst_8bit = 16,
+		.burst_16bit = 8,
+		.burst_32bit = 4,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_16_BYTES,
+	},
+	{
+		.burst_8bit = 8,
+		.burst_16bit = 4,
+		.burst_32bit = 2,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_8_BYTES,
+	},
+	{
+		.burst_8bit = 4,
+		.burst_16bit = 2,
+		.burst_32bit = 1,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_4_BYTES,
+	},
+	{
+		.burst_8bit = 2,
+		.burst_16bit = 1,
+		.burst_32bit = 0,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_2_BYTES,
+	},
+	{
+		.burst_8bit = 1,
+		.burst_16bit = 0,
+		.burst_32bit = 0,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_1_BYTE,
+	},
+};
+
+static void coh901318_dma_set_runtimeconfig(struct dma_chan *chan,
+			struct dma_slave_config *config)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	dma_addr_t addr;
+	enum dma_slave_buswidth addr_width;
+	u32 maxburst;
+	u32 runtime_ctrl = 0;
+	int i = 0;
+
+	/* We only support mem to per or per to mem transfers */
+	if (config->direction == DMA_DEV_TO_MEM) {
+		addr = config->src_addr;
+		addr_width = config->src_addr_width;
+		maxburst = config->src_maxburst;
+	} else if (config->direction == DMA_MEM_TO_DEV) {
+		addr = config->dst_addr;
+		addr_width = config->dst_addr_width;
+		maxburst = config->dst_maxburst;
+	} else {
+		dev_err(COHC_2_DEV(cohc), "illegal channel mode\n");
+		return;
+	}
+
+	dev_dbg(COHC_2_DEV(cohc), "configure channel for %d byte transfers\n",
+		addr_width);
+	switch (addr_width)  {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		runtime_ctrl |=
+			COH901318_CX_CTRL_SRC_BUS_SIZE_8_BITS |
+			COH901318_CX_CTRL_DST_BUS_SIZE_8_BITS;
+
+		while (i < ARRAY_SIZE(burst_sizes)) {
+			if (burst_sizes[i].burst_8bit <= maxburst)
+				break;
+			i++;
+		}
+
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		runtime_ctrl |=
+			COH901318_CX_CTRL_SRC_BUS_SIZE_16_BITS |
+			COH901318_CX_CTRL_DST_BUS_SIZE_16_BITS;
+
+		while (i < ARRAY_SIZE(burst_sizes)) {
+			if (burst_sizes[i].burst_16bit <= maxburst)
+				break;
+			i++;
+		}
+
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		/* Direction doesn't matter here, it's 32/32 bits */
+		runtime_ctrl |=
+			COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+			COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS;
+
+		while (i < ARRAY_SIZE(burst_sizes)) {
+			if (burst_sizes[i].burst_32bit <= maxburst)
+				break;
+			i++;
+		}
+
+		break;
+	default:
+		dev_err(COHC_2_DEV(cohc),
+			"bad runtimeconfig: alien address width\n");
+		return;
+	}
+
+	runtime_ctrl |= burst_sizes[i].reg;
+	dev_dbg(COHC_2_DEV(cohc),
+		"selected burst size %d bytes for address width %d bytes, maxburst %d\n",
+		burst_sizes[i].burst_8bit, addr_width, maxburst);
+
+	cohc->runtime_addr = addr;
+	cohc->runtime_ctrl = runtime_ctrl;
+}
+
+static int
+coh901318_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		  unsigned long arg)
+{
+	unsigned long flags;
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	struct coh901318_desc *cohd;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	if (cmd == DMA_SLAVE_CONFIG) {
+		struct dma_slave_config *config =
+			(struct dma_slave_config *) arg;
+
+		coh901318_dma_set_runtimeconfig(chan, config);
+		return 0;
+	  }
+
+	if (cmd == DMA_PAUSE) {
+		coh901318_pause(chan);
+		return 0;
+	}
+
+	if (cmd == DMA_RESUME) {
+		coh901318_resume(chan);
+		return 0;
+	}
+
+	if (cmd != DMA_TERMINATE_ALL)
+		return -ENXIO;
+
+	/* The remainder of this function terminates the transfer */
+	coh901318_pause(chan);
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/* Clear any pending BE or TC interrupt */
+	if (cohc->id < 32) {
+		writel(1 << cohc->id, virtbase + COH901318_BE_INT_CLEAR1);
+		writel(1 << cohc->id, virtbase + COH901318_TC_INT_CLEAR1);
+	} else {
+		writel(1 << (cohc->id - 32), virtbase +
+		       COH901318_BE_INT_CLEAR2);
+		writel(1 << (cohc->id - 32), virtbase +
+		       COH901318_TC_INT_CLEAR2);
+	}
+
+	enable_powersave(cohc);
+
+	while ((cohd = coh901318_first_active_get(cohc))) {
+		/* release the lli allocation*/
+		coh901318_lli_free(&cohc->base->pool, &cohd->lli);
+
+		/* return desc to free-list */
+		coh901318_desc_remove(cohd);
+		coh901318_desc_free(cohc, cohd);
+	}
+
+	while ((cohd = coh901318_first_queued(cohc))) {
+		/* release the lli allocation*/
+		coh901318_lli_free(&cohc->base->pool, &cohd->lli);
+
+		/* return desc to free-list */
+		coh901318_desc_remove(cohd);
+		coh901318_desc_free(cohc, cohd);
+	}
+
+
+	cohc->nbr_active_done = 0;
+	cohc->busy = 0;
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return 0;
+}
+
+void coh901318_base_init(struct dma_device *dma, const int *pick_chans,
+			 struct coh901318_base *base)
+{
+	int chans_i;
+	int i = 0;
+	struct coh901318_chan *cohc;
+
+	INIT_LIST_HEAD(&dma->channels);
+
+	for (chans_i = 0; pick_chans[chans_i] != -1; chans_i += 2) {
+		for (i = pick_chans[chans_i]; i <= pick_chans[chans_i+1]; i++) {
+			cohc = &base->chans[i];
+
+			cohc->base = base;
+			cohc->chan.device = dma;
+			cohc->id = i;
+
+			/* TODO: do we really need this lock if only one
+			 * client is connected to each channel?
+			 */
+
+			spin_lock_init(&cohc->lock);
+
+			cohc->nbr_active_done = 0;
+			cohc->busy = 0;
+			INIT_LIST_HEAD(&cohc->free);
+			INIT_LIST_HEAD(&cohc->active);
+			INIT_LIST_HEAD(&cohc->queue);
+
+			tasklet_init(&cohc->tasklet, dma_tasklet,
+				     (unsigned long) cohc);
+
+			list_add_tail(&cohc->chan.device_node,
+				      &dma->channels);
+		}
+	}
+}
+
+static int __init coh901318_probe(struct platform_device *pdev)
+{
+	int err = 0;
+	struct coh901318_platform *pdata;
+	struct coh901318_base *base;
+	int irq;
+	struct resource *io;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!io)
+		goto err_get_resource;
+
+	/* Map DMA controller registers to virtual memory */
+	if (request_mem_region(io->start,
+			       resource_size(io),
+			       pdev->dev.driver->name) == NULL) {
+		err = -EBUSY;
+		goto err_request_mem;
+	}
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata)
+		goto err_no_platformdata;
+
+	base = kmalloc(ALIGN(sizeof(struct coh901318_base), 4) +
+		       pdata->max_channels *
+		       sizeof(struct coh901318_chan),
+		       GFP_KERNEL);
+	if (!base)
+		goto err_alloc_coh_dma_channels;
+
+	base->chans = ((void *)base) + ALIGN(sizeof(struct coh901318_base), 4);
+
+	base->virtbase = ioremap(io->start, resource_size(io));
+	if (!base->virtbase) {
+		err = -ENOMEM;
+		goto err_no_ioremap;
+	}
+
+	base->dev = &pdev->dev;
+	base->platform = pdata;
+	spin_lock_init(&base->pm.lock);
+	base->pm.started_channels = 0;
+
+	COH901318_DEBUGFS_ASSIGN(debugfs_dma_base, base);
+
+	platform_set_drvdata(pdev, base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		goto err_no_irq;
+
+	err = request_irq(irq, dma_irq_handler, IRQF_DISABLED,
+			  "coh901318", base);
+	if (err) {
+		dev_crit(&pdev->dev,
+			 "Cannot allocate IRQ for DMA controller!\n");
+		goto err_request_irq;
+	}
+
+	err = coh901318_pool_create(&base->pool, &pdev->dev,
+				    sizeof(struct coh901318_lli),
+				    32);
+	if (err)
+		goto err_pool_create;
+
+	/* init channels for device transfers */
+	coh901318_base_init(&base->dma_slave,  base->platform->chans_slave,
+			    base);
+
+	dma_cap_zero(base->dma_slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
+
+	base->dma_slave.device_alloc_chan_resources = coh901318_alloc_chan_resources;
+	base->dma_slave.device_free_chan_resources = coh901318_free_chan_resources;
+	base->dma_slave.device_prep_slave_sg = coh901318_prep_slave_sg;
+	base->dma_slave.device_tx_status = coh901318_tx_status;
+	base->dma_slave.device_issue_pending = coh901318_issue_pending;
+	base->dma_slave.device_control = coh901318_control;
+	base->dma_slave.dev = &pdev->dev;
+
+	err = dma_async_device_register(&base->dma_slave);
+
+	if (err)
+		goto err_register_slave;
+
+	/* init channels for memcpy */
+	coh901318_base_init(&base->dma_memcpy, base->platform->chans_memcpy,
+			    base);
+
+	dma_cap_zero(base->dma_memcpy.cap_mask);
+	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+
+	base->dma_memcpy.device_alloc_chan_resources = coh901318_alloc_chan_resources;
+	base->dma_memcpy.device_free_chan_resources = coh901318_free_chan_resources;
+	base->dma_memcpy.device_prep_dma_memcpy = coh901318_prep_memcpy;
+	base->dma_memcpy.device_tx_status = coh901318_tx_status;
+	base->dma_memcpy.device_issue_pending = coh901318_issue_pending;
+	base->dma_memcpy.device_control = coh901318_control;
+	base->dma_memcpy.dev = &pdev->dev;
+	/*
+	 * This controller can only access address at even 32bit boundaries,
+	 * i.e. 2^2
+	 */
+	base->dma_memcpy.copy_align = 2;
+	err = dma_async_device_register(&base->dma_memcpy);
+
+	if (err)
+		goto err_register_memcpy;
+
+	dev_info(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%08x\n",
+		(u32) base->virtbase);
+
+	return err;
+
+ err_register_memcpy:
+	dma_async_device_unregister(&base->dma_slave);
+ err_register_slave:
+	coh901318_pool_destroy(&base->pool);
+ err_pool_create:
+	free_irq(platform_get_irq(pdev, 0), base);
+ err_request_irq:
+ err_no_irq:
+	iounmap(base->virtbase);
+ err_no_ioremap:
+	kfree(base);
+ err_alloc_coh_dma_channels:
+ err_no_platformdata:
+	release_mem_region(pdev->resource->start,
+			   resource_size(pdev->resource));
+ err_request_mem:
+ err_get_resource:
+	return err;
+}
+
+static int __exit coh901318_remove(struct platform_device *pdev)
+{
+	struct coh901318_base *base = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&base->dma_memcpy);
+	dma_async_device_unregister(&base->dma_slave);
+	coh901318_pool_destroy(&base->pool);
+	free_irq(platform_get_irq(pdev, 0), base);
+	iounmap(base->virtbase);
+	kfree(base);
+	release_mem_region(pdev->resource->start,
+			   resource_size(pdev->resource));
+	return 0;
+}
+
+
+static struct platform_driver coh901318_driver = {
+	.remove = __exit_p(coh901318_remove),
+	.driver = {
+		.name	= "coh901318",
+	},
+};
+
+int __init coh901318_init(void)
+{
+	return platform_driver_probe(&coh901318_driver, coh901318_probe);
+}
+subsys_initcall(coh901318_init);
+
+void __exit coh901318_exit(void)
+{
+	platform_driver_unregister(&coh901318_driver);
+}
+module_exit(coh901318_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Per Friden");
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
new file mode 100644
index 00000000..6c0e2d4c
--- /dev/null
+++ b/drivers/dma/coh901318_lli.c
@@ -0,0 +1,313 @@
+/*
+ * driver/dma/coh901318_lli.c
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Support functions for handling lli for dma
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/memory.h>
+#include <linux/gfp.h>
+#include <linux/dmapool.h>
+#include <mach/coh901318.h>
+
+#include "coh901318_lli.h"
+
+#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG))
+#define DEBUGFS_POOL_COUNTER_RESET(pool) (pool->debugfs_pool_counter = 0)
+#define DEBUGFS_POOL_COUNTER_ADD(pool, add) (pool->debugfs_pool_counter += add)
+#else
+#define DEBUGFS_POOL_COUNTER_RESET(pool)
+#define DEBUGFS_POOL_COUNTER_ADD(pool, add)
+#endif
+
+static struct coh901318_lli *
+coh901318_lli_next(struct coh901318_lli *data)
+{
+	if (data == NULL || data->link_addr == 0)
+		return NULL;
+
+	return (struct coh901318_lli *) data->virt_link_addr;
+}
+
+int coh901318_pool_create(struct coh901318_pool *pool,
+			  struct device *dev,
+			  size_t size, size_t align)
+{
+	spin_lock_init(&pool->lock);
+	pool->dev = dev;
+	pool->dmapool = dma_pool_create("lli_pool", dev, size, align, 0);
+
+	DEBUGFS_POOL_COUNTER_RESET(pool);
+	return 0;
+}
+
+int coh901318_pool_destroy(struct coh901318_pool *pool)
+{
+
+	dma_pool_destroy(pool->dmapool);
+	return 0;
+}
+
+struct coh901318_lli *
+coh901318_lli_alloc(struct coh901318_pool *pool, unsigned int len)
+{
+	int i;
+	struct coh901318_lli *head;
+	struct coh901318_lli *lli;
+	struct coh901318_lli *lli_prev;
+	dma_addr_t phy;
+
+	if (len == 0)
+		goto err;
+
+	spin_lock(&pool->lock);
+
+	head = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy);
+
+	if (head == NULL)
+		goto err;
+
+	DEBUGFS_POOL_COUNTER_ADD(pool, 1);
+
+	lli = head;
+	lli->phy_this = phy;
+	lli->link_addr = 0x00000000;
+	lli->virt_link_addr = 0x00000000U;
+
+	for (i = 1; i < len; i++) {
+		lli_prev = lli;
+
+		lli = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy);
+
+		if (lli == NULL)
+			goto err_clean_up;
+
+		DEBUGFS_POOL_COUNTER_ADD(pool, 1);
+		lli->phy_this = phy;
+		lli->link_addr = 0x00000000;
+		lli->virt_link_addr = 0x00000000U;
+
+		lli_prev->link_addr = phy;
+		lli_prev->virt_link_addr = lli;
+	}
+
+	spin_unlock(&pool->lock);
+
+	return head;
+
+ err:
+	spin_unlock(&pool->lock);
+	return NULL;
+
+ err_clean_up:
+	lli_prev->link_addr = 0x00000000U;
+	spin_unlock(&pool->lock);
+	coh901318_lli_free(pool, &head);
+	return NULL;
+}
+
+void coh901318_lli_free(struct coh901318_pool *pool,
+			struct coh901318_lli **lli)
+{
+	struct coh901318_lli *l;
+	struct coh901318_lli *next;
+
+	if (lli == NULL)
+		return;
+
+	l = *lli;
+
+	if (l == NULL)
+		return;
+
+	spin_lock(&pool->lock);
+
+	while (l->link_addr) {
+		next = l->virt_link_addr;
+		dma_pool_free(pool->dmapool, l, l->phy_this);
+		DEBUGFS_POOL_COUNTER_ADD(pool, -1);
+		l = next;
+	}
+	dma_pool_free(pool->dmapool, l, l->phy_this);
+	DEBUGFS_POOL_COUNTER_ADD(pool, -1);
+
+	spin_unlock(&pool->lock);
+	*lli = NULL;
+}
+
+int
+coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t source, unsigned int size,
+			  dma_addr_t destination, u32 ctrl_chained,
+			  u32 ctrl_eom)
+{
+	int s = size;
+	dma_addr_t src = source;
+	dma_addr_t dst = destination;
+
+	lli->src_addr = src;
+	lli->dst_addr = dst;
+
+	while (lli->link_addr) {
+		lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE;
+		lli->src_addr = src;
+		lli->dst_addr = dst;
+
+		s -= MAX_DMA_PACKET_SIZE;
+		lli = coh901318_lli_next(lli);
+
+		src += MAX_DMA_PACKET_SIZE;
+		dst += MAX_DMA_PACKET_SIZE;
+	}
+
+	lli->control = ctrl_eom | s;
+	lli->src_addr = src;
+	lli->dst_addr = dst;
+
+	return 0;
+}
+
+int
+coh901318_lli_fill_single(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t buf, unsigned int size,
+			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom,
+			  enum dma_transfer_direction dir)
+{
+	int s = size;
+	dma_addr_t src;
+	dma_addr_t dst;
+
+
+	if (dir == DMA_MEM_TO_DEV) {
+		src = buf;
+		dst = dev_addr;
+
+	} else if (dir == DMA_DEV_TO_MEM) {
+
+		src = dev_addr;
+		dst = buf;
+	} else {
+		return -EINVAL;
+	}
+
+	while (lli->link_addr) {
+		size_t block_size = MAX_DMA_PACKET_SIZE;
+		lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE;
+
+		/* If we are on the next-to-final block and there will
+		 * be less than half a DMA packet left for the last
+		 * block, then we want to make this block a little
+		 * smaller to balance the sizes. This is meant to
+		 * avoid too small transfers if the buffer size is
+		 * (MAX_DMA_PACKET_SIZE*N + 1) */
+		if (s < (MAX_DMA_PACKET_SIZE + MAX_DMA_PACKET_SIZE/2))
+			block_size = MAX_DMA_PACKET_SIZE/2;
+
+		s -= block_size;
+		lli->src_addr = src;
+		lli->dst_addr = dst;
+
+		lli = coh901318_lli_next(lli);
+
+		if (dir == DMA_MEM_TO_DEV)
+			src += block_size;
+		else if (dir == DMA_DEV_TO_MEM)
+			dst += block_size;
+	}
+
+	lli->control = ctrl_eom | s;
+	lli->src_addr = src;
+	lli->dst_addr = dst;
+
+	return 0;
+}
+
+int
+coh901318_lli_fill_sg(struct coh901318_pool *pool,
+		      struct coh901318_lli *lli,
+		      struct scatterlist *sgl, unsigned int nents,
+		      dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
+		      u32 ctrl_last,
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask)
+{
+	int i;
+	struct scatterlist *sg;
+	u32 ctrl_sg;
+	dma_addr_t src = 0;
+	dma_addr_t dst = 0;
+	u32 bytes_to_transfer;
+	u32 elem_size;
+
+	if (lli == NULL)
+		goto err;
+
+	spin_lock(&pool->lock);
+
+	if (dir == DMA_MEM_TO_DEV)
+		dst = dev_addr;
+	else if (dir == DMA_DEV_TO_MEM)
+		src = dev_addr;
+	else
+		goto err;
+
+	for_each_sg(sgl, sg, nents, i) {
+		if (sg_is_chain(sg)) {
+			/* sg continues to the next sg-element don't
+			 * send ctrl_finish until the last
+			 * sg-element in the chain
+			 */
+			ctrl_sg = ctrl_chained;
+		} else if (i == nents - 1)
+			ctrl_sg = ctrl_last;
+		else
+			ctrl_sg = ctrl ? ctrl : ctrl_last;
+
+
+		if (dir == DMA_MEM_TO_DEV)
+			/* increment source address */
+			src = sg_phys(sg);
+		else
+			/* increment destination address */
+			dst =  sg_phys(sg);
+
+		bytes_to_transfer = sg_dma_len(sg);
+
+		while (bytes_to_transfer) {
+			u32 val;
+
+			if (bytes_to_transfer > MAX_DMA_PACKET_SIZE) {
+				elem_size = MAX_DMA_PACKET_SIZE;
+				val = ctrl_chained;
+			} else {
+				elem_size = bytes_to_transfer;
+				val = ctrl_sg;
+			}
+
+			lli->control = val | elem_size;
+			lli->src_addr = src;
+			lli->dst_addr = dst;
+
+			if (dir == DMA_DEV_TO_MEM)
+				dst += elem_size;
+			else
+				src += elem_size;
+
+			BUG_ON(lli->link_addr & 3);
+
+			bytes_to_transfer -= elem_size;
+			lli = coh901318_lli_next(lli);
+		}
+
+	}
+	spin_unlock(&pool->lock);
+
+	return 0;
+ err:
+	spin_unlock(&pool->lock);
+	return -EINVAL;
+}
diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h
new file mode 100644
index 00000000..abff3714
--- /dev/null
+++ b/drivers/dma/coh901318_lli.h
@@ -0,0 +1,124 @@
+/*
+ * driver/dma/coh901318_lli.h
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Support functions for handling lli for coh901318
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#ifndef COH901318_LLI_H
+#define COH901318_LLI_H
+
+#include <mach/coh901318.h>
+
+struct device;
+
+struct coh901318_pool {
+	spinlock_t lock;
+	struct dma_pool *dmapool;
+	struct device *dev;
+
+#ifdef CONFIG_DEBUG_FS
+	int debugfs_pool_counter;
+#endif
+};
+
+struct device;
+/**
+ * coh901318_pool_create() - Creates an dma pool for lli:s
+ * @pool: pool handle
+ * @dev: dma device
+ * @lli_nbr: number of lli:s in the pool
+ * @algin: address alignemtn of lli:s
+ * returns 0 on success otherwise none zero
+ */
+int coh901318_pool_create(struct coh901318_pool *pool,
+			  struct device *dev,
+			  size_t lli_nbr, size_t align);
+
+/**
+ * coh901318_pool_destroy() - Destroys the dma pool
+ * @pool: pool handle
+ * returns 0 on success otherwise none zero
+ */
+int coh901318_pool_destroy(struct coh901318_pool *pool);
+
+/**
+ * coh901318_lli_alloc() - Allocates a linked list
+ *
+ * @pool: pool handle
+ * @len: length to list
+ * return: none NULL if success otherwise NULL
+ */
+struct coh901318_lli *
+coh901318_lli_alloc(struct coh901318_pool *pool,
+		    unsigned int len);
+
+/**
+ * coh901318_lli_free() - Returns the linked list items to the pool
+ * @pool: pool handle
+ * @lli: reference to lli pointer to be freed
+ */
+void coh901318_lli_free(struct coh901318_pool *pool,
+			struct coh901318_lli **lli);
+
+/**
+ * coh901318_lli_fill_memcpy() - Prepares the lli:s for dma memcpy
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @src: src address
+ * @size: transfer size
+ * @dst: destination address
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl_last: ctrl for the last lli
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t src, unsigned int size,
+			  dma_addr_t dst, u32 ctrl_chained, u32 ctrl_last);
+
+/**
+ * coh901318_lli_fill_single() - Prepares the lli:s for dma single transfer
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @buf: transfer buffer
+ * @size: transfer size
+ * @dev_addr: address of periphal
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl_last: ctrl for the last lli
+ * @dir: direction of transfer (to or from device)
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_single(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t buf, unsigned int size,
+			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
+			  enum dma_transfer_direction dir);
+
+/**
+ * coh901318_lli_fill_single() - Prepares the lli:s for dma scatter list transfer
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @sg: scatter gather list
+ * @nents: number of entries in sg
+ * @dev_addr: address of periphal
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl: ctrl of middle lli
+ * @ctrl_last: ctrl for the last lli
+ * @dir: direction of transfer (to or from device)
+ * @ctrl_irq_mask: ctrl mask for CPU interrupt
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_sg(struct coh901318_pool *pool,
+		      struct coh901318_lli *lli,
+		      struct scatterlist *sg, unsigned int nents,
+		      dma_addr_t dev_addr, u32 ctrl_chained,
+		      u32 ctrl, u32 ctrl_last,
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask);
+
+#endif /* COH901318_LLI_H */
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
new file mode 100644
index 00000000..2397f6f4
--- /dev/null
+++ b/drivers/dma/dmaengine.c
@@ -0,0 +1,1071 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code implements the DMA subsystem. It provides a HW-neutral interface
+ * for other kernel code to use asynchronous memory copy capabilities,
+ * if present, and allows different HW DMA drivers to register as providing
+ * this capability.
+ *
+ * Due to the fact we are accelerating what is already a relatively fast
+ * operation, the code goes to great lengths to avoid additional overhead,
+ * such as locking.
+ *
+ * LOCKING:
+ *
+ * The subsystem keeps a global list of dma_device structs it is protected by a
+ * mutex, dma_list_mutex.
+ *
+ * A subsystem can get access to a channel by calling dmaengine_get() followed
+ * by dma_find_channel(), or if it has need for an exclusive channel it can call
+ * dma_request_channel().  Once a channel is allocated a reference is taken
+ * against its corresponding driver to disable removal.
+ *
+ * Each device has a channels list, which runs unlocked but is never modified
+ * once the device is registered, it's just setup by the driver.
+ *
+ * See Documentation/dmaengine.txt for more details
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/hardirq.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+#include <linux/rculist.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+
+static DEFINE_MUTEX(dma_list_mutex);
+static DEFINE_IDR(dma_idr);
+static LIST_HEAD(dma_device_list);
+static long dmaengine_ref_count;
+
+/* --- sysfs implementation --- */
+
+/**
+ * dev_to_dma_chan - convert a device pointer to the its sysfs container object
+ * @dev - device node
+ *
+ * Must be called under dma_list_mutex
+ */
+static struct dma_chan *dev_to_dma_chan(struct device *dev)
+{
+	struct dma_chan_dev *chan_dev;
+
+	chan_dev = container_of(dev, typeof(*chan_dev), device);
+	return chan_dev->chan;
+}
+
+static ssize_t show_memcpy_count(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct dma_chan *chan;
+	unsigned long count = 0;
+	int i;
+	int err;
+
+	mutex_lock(&dma_list_mutex);
+	chan = dev_to_dma_chan(dev);
+	if (chan) {
+		for_each_possible_cpu(i)
+			count += per_cpu_ptr(chan->local, i)->memcpy_count;
+		err = sprintf(buf, "%lu\n", count);
+	} else
+		err = -ENODEV;
+	mutex_unlock(&dma_list_mutex);
+
+	return err;
+}
+
+static ssize_t show_bytes_transferred(struct device *dev, struct device_attribute *attr,
+				      char *buf)
+{
+	struct dma_chan *chan;
+	unsigned long count = 0;
+	int i;
+	int err;
+
+	mutex_lock(&dma_list_mutex);
+	chan = dev_to_dma_chan(dev);
+	if (chan) {
+		for_each_possible_cpu(i)
+			count += per_cpu_ptr(chan->local, i)->bytes_transferred;
+		err = sprintf(buf, "%lu\n", count);
+	} else
+		err = -ENODEV;
+	mutex_unlock(&dma_list_mutex);
+
+	return err;
+}
+
+static ssize_t show_in_use(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct dma_chan *chan;
+	int err;
+
+	mutex_lock(&dma_list_mutex);
+	chan = dev_to_dma_chan(dev);
+	if (chan)
+		err = sprintf(buf, "%d\n", chan->client_count);
+	else
+		err = -ENODEV;
+	mutex_unlock(&dma_list_mutex);
+
+	return err;
+}
+
+static struct device_attribute dma_attrs[] = {
+	__ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL),
+	__ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL),
+	__ATTR(in_use, S_IRUGO, show_in_use, NULL),
+	__ATTR_NULL
+};
+
+static void chan_dev_release(struct device *dev)
+{
+	struct dma_chan_dev *chan_dev;
+
+	chan_dev = container_of(dev, typeof(*chan_dev), device);
+	if (atomic_dec_and_test(chan_dev->idr_ref)) {
+		mutex_lock(&dma_list_mutex);
+		idr_remove(&dma_idr, chan_dev->dev_id);
+		mutex_unlock(&dma_list_mutex);
+		kfree(chan_dev->idr_ref);
+	}
+	kfree(chan_dev);
+}
+
+static struct class dma_devclass = {
+	.name		= "dma",
+	.dev_attrs	= dma_attrs,
+	.dev_release	= chan_dev_release,
+};
+
+/* --- client and device registration --- */
+
+#define dma_device_satisfies_mask(device, mask) \
+	__dma_device_satisfies_mask((device), &(mask))
+static int
+__dma_device_satisfies_mask(struct dma_device *device, dma_cap_mask_t *want)
+{
+	dma_cap_mask_t has;
+
+	bitmap_and(has.bits, want->bits, device->cap_mask.bits,
+		DMA_TX_TYPE_END);
+	return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
+}
+
+static struct module *dma_chan_to_owner(struct dma_chan *chan)
+{
+	return chan->device->dev->driver->owner;
+}
+
+/**
+ * balance_ref_count - catch up the channel reference count
+ * @chan - channel to balance ->client_count versus dmaengine_ref_count
+ *
+ * balance_ref_count must be called under dma_list_mutex
+ */
+static void balance_ref_count(struct dma_chan *chan)
+{
+	struct module *owner = dma_chan_to_owner(chan);
+
+	while (chan->client_count < dmaengine_ref_count) {
+		__module_get(owner);
+		chan->client_count++;
+	}
+}
+
+/**
+ * dma_chan_get - try to grab a dma channel's parent driver module
+ * @chan - channel to grab
+ *
+ * Must be called under dma_list_mutex
+ */
+static int dma_chan_get(struct dma_chan *chan)
+{
+	int err = -ENODEV;
+	struct module *owner = dma_chan_to_owner(chan);
+
+	if (chan->client_count) {
+		__module_get(owner);
+		err = 0;
+	} else if (try_module_get(owner))
+		err = 0;
+
+	if (err == 0)
+		chan->client_count++;
+
+	/* allocate upon first client reference */
+	if (chan->client_count == 1 && err == 0) {
+		int desc_cnt = chan->device->device_alloc_chan_resources(chan);
+
+		if (desc_cnt < 0) {
+			err = desc_cnt;
+			chan->client_count = 0;
+			module_put(owner);
+		} else if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask))
+			balance_ref_count(chan);
+	}
+
+	return err;
+}
+
+/**
+ * dma_chan_put - drop a reference to a dma channel's parent driver module
+ * @chan - channel to release
+ *
+ * Must be called under dma_list_mutex
+ */
+static void dma_chan_put(struct dma_chan *chan)
+{
+	if (!chan->client_count)
+		return; /* this channel failed alloc_chan_resources */
+	chan->client_count--;
+	module_put(dma_chan_to_owner(chan));
+	if (chan->client_count == 0)
+		chan->device->device_free_chan_resources(chan);
+}
+
+enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
+{
+	enum dma_status status;
+	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+	dma_async_issue_pending(chan);
+	do {
+		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+			printk(KERN_ERR "dma_sync_wait_timeout!\n");
+			return DMA_ERROR;
+		}
+	} while (status == DMA_IN_PROGRESS);
+
+	return status;
+}
+EXPORT_SYMBOL(dma_sync_wait);
+
+/**
+ * dma_cap_mask_all - enable iteration over all operation types
+ */
+static dma_cap_mask_t dma_cap_mask_all;
+
+/**
+ * dma_chan_tbl_ent - tracks channel allocations per core/operation
+ * @chan - associated channel for this entry
+ */
+struct dma_chan_tbl_ent {
+	struct dma_chan *chan;
+};
+
+/**
+ * channel_table - percpu lookup table for memory-to-memory offload providers
+ */
+static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END];
+
+static int __init dma_channel_table_init(void)
+{
+	enum dma_transaction_type cap;
+	int err = 0;
+
+	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
+
+	/* 'interrupt', 'private', and 'slave' are channel capabilities,
+	 * but are not associated with an operation so they do not need
+	 * an entry in the channel_table
+	 */
+	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
+	clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
+	clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
+
+	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
+		channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
+		if (!channel_table[cap]) {
+			err = -ENOMEM;
+			break;
+		}
+	}
+
+	if (err) {
+		pr_err("dmaengine: initialization failure\n");
+		for_each_dma_cap_mask(cap, dma_cap_mask_all)
+			if (channel_table[cap])
+				free_percpu(channel_table[cap]);
+	}
+
+	return err;
+}
+arch_initcall(dma_channel_table_init);
+
+/**
+ * dma_find_channel - find a channel to carry out the operation
+ * @tx_type: transaction type
+ */
+struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
+{
+	return this_cpu_read(channel_table[tx_type]->chan);
+}
+EXPORT_SYMBOL(dma_find_channel);
+
+/*
+ * net_dma_find_channel - find a channel for net_dma
+ * net_dma has alignment requirements
+ */
+struct dma_chan *net_dma_find_channel(void)
+{
+	struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
+	if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1))
+		return NULL;
+
+	return chan;
+}
+EXPORT_SYMBOL(net_dma_find_channel);
+
+/**
+ * dma_issue_pending_all - flush all pending operations across all channels
+ */
+void dma_issue_pending_all(void)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(device, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node)
+			if (chan->client_count)
+				device->device_issue_pending(chan);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(dma_issue_pending_all);
+
+/**
+ * nth_chan - returns the nth channel of the given capability
+ * @cap: capability to match
+ * @n: nth channel desired
+ *
+ * Defaults to returning the channel with the desired capability and the
+ * lowest reference count when 'n' cannot be satisfied.  Must be called
+ * under dma_list_mutex.
+ */
+static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+	struct dma_chan *ret = NULL;
+	struct dma_chan *min = NULL;
+
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (!dma_has_cap(cap, device->cap_mask) ||
+		    dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node) {
+			if (!chan->client_count)
+				continue;
+			if (!min)
+				min = chan;
+			else if (chan->table_count < min->table_count)
+				min = chan;
+
+			if (n-- == 0) {
+				ret = chan;
+				break; /* done */
+			}
+		}
+		if (ret)
+			break; /* done */
+	}
+
+	if (!ret)
+		ret = min;
+
+	if (ret)
+		ret->table_count++;
+
+	return ret;
+}
+
+/**
+ * dma_channel_rebalance - redistribute the available channels
+ *
+ * Optimize for cpu isolation (each cpu gets a dedicated channel for an
+ * operation type) in the SMP case,  and operation isolation (avoid
+ * multi-tasking channels) in the non-SMP case.  Must be called under
+ * dma_list_mutex.
+ */
+static void dma_channel_rebalance(void)
+{
+	struct dma_chan *chan;
+	struct dma_device *device;
+	int cpu;
+	int cap;
+	int n;
+
+	/* undo the last distribution */
+	for_each_dma_cap_mask(cap, dma_cap_mask_all)
+		for_each_possible_cpu(cpu)
+			per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
+
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node)
+			chan->table_count = 0;
+	}
+
+	/* don't populate the channel_table if no clients are available */
+	if (!dmaengine_ref_count)
+		return;
+
+	/* redistribute available channels */
+	n = 0;
+	for_each_dma_cap_mask(cap, dma_cap_mask_all)
+		for_each_online_cpu(cpu) {
+			if (num_possible_cpus() > 1)
+				chan = nth_chan(cap, n++);
+			else
+				chan = nth_chan(cap, -1);
+
+			per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
+		}
+}
+
+static struct dma_chan *private_candidate(dma_cap_mask_t *mask, struct dma_device *dev,
+					  dma_filter_fn fn, void *fn_param)
+{
+	struct dma_chan *chan;
+
+	if (!__dma_device_satisfies_mask(dev, mask)) {
+		pr_debug("%s: wrong capabilities\n", __func__);
+		return NULL;
+	}
+	/* devices with multiple channels need special handling as we need to
+	 * ensure that all channels are either private or public.
+	 */
+	if (dev->chancnt > 1 && !dma_has_cap(DMA_PRIVATE, dev->cap_mask))
+		list_for_each_entry(chan, &dev->channels, device_node) {
+			/* some channels are already publicly allocated */
+			if (chan->client_count)
+				return NULL;
+		}
+
+	list_for_each_entry(chan, &dev->channels, device_node) {
+		if (chan->client_count) {
+			pr_debug("%s: %s busy\n",
+				 __func__, dma_chan_name(chan));
+			continue;
+		}
+		if (fn && !fn(chan, fn_param)) {
+			pr_debug("%s: %s filter said false\n",
+				 __func__, dma_chan_name(chan));
+			continue;
+		}
+		return chan;
+	}
+
+	return NULL;
+}
+
+/**
+ * dma_request_channel - try to allocate an exclusive channel
+ * @mask: capabilities that the channel must satisfy
+ * @fn: optional callback to disposition available channels
+ * @fn_param: opaque parameter to pass to dma_filter_fn
+ */
+struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param)
+{
+	struct dma_device *device, *_d;
+	struct dma_chan *chan = NULL;
+	int err;
+
+	/* Find a channel */
+	mutex_lock(&dma_list_mutex);
+	list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+		chan = private_candidate(mask, device, fn, fn_param);
+		if (chan) {
+			/* Found a suitable channel, try to grab, prep, and
+			 * return it.  We first set DMA_PRIVATE to disable
+			 * balance_ref_count as this channel will not be
+			 * published in the general-purpose allocator
+			 */
+			dma_cap_set(DMA_PRIVATE, device->cap_mask);
+			device->privatecnt++;
+			err = dma_chan_get(chan);
+
+			if (err == -ENODEV) {
+				pr_debug("%s: %s module removed\n", __func__,
+					 dma_chan_name(chan));
+				list_del_rcu(&device->global_node);
+			} else if (err)
+				pr_debug("%s: failed to get %s: (%d)\n",
+					__func__, dma_chan_name(chan), err);
+			else
+				break;
+			if (--device->privatecnt == 0)
+				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
+			chan = NULL;
+		}
+	}
+	mutex_unlock(&dma_list_mutex);
+
+	pr_debug("%s: %s (%s)\n", __func__, chan ? "success" : "fail",
+		 chan ? dma_chan_name(chan) : NULL);
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(__dma_request_channel);
+
+void dma_release_channel(struct dma_chan *chan)
+{
+	mutex_lock(&dma_list_mutex);
+	WARN_ONCE(chan->client_count != 1,
+		  "chan reference count %d != 1\n", chan->client_count);
+	dma_chan_put(chan);
+	/* drop PRIVATE cap enabled by __dma_request_channel() */
+	if (--chan->device->privatecnt == 0)
+		dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask);
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL_GPL(dma_release_channel);
+
+/**
+ * dmaengine_get - register interest in dma_channels
+ */
+void dmaengine_get(void)
+{
+	struct dma_device *device, *_d;
+	struct dma_chan *chan;
+	int err;
+
+	mutex_lock(&dma_list_mutex);
+	dmaengine_ref_count++;
+
+	/* try to grab channels */
+	list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node) {
+			err = dma_chan_get(chan);
+			if (err == -ENODEV) {
+				/* module removed before we could use it */
+				list_del_rcu(&device->global_node);
+				break;
+			} else if (err)
+				pr_err("%s: failed to get %s: (%d)\n",
+					__func__, dma_chan_name(chan), err);
+		}
+	}
+
+	/* if this is the first reference and there were channels
+	 * waiting we need to rebalance to get those channels
+	 * incorporated into the channel table
+	 */
+	if (dmaengine_ref_count == 1)
+		dma_channel_rebalance();
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dmaengine_get);
+
+/**
+ * dmaengine_put - let dma drivers be removed when ref_count == 0
+ */
+void dmaengine_put(void)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+
+	mutex_lock(&dma_list_mutex);
+	dmaengine_ref_count--;
+	BUG_ON(dmaengine_ref_count < 0);
+	/* drop channel references */
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node)
+			dma_chan_put(chan);
+	}
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dmaengine_put);
+
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+	/* A device that satisfies this test has channels that will never cause
+	 * an async_tx channel switch event as all possible operation types can
+	 * be handled.
+	 */
+	#ifdef CONFIG_ASYNC_TX_DMA
+	if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+	if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+	if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+		return false;
+	#endif
+
+	#if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+	if (!dma_has_cap(DMA_XOR, device->cap_mask))
+		return false;
+
+	#ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
+	if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask))
+		return false;
+	#endif
+	#endif
+
+	#if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+	if (!dma_has_cap(DMA_PQ, device->cap_mask))
+		return false;
+
+	#ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
+	if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask))
+		return false;
+	#endif
+	#endif
+
+	return true;
+}
+
+static int get_dma_id(struct dma_device *device)
+{
+	int rc;
+
+ idr_retry:
+	if (!idr_pre_get(&dma_idr, GFP_KERNEL))
+		return -ENOMEM;
+	mutex_lock(&dma_list_mutex);
+	rc = idr_get_new(&dma_idr, NULL, &device->dev_id);
+	mutex_unlock(&dma_list_mutex);
+	if (rc == -EAGAIN)
+		goto idr_retry;
+	else if (rc != 0)
+		return rc;
+
+	return 0;
+}
+
+/**
+ * dma_async_device_register - registers DMA devices found
+ * @device: &dma_device
+ */
+int dma_async_device_register(struct dma_device *device)
+{
+	int chancnt = 0, rc;
+	struct dma_chan* chan;
+	atomic_t *idr_ref;
+
+	if (!device)
+		return -ENODEV;
+
+	/* validate device routines */
+	BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
+		!device->device_prep_dma_memcpy);
+	BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
+		!device->device_prep_dma_xor);
+	BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
+		!device->device_prep_dma_xor_val);
+	BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+		!device->device_prep_dma_pq);
+	BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+		!device->device_prep_dma_pq_val);
+	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
+		!device->device_prep_dma_memset);
+	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
+		!device->device_prep_dma_interrupt);
+	BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
+		!device->device_prep_dma_sg);
+	BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) &&
+		!device->device_prep_dma_cyclic);
+	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+		!device->device_control);
+	BUG_ON(dma_has_cap(DMA_INTERLEAVE, device->cap_mask) &&
+		!device->device_prep_interleaved_dma);
+
+	BUG_ON(!device->device_alloc_chan_resources);
+	BUG_ON(!device->device_free_chan_resources);
+	BUG_ON(!device->device_tx_status);
+	BUG_ON(!device->device_issue_pending);
+	BUG_ON(!device->dev);
+
+	/* note: this only matters in the
+	 * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case
+	 */
+	if (device_has_all_tx_types(device))
+		dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
+	idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
+	if (!idr_ref)
+		return -ENOMEM;
+	rc = get_dma_id(device);
+	if (rc != 0) {
+		kfree(idr_ref);
+		return rc;
+	}
+
+	atomic_set(idr_ref, 0);
+
+	/* represent channels in sysfs. Probably want devs too */
+	list_for_each_entry(chan, &device->channels, device_node) {
+		rc = -ENOMEM;
+		chan->local = alloc_percpu(typeof(*chan->local));
+		if (chan->local == NULL)
+			goto err_out;
+		chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
+		if (chan->dev == NULL) {
+			free_percpu(chan->local);
+			chan->local = NULL;
+			goto err_out;
+		}
+
+		chan->chan_id = chancnt++;
+		chan->dev->device.class = &dma_devclass;
+		chan->dev->device.parent = device->dev;
+		chan->dev->chan = chan;
+		chan->dev->idr_ref = idr_ref;
+		chan->dev->dev_id = device->dev_id;
+		atomic_inc(idr_ref);
+		dev_set_name(&chan->dev->device, "dma%dchan%d",
+			     device->dev_id, chan->chan_id);
+
+		rc = device_register(&chan->dev->device);
+		if (rc) {
+			free_percpu(chan->local);
+			chan->local = NULL;
+			kfree(chan->dev);
+			atomic_dec(idr_ref);
+			goto err_out;
+		}
+		chan->client_count = 0;
+	}
+	device->chancnt = chancnt;
+
+	mutex_lock(&dma_list_mutex);
+	/* take references on public channels */
+	if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask))
+		list_for_each_entry(chan, &device->channels, device_node) {
+			/* if clients are already waiting for channels we need
+			 * to take references on their behalf
+			 */
+			if (dma_chan_get(chan) == -ENODEV) {
+				/* note we can only get here for the first
+				 * channel as the remaining channels are
+				 * guaranteed to get a reference
+				 */
+				rc = -ENODEV;
+				mutex_unlock(&dma_list_mutex);
+				goto err_out;
+			}
+		}
+	list_add_tail_rcu(&device->global_node, &dma_device_list);
+	if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+		device->privatecnt++;	/* Always private */
+	dma_channel_rebalance();
+	mutex_unlock(&dma_list_mutex);
+
+	return 0;
+
+err_out:
+	/* if we never registered a channel just release the idr */
+	if (atomic_read(idr_ref) == 0) {
+		mutex_lock(&dma_list_mutex);
+		idr_remove(&dma_idr, device->dev_id);
+		mutex_unlock(&dma_list_mutex);
+		kfree(idr_ref);
+		return rc;
+	}
+
+	list_for_each_entry(chan, &device->channels, device_node) {
+		if (chan->local == NULL)
+			continue;
+		mutex_lock(&dma_list_mutex);
+		chan->dev->chan = NULL;
+		mutex_unlock(&dma_list_mutex);
+		device_unregister(&chan->dev->device);
+		free_percpu(chan->local);
+	}
+	return rc;
+}
+EXPORT_SYMBOL(dma_async_device_register);
+
+/**
+ * dma_async_device_unregister - unregister a DMA device
+ * @device: &dma_device
+ *
+ * This routine is called by dma driver exit routines, dmaengine holds module
+ * references to prevent it being called while channels are in use.
+ */
+void dma_async_device_unregister(struct dma_device *device)
+{
+	struct dma_chan *chan;
+
+	mutex_lock(&dma_list_mutex);
+	list_del_rcu(&device->global_node);
+	dma_channel_rebalance();
+	mutex_unlock(&dma_list_mutex);
+
+	list_for_each_entry(chan, &device->channels, device_node) {
+		WARN_ONCE(chan->client_count,
+			  "%s called while %d clients hold a reference\n",
+			  __func__, chan->client_count);
+		mutex_lock(&dma_list_mutex);
+		chan->dev->chan = NULL;
+		mutex_unlock(&dma_list_mutex);
+		device_unregister(&chan->dev->device);
+		free_percpu(chan->local);
+	}
+}
+EXPORT_SYMBOL(dma_async_device_unregister);
+
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+			void *src, size_t len)
+{
+	struct dma_device *dev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
+	flags = DMA_CTRL_ACK |
+		DMA_COMPL_SRC_UNMAP_SINGLE |
+		DMA_COMPL_DEST_UNMAP_SINGLE;
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+
+	if (!tx) {
+		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		return -ENOMEM;
+	}
+
+	tx->callback = NULL;
+	cookie = tx->tx_submit(tx);
+
+	preempt_disable();
+	__this_cpu_add(chan->local->bytes_transferred, len);
+	__this_cpu_inc(chan->local->memcpy_count);
+	preempt_enable();
+
+	return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages)
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+			unsigned int offset, void *kdata, size_t len)
+{
+	struct dma_device *dev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
+	flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE;
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+
+	if (!tx) {
+		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		return -ENOMEM;
+	}
+
+	tx->callback = NULL;
+	cookie = tx->tx_submit(tx);
+
+	preempt_disable();
+	__this_cpu_add(chan->local->bytes_transferred, len);
+	__this_cpu_inc(chan->local->memcpy_count);
+	preempt_enable();
+
+	return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
+/**
+ * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
+ * @chan: DMA channel to offload copy to
+ * @dest_pg: destination page
+ * @dest_off: offset in page to copy to
+ * @src_pg: source page
+ * @src_off: offset in page to copy from
+ * @len: length
+ *
+ * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
+ * address according to the DMA mapping API rules for streaming mappings.
+ * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
+ * (kernel memory or locked user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
+	unsigned int dest_off, struct page *src_pg, unsigned int src_off,
+	size_t len)
+{
+	struct dma_device *dev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
+				DMA_FROM_DEVICE);
+	flags = DMA_CTRL_ACK;
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+
+	if (!tx) {
+		dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		return -ENOMEM;
+	}
+
+	tx->callback = NULL;
+	cookie = tx->tx_submit(tx);
+
+	preempt_disable();
+	__this_cpu_add(chan->local->bytes_transferred, len);
+	__this_cpu_inc(chan->local->memcpy_count);
+	preempt_enable();
+
+	return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
+
+void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
+	struct dma_chan *chan)
+{
+	tx->chan = chan;
+	#ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	spin_lock_init(&tx->lock);
+	#endif
+}
+EXPORT_SYMBOL(dma_async_tx_descriptor_init);
+
+/* dma_wait_for_async_tx - spin wait for a transaction to complete
+ * @tx: in-flight transaction to wait on
+ */
+enum dma_status
+dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
+{
+	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+	if (!tx)
+		return DMA_SUCCESS;
+
+	while (tx->cookie == -EBUSY) {
+		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+			pr_err("%s timeout waiting for descriptor submission\n",
+				__func__);
+			return DMA_ERROR;
+		}
+		cpu_relax();
+	}
+	return dma_sync_wait(tx->chan, tx->cookie);
+}
+EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
+
+/* dma_run_dependencies - helper routine for dma drivers to process
+ *	(start) dependent operations on their target channel
+ * @tx: transaction with dependencies
+ */
+void dma_run_dependencies(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_async_tx_descriptor *dep = txd_next(tx);
+	struct dma_async_tx_descriptor *dep_next;
+	struct dma_chan *chan;
+
+	if (!dep)
+		return;
+
+	/* we'll submit tx->next now, so clear the link */
+	txd_clear_next(tx);
+	chan = dep->chan;
+
+	/* keep submitting up until a channel switch is detected
+	 * in that case we will be called again as a result of
+	 * processing the interrupt from async_tx_channel_switch
+	 */
+	for (; dep; dep = dep_next) {
+		txd_lock(dep);
+		txd_clear_parent(dep);
+		dep_next = txd_next(dep);
+		if (dep_next && dep_next->chan == chan)
+			txd_clear_next(dep); /* ->next will be submitted */
+		else
+			dep_next = NULL; /* submit current dep and terminate */
+		txd_unlock(dep);
+
+		dep->tx_submit(dep);
+	}
+
+	chan->device->device_issue_pending(chan);
+}
+EXPORT_SYMBOL_GPL(dma_run_dependencies);
+
+static int __init dma_bus_init(void)
+{
+	return class_register(&dma_devclass);
+}
+arch_initcall(dma_bus_init);
+
+
diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
new file mode 100644
index 00000000..17f983a4
--- /dev/null
+++ b/drivers/dma/dmaengine.h
@@ -0,0 +1,89 @@
+/*
+ * The contents of this file are private to DMA engine drivers, and is not
+ * part of the API to be used by DMA engine users.
+ */
+#ifndef DMAENGINE_H
+#define DMAENGINE_H
+
+#include <linux/bug.h>
+#include <linux/dmaengine.h>
+
+/**
+ * dma_cookie_init - initialize the cookies for a DMA channel
+ * @chan: dma channel to initialize
+ */
+static inline void dma_cookie_init(struct dma_chan *chan)
+{
+	chan->cookie = DMA_MIN_COOKIE;
+	chan->completed_cookie = DMA_MIN_COOKIE;
+}
+
+/**
+ * dma_cookie_assign - assign a DMA engine cookie to the descriptor
+ * @tx: descriptor needing cookie
+ *
+ * Assign a unique non-zero per-channel cookie to the descriptor.
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline dma_cookie_t dma_cookie_assign(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_chan *chan = tx->chan;
+	dma_cookie_t cookie;
+
+	cookie = chan->cookie + 1;
+	if (cookie < DMA_MIN_COOKIE)
+		cookie = DMA_MIN_COOKIE;
+	tx->cookie = chan->cookie = cookie;
+
+	return cookie;
+}
+
+/**
+ * dma_cookie_complete - complete a descriptor
+ * @tx: descriptor to complete
+ *
+ * Mark this descriptor complete by updating the channels completed
+ * cookie marker.  Zero the descriptors cookie to prevent accidental
+ * repeated completions.
+ *
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx)
+{
+	BUG_ON(tx->cookie < DMA_MIN_COOKIE);
+	tx->chan->completed_cookie = tx->cookie;
+	tx->cookie = 0;
+}
+
+/**
+ * dma_cookie_status - report cookie status
+ * @chan: dma channel
+ * @cookie: cookie we are interested in
+ * @state: dma_tx_state structure to return last/used cookies
+ *
+ * Report the status of the cookie, filling in the state structure if
+ * non-NULL.  No locking is required.
+ */
+static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	dma_cookie_t used, complete;
+
+	used = chan->cookie;
+	complete = chan->completed_cookie;
+	barrier();
+	if (state) {
+		state->last = complete;
+		state->used = used;
+		state->residue = 0;
+	}
+	return dma_async_is_complete(cookie, complete, used);
+}
+
+static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
+{
+	if (state)
+		state->residue = residue;
+}
+
+#endif
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
new file mode 100644
index 00000000..24225f0f
--- /dev/null
+++ b/drivers/dma/dmatest.c
@@ -0,0 +1,666 @@
+/*
+ * DMA Engine test module
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/freezer.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+static unsigned int test_buf_size = 16384;
+module_param(test_buf_size, uint, S_IRUGO);
+MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
+
+static char test_channel[20];
+module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO);
+MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
+
+static char test_device[20];
+module_param_string(device, test_device, sizeof(test_device), S_IRUGO);
+MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
+
+static unsigned int threads_per_chan = 1;
+module_param(threads_per_chan, uint, S_IRUGO);
+MODULE_PARM_DESC(threads_per_chan,
+		"Number of threads to start per channel (default: 1)");
+
+static unsigned int max_channels;
+module_param(max_channels, uint, S_IRUGO);
+MODULE_PARM_DESC(max_channels,
+		"Maximum number of channels to use (default: all)");
+
+static unsigned int iterations;
+module_param(iterations, uint, S_IRUGO);
+MODULE_PARM_DESC(iterations,
+		"Iterations before stopping test (default: infinite)");
+
+static unsigned int xor_sources = 3;
+module_param(xor_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(xor_sources,
+		"Number of xor source buffers (default: 3)");
+
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(pq_sources,
+		"Number of p+q source buffers (default: 3)");
+
+static int timeout = 3000;
+module_param(timeout, uint, S_IRUGO);
+MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
+		 "Pass -1 for infinite timeout");
+
+/*
+ * Initialization patterns. All bytes in the source buffer has bit 7
+ * set, all bytes in the destination buffer has bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC		0x80
+#define PATTERN_DST		0x00
+#define PATTERN_COPY		0x40
+#define PATTERN_OVERWRITE	0x20
+#define PATTERN_COUNT_MASK	0x1f
+
+struct dmatest_thread {
+	struct list_head	node;
+	struct task_struct	*task;
+	struct dma_chan		*chan;
+	u8			**srcs;
+	u8			**dsts;
+	enum dma_transaction_type type;
+};
+
+struct dmatest_chan {
+	struct list_head	node;
+	struct dma_chan		*chan;
+	struct list_head	threads;
+};
+
+/*
+ * These are protected by dma_list_mutex since they're only used by
+ * the DMA filter function callback
+ */
+static LIST_HEAD(dmatest_channels);
+static unsigned int nr_channels;
+
+static bool dmatest_match_channel(struct dma_chan *chan)
+{
+	if (test_channel[0] == '\0')
+		return true;
+	return strcmp(dma_chan_name(chan), test_channel) == 0;
+}
+
+static bool dmatest_match_device(struct dma_device *device)
+{
+	if (test_device[0] == '\0')
+		return true;
+	return strcmp(dev_name(device->dev), test_device) == 0;
+}
+
+static unsigned long dmatest_random(void)
+{
+	unsigned long buf;
+
+	get_random_bytes(&buf, sizeof(buf));
+	return buf;
+}
+
+static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len)
+{
+	unsigned int i;
+	u8 *buf;
+
+	for (; (buf = *bufs); bufs++) {
+		for (i = 0; i < start; i++)
+			buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+		for ( ; i < start + len; i++)
+			buf[i] = PATTERN_SRC | PATTERN_COPY
+				| (~i & PATTERN_COUNT_MASK);
+		for ( ; i < test_buf_size; i++)
+			buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+		buf++;
+	}
+}
+
+static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len)
+{
+	unsigned int i;
+	u8 *buf;
+
+	for (; (buf = *bufs); bufs++) {
+		for (i = 0; i < start; i++)
+			buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+		for ( ; i < start + len; i++)
+			buf[i] = PATTERN_DST | PATTERN_OVERWRITE
+				| (~i & PATTERN_COUNT_MASK);
+		for ( ; i < test_buf_size; i++)
+			buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+	}
+}
+
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+		unsigned int counter, bool is_srcbuf)
+{
+	u8		diff = actual ^ pattern;
+	u8		expected = pattern | (~counter & PATTERN_COUNT_MASK);
+	const char	*thread_name = current->comm;
+
+	if (is_srcbuf)
+		pr_warning("%s: srcbuf[0x%x] overwritten!"
+				" Expected %02x, got %02x\n",
+				thread_name, index, expected, actual);
+	else if ((pattern & PATTERN_COPY)
+			&& (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+		pr_warning("%s: dstbuf[0x%x] not copied!"
+				" Expected %02x, got %02x\n",
+				thread_name, index, expected, actual);
+	else if (diff & PATTERN_SRC)
+		pr_warning("%s: dstbuf[0x%x] was copied!"
+				" Expected %02x, got %02x\n",
+				thread_name, index, expected, actual);
+	else
+		pr_warning("%s: dstbuf[0x%x] mismatch!"
+				" Expected %02x, got %02x\n",
+				thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
+		unsigned int end, unsigned int counter, u8 pattern,
+		bool is_srcbuf)
+{
+	unsigned int i;
+	unsigned int error_count = 0;
+	u8 actual;
+	u8 expected;
+	u8 *buf;
+	unsigned int counter_orig = counter;
+
+	for (; (buf = *bufs); bufs++) {
+		counter = counter_orig;
+		for (i = start; i < end; i++) {
+			actual = buf[i];
+			expected = pattern | (~counter & PATTERN_COUNT_MASK);
+			if (actual != expected) {
+				if (error_count < 32)
+					dmatest_mismatch(actual, pattern, i,
+							 counter, is_srcbuf);
+				error_count++;
+			}
+			counter++;
+		}
+	}
+
+	if (error_count > 32)
+		pr_warning("%s: %u errors suppressed\n",
+			current->comm, error_count - 32);
+
+	return error_count;
+}
+
+/* poor man's completion - we want to use wait_event_freezable() on it */
+struct dmatest_done {
+	bool			done;
+	wait_queue_head_t	*wait;
+};
+
+static void dmatest_callback(void *arg)
+{
+	struct dmatest_done *done = arg;
+
+	done->done = true;
+	wake_up_all(done->wait);
+}
+
+/*
+ * This function repeatedly tests DMA transfers of various lengths and
+ * offsets for a given operation type until it is told to exit by
+ * kthread_stop(). There may be multiple threads running this function
+ * in parallel for a single channel, and there may be multiple channels
+ * being tested in parallel.
+ *
+ * Before each test, the source and destination buffer is initialized
+ * with a known pattern. This pattern is different depending on
+ * whether it's in an area which is supposed to be copied or
+ * overwritten, and different in the source and destination buffers.
+ * So if the DMA engine doesn't copy exactly what we tell it to copy,
+ * we'll notice.
+ */
+static int dmatest_func(void *data)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
+	struct dmatest_thread	*thread = data;
+	struct dmatest_done	done = { .wait = &done_wait };
+	struct dma_chan		*chan;
+	const char		*thread_name;
+	unsigned int		src_off, dst_off, len;
+	unsigned int		error_count;
+	unsigned int		failed_tests = 0;
+	unsigned int		total_tests = 0;
+	dma_cookie_t		cookie;
+	enum dma_status		status;
+	enum dma_ctrl_flags 	flags;
+	u8			pq_coefs[pq_sources + 1];
+	int			ret;
+	int			src_cnt;
+	int			dst_cnt;
+	int			i;
+
+	thread_name = current->comm;
+	set_freezable();
+
+	ret = -ENOMEM;
+
+	smp_rmb();
+	chan = thread->chan;
+	if (thread->type == DMA_MEMCPY)
+		src_cnt = dst_cnt = 1;
+	else if (thread->type == DMA_XOR) {
+		src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
+		dst_cnt = 1;
+	} else if (thread->type == DMA_PQ) {
+		src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
+		dst_cnt = 2;
+		for (i = 0; i < src_cnt; i++)
+			pq_coefs[i] = 1;
+	} else
+		goto err_srcs;
+
+	thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
+	if (!thread->srcs)
+		goto err_srcs;
+	for (i = 0; i < src_cnt; i++) {
+		thread->srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
+		if (!thread->srcs[i])
+			goto err_srcbuf;
+	}
+	thread->srcs[i] = NULL;
+
+	thread->dsts = kcalloc(dst_cnt+1, sizeof(u8 *), GFP_KERNEL);
+	if (!thread->dsts)
+		goto err_dsts;
+	for (i = 0; i < dst_cnt; i++) {
+		thread->dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
+		if (!thread->dsts[i])
+			goto err_dstbuf;
+	}
+	thread->dsts[i] = NULL;
+
+	set_user_nice(current, 10);
+
+	/*
+	 * src buffers are freed by the DMAEngine code with dma_unmap_single()
+	 * dst buffers are freed by ourselves below
+	 */
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT
+	      | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE;
+
+	while (!kthread_should_stop()
+	       && !(iterations && total_tests >= iterations)) {
+		struct dma_device *dev = chan->device;
+		struct dma_async_tx_descriptor *tx = NULL;
+		dma_addr_t dma_srcs[src_cnt];
+		dma_addr_t dma_dsts[dst_cnt];
+		u8 align = 0;
+
+		total_tests++;
+
+		/* honor alignment restrictions */
+		if (thread->type == DMA_MEMCPY)
+			align = dev->copy_align;
+		else if (thread->type == DMA_XOR)
+			align = dev->xor_align;
+		else if (thread->type == DMA_PQ)
+			align = dev->pq_align;
+
+		if (1 << align > test_buf_size) {
+			pr_err("%u-byte buffer too small for %d-byte alignment\n",
+			       test_buf_size, 1 << align);
+			break;
+		}
+
+		len = dmatest_random() % test_buf_size + 1;
+		len = (len >> align) << align;
+		if (!len)
+			len = 1 << align;
+		src_off = dmatest_random() % (test_buf_size - len + 1);
+		dst_off = dmatest_random() % (test_buf_size - len + 1);
+
+		src_off = (src_off >> align) << align;
+		dst_off = (dst_off >> align) << align;
+
+		dmatest_init_srcs(thread->srcs, src_off, len);
+		dmatest_init_dsts(thread->dsts, dst_off, len);
+
+		for (i = 0; i < src_cnt; i++) {
+			u8 *buf = thread->srcs[i] + src_off;
+
+			dma_srcs[i] = dma_map_single(dev->dev, buf, len,
+						     DMA_TO_DEVICE);
+		}
+		/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
+		for (i = 0; i < dst_cnt; i++) {
+			dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
+						     test_buf_size,
+						     DMA_BIDIRECTIONAL);
+		}
+
+
+		if (thread->type == DMA_MEMCPY)
+			tx = dev->device_prep_dma_memcpy(chan,
+							 dma_dsts[0] + dst_off,
+							 dma_srcs[0], len,
+							 flags);
+		else if (thread->type == DMA_XOR)
+			tx = dev->device_prep_dma_xor(chan,
+						      dma_dsts[0] + dst_off,
+						      dma_srcs, src_cnt,
+						      len, flags);
+		else if (thread->type == DMA_PQ) {
+			dma_addr_t dma_pq[dst_cnt];
+
+			for (i = 0; i < dst_cnt; i++)
+				dma_pq[i] = dma_dsts[i] + dst_off;
+			tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+						     src_cnt, pq_coefs,
+						     len, flags);
+		}
+
+		if (!tx) {
+			for (i = 0; i < src_cnt; i++)
+				dma_unmap_single(dev->dev, dma_srcs[i], len,
+						 DMA_TO_DEVICE);
+			for (i = 0; i < dst_cnt; i++)
+				dma_unmap_single(dev->dev, dma_dsts[i],
+						 test_buf_size,
+						 DMA_BIDIRECTIONAL);
+			pr_warning("%s: #%u: prep error with src_off=0x%x "
+					"dst_off=0x%x len=0x%x\n",
+					thread_name, total_tests - 1,
+					src_off, dst_off, len);
+			msleep(100);
+			failed_tests++;
+			continue;
+		}
+
+		done.done = false;
+		tx->callback = dmatest_callback;
+		tx->callback_param = &done;
+		cookie = tx->tx_submit(tx);
+
+		if (dma_submit_error(cookie)) {
+			pr_warning("%s: #%u: submit error %d with src_off=0x%x "
+					"dst_off=0x%x len=0x%x\n",
+					thread_name, total_tests - 1, cookie,
+					src_off, dst_off, len);
+			msleep(100);
+			failed_tests++;
+			continue;
+		}
+		dma_async_issue_pending(chan);
+
+		wait_event_freezable_timeout(done_wait, done.done,
+					     msecs_to_jiffies(timeout));
+
+		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+
+		if (!done.done) {
+			/*
+			 * We're leaving the timed out dma operation with
+			 * dangling pointer to done_wait.  To make this
+			 * correct, we'll need to allocate wait_done for
+			 * each test iteration and perform "who's gonna
+			 * free it this time?" dancing.  For now, just
+			 * leave it dangling.
+			 */
+			pr_warning("%s: #%u: test timed out\n",
+				   thread_name, total_tests - 1);
+			failed_tests++;
+			continue;
+		} else if (status != DMA_SUCCESS) {
+			pr_warning("%s: #%u: got completion callback,"
+				   " but status is \'%s\'\n",
+				   thread_name, total_tests - 1,
+				   status == DMA_ERROR ? "error" : "in progress");
+			failed_tests++;
+			continue;
+		}
+
+		/* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
+		for (i = 0; i < dst_cnt; i++)
+			dma_unmap_single(dev->dev, dma_dsts[i], test_buf_size,
+					 DMA_BIDIRECTIONAL);
+
+		error_count = 0;
+
+		pr_debug("%s: verifying source buffer...\n", thread_name);
+		error_count += dmatest_verify(thread->srcs, 0, src_off,
+				0, PATTERN_SRC, true);
+		error_count += dmatest_verify(thread->srcs, src_off,
+				src_off + len, src_off,
+				PATTERN_SRC | PATTERN_COPY, true);
+		error_count += dmatest_verify(thread->srcs, src_off + len,
+				test_buf_size, src_off + len,
+				PATTERN_SRC, true);
+
+		pr_debug("%s: verifying dest buffer...\n",
+				thread->task->comm);
+		error_count += dmatest_verify(thread->dsts, 0, dst_off,
+				0, PATTERN_DST, false);
+		error_count += dmatest_verify(thread->dsts, dst_off,
+				dst_off + len, src_off,
+				PATTERN_SRC | PATTERN_COPY, false);
+		error_count += dmatest_verify(thread->dsts, dst_off + len,
+				test_buf_size, dst_off + len,
+				PATTERN_DST, false);
+
+		if (error_count) {
+			pr_warning("%s: #%u: %u errors with "
+				"src_off=0x%x dst_off=0x%x len=0x%x\n",
+				thread_name, total_tests - 1, error_count,
+				src_off, dst_off, len);
+			failed_tests++;
+		} else {
+			pr_debug("%s: #%u: No errors with "
+				"src_off=0x%x dst_off=0x%x len=0x%x\n",
+				thread_name, total_tests - 1,
+				src_off, dst_off, len);
+		}
+	}
+
+	ret = 0;
+	for (i = 0; thread->dsts[i]; i++)
+		kfree(thread->dsts[i]);
+err_dstbuf:
+	kfree(thread->dsts);
+err_dsts:
+	for (i = 0; thread->srcs[i]; i++)
+		kfree(thread->srcs[i]);
+err_srcbuf:
+	kfree(thread->srcs);
+err_srcs:
+	pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
+			thread_name, total_tests, failed_tests, ret);
+
+	/* terminate all transfers on specified channels */
+	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+	if (iterations > 0)
+		while (!kthread_should_stop()) {
+			DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
+			interruptible_sleep_on(&wait_dmatest_exit);
+		}
+
+	return ret;
+}
+
+static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
+{
+	struct dmatest_thread	*thread;
+	struct dmatest_thread	*_thread;
+	int			ret;
+
+	list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
+		ret = kthread_stop(thread->task);
+		pr_debug("dmatest: thread %s exited with status %d\n",
+				thread->task->comm, ret);
+		list_del(&thread->node);
+		kfree(thread);
+	}
+
+	/* terminate all transfers on specified channels */
+	dtc->chan->device->device_control(dtc->chan, DMA_TERMINATE_ALL, 0);
+
+	kfree(dtc);
+}
+
+static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_type type)
+{
+	struct dmatest_thread *thread;
+	struct dma_chan *chan = dtc->chan;
+	char *op;
+	unsigned int i;
+
+	if (type == DMA_MEMCPY)
+		op = "copy";
+	else if (type == DMA_XOR)
+		op = "xor";
+	else if (type == DMA_PQ)
+		op = "pq";
+	else
+		return -EINVAL;
+
+	for (i = 0; i < threads_per_chan; i++) {
+		thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
+		if (!thread) {
+			pr_warning("dmatest: No memory for %s-%s%u\n",
+				   dma_chan_name(chan), op, i);
+
+			break;
+		}
+		thread->chan = dtc->chan;
+		thread->type = type;
+		smp_wmb();
+		thread->task = kthread_run(dmatest_func, thread, "%s-%s%u",
+				dma_chan_name(chan), op, i);
+		if (IS_ERR(thread->task)) {
+			pr_warning("dmatest: Failed to run thread %s-%s%u\n",
+					dma_chan_name(chan), op, i);
+			kfree(thread);
+			break;
+		}
+
+		/* srcbuf and dstbuf are allocated by the thread itself */
+
+		list_add_tail(&thread->node, &dtc->threads);
+	}
+
+	return i;
+}
+
+static int dmatest_add_channel(struct dma_chan *chan)
+{
+	struct dmatest_chan	*dtc;
+	struct dma_device	*dma_dev = chan->device;
+	unsigned int		thread_count = 0;
+	int cnt;
+
+	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
+	if (!dtc) {
+		pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
+		return -ENOMEM;
+	}
+
+	dtc->chan = chan;
+	INIT_LIST_HEAD(&dtc->threads);
+
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+		cnt = dmatest_add_threads(dtc, DMA_MEMCPY);
+		thread_count += cnt > 0 ? cnt : 0;
+	}
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		cnt = dmatest_add_threads(dtc, DMA_XOR);
+		thread_count += cnt > 0 ? cnt : 0;
+	}
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+		cnt = dmatest_add_threads(dtc, DMA_PQ);
+		thread_count += cnt > 0 ? cnt : 0;
+	}
+
+	pr_info("dmatest: Started %u threads using %s\n",
+		thread_count, dma_chan_name(chan));
+
+	list_add_tail(&dtc->node, &dmatest_channels);
+	nr_channels++;
+
+	return 0;
+}
+
+static bool filter(struct dma_chan *chan, void *param)
+{
+	if (!dmatest_match_channel(chan) || !dmatest_match_device(chan->device))
+		return false;
+	else
+		return true;
+}
+
+static int __init dmatest_init(void)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+	int err = 0;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+	for (;;) {
+		chan = dma_request_channel(mask, filter, NULL);
+		if (chan) {
+			err = dmatest_add_channel(chan);
+			if (err) {
+				dma_release_channel(chan);
+				break; /* add_channel failed, punt */
+			}
+		} else
+			break; /* no more channels available */
+		if (max_channels && nr_channels >= max_channels)
+			break; /* we have all we need */
+	}
+
+	return err;
+}
+/* when compiled-in wait for drivers to load first */
+late_initcall(dmatest_init);
+
+static void __exit dmatest_exit(void)
+{
+	struct dmatest_chan *dtc, *_dtc;
+	struct dma_chan *chan;
+
+	list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
+		list_del(&dtc->node);
+		chan = dtc->chan;
+		dmatest_cleanup_channel(dtc);
+		pr_debug("dmatest: dropped channel %s\n",
+			 dma_chan_name(chan));
+		dma_release_channel(chan);
+	}
+}
+module_exit(dmatest_exit);
+
+MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
new file mode 100644
index 00000000..7439079f
--- /dev/null
+++ b/drivers/dma/dw_dmac.c
@@ -0,0 +1,1619 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on
+ * AVR32 systems.)
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dw_dmac_regs.h"
+#include "dmaengine.h"
+
+/*
+ * This supports the Synopsys "DesignWare AHB Central DMA Controller",
+ * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all
+ * of which use ARM any more).  See the "Databook" from Synopsys for
+ * information beyond what licensees probably provide.
+ *
+ * The driver has currently been tested only with the Atmel AT32AP7000,
+ * which does not support descriptor writeback.
+ */
+
+#define DWC_DEFAULT_CTLLO(_chan) ({				\
+		struct dw_dma_slave *__slave = (_chan->private);	\
+		struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan);	\
+		struct dma_slave_config	*_sconfig = &_dwc->dma_sconfig;	\
+		int _dms = __slave ? __slave->dst_master : 0;	\
+		int _sms = __slave ? __slave->src_master : 1;	\
+		u8 _smsize = __slave ? _sconfig->src_maxburst :	\
+			DW_DMA_MSIZE_16;			\
+		u8 _dmsize = __slave ? _sconfig->dst_maxburst :	\
+			DW_DMA_MSIZE_16;			\
+								\
+		(DWC_CTLL_DST_MSIZE(_dmsize)			\
+		 | DWC_CTLL_SRC_MSIZE(_smsize)			\
+		 | DWC_CTLL_LLP_D_EN				\
+		 | DWC_CTLL_LLP_S_EN				\
+		 | DWC_CTLL_DMS(_dms)				\
+		 | DWC_CTLL_SMS(_sms));				\
+	})
+
+/*
+ * This is configuration-dependent and usually a funny size like 4095.
+ *
+ * Note that this is a transfer count, i.e. if we transfer 32-bit
+ * words, we can do 16380 bytes per descriptor.
+ *
+ * This parameter is also system-specific.
+ */
+#define DWC_MAX_COUNT	4095U
+
+/*
+ * Number of descriptors to allocate for each channel. This should be
+ * made configurable somehow; preferably, the clients (at least the
+ * ones using slave transfers) should be able to give us a hint.
+ */
+#define NR_DESCS_PER_CHANNEL	64
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Because we're not relying on writeback from the controller (it may not
+ * even be configured into the core!) we don't need to use dma_pool.  These
+ * descriptors -- and associated data -- are cacheable.  We do need to make
+ * sure their dcache entries are written back before handing them off to
+ * the controller, though.
+ */
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+static struct device *chan2parent(struct dma_chan *chan)
+{
+	return chan->dev->device.parent;
+}
+
+static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
+{
+	return list_entry(dwc->active_list.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+{
+	struct dw_desc *desc, *_desc;
+	struct dw_desc *ret = NULL;
+	unsigned int i = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+		dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
+		i++;
+	}
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
+
+	return ret;
+}
+
+static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+	struct dw_desc	*child;
+
+	list_for_each_entry(child, &desc->tx_list, desc_node)
+		dma_sync_single_for_cpu(chan2parent(&dwc->chan),
+				child->txd.phys, sizeof(child->lli),
+				DMA_TO_DEVICE);
+	dma_sync_single_for_cpu(chan2parent(&dwc->chan),
+			desc->txd.phys, sizeof(desc->lli),
+			DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+	unsigned long flags;
+
+	if (desc) {
+		struct dw_desc *child;
+
+		dwc_sync_desc_for_cpu(dwc, desc);
+
+		spin_lock_irqsave(&dwc->lock, flags);
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			dev_vdbg(chan2dev(&dwc->chan),
+					"moving child desc %p to freelist\n",
+					child);
+		list_splice_init(&desc->tx_list, &dwc->free_list);
+		dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
+		list_add(&desc->desc_node, &dwc->free_list);
+		spin_unlock_irqrestore(&dwc->lock, flags);
+	}
+}
+
+static void dwc_initialize(struct dw_dma_chan *dwc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	struct dw_dma_slave *dws = dwc->chan.private;
+	u32 cfghi = DWC_CFGH_FIFO_MODE;
+	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+
+	if (dwc->initialized == true)
+		return;
+
+	if (dws) {
+		/*
+		 * We need controller-specific data to set up slave
+		 * transfers.
+		 */
+		BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
+
+		cfghi = dws->cfg_hi;
+		cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK;
+	}
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+
+	/* Enable interrupts */
+	channel_set_bit(dw, MASK.XFER, dwc->mask);
+	channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+	dwc->initialized = true;
+}
+
+/*----------------------------------------------------------------------*/
+
+/* Called with dwc->lock held and bh disabled */
+static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+{
+	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+
+	/* ASSERT:  channel is idle */
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_err(chan2dev(&dwc->chan),
+			"BUG: Attempted to start non-idle channel\n");
+		dev_err(chan2dev(&dwc->chan),
+			"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+			channel_readl(dwc, SAR),
+			channel_readl(dwc, DAR),
+			channel_readl(dwc, LLP),
+			channel_readl(dwc, CTL_HI),
+			channel_readl(dwc, CTL_LO));
+
+		/* The tasklet will hopefully advance the queue... */
+		return;
+	}
+
+	dwc_initialize(dwc);
+
+	channel_writel(dwc, LLP, first->txd.phys);
+	channel_writel(dwc, CTL_LO,
+			DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+	channel_writel(dwc, CTL_HI, 0);
+	channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
+		bool callback_required)
+{
+	dma_async_tx_callback		callback = NULL;
+	void				*param = NULL;
+	struct dma_async_tx_descriptor	*txd = &desc->txd;
+	struct dw_desc			*child;
+	unsigned long			flags;
+
+	dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	dma_cookie_complete(txd);
+	if (callback_required) {
+		callback = txd->callback;
+		param = txd->callback_param;
+	}
+
+	dwc_sync_desc_for_cpu(dwc, desc);
+
+	/* async_tx_ack */
+	list_for_each_entry(child, &desc->tx_list, desc_node)
+		async_tx_ack(&child->txd);
+	async_tx_ack(&desc->txd);
+
+	list_splice_init(&desc->tx_list, &dwc->free_list);
+	list_move(&desc->desc_node, &dwc->free_list);
+
+	if (!dwc->chan.private) {
+		struct device *parent = chan2parent(&dwc->chan);
+		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+				dma_unmap_single(parent, desc->lli.dar,
+						desc->len, DMA_FROM_DEVICE);
+			else
+				dma_unmap_page(parent, desc->lli.dar,
+						desc->len, DMA_FROM_DEVICE);
+		}
+		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+				dma_unmap_single(parent, desc->lli.sar,
+						desc->len, DMA_TO_DEVICE);
+			else
+				dma_unmap_page(parent, desc->lli.sar,
+						desc->len, DMA_TO_DEVICE);
+		}
+	}
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	if (callback_required && callback)
+		callback(param);
+}
+
+static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	struct dw_desc *desc, *_desc;
+	LIST_HEAD(list);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_err(chan2dev(&dwc->chan),
+			"BUG: XFER bit set, but channel not idle!\n");
+
+		/* Try to continue after resetting the channel... */
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+		while (dma_readl(dw, CH_EN) & dwc->mask)
+			cpu_relax();
+	}
+
+	/*
+	 * Submit queued descriptors ASAP, i.e. before we go through
+	 * the completed ones.
+	 */
+	list_splice_init(&dwc->active_list, &list);
+	if (!list_empty(&dwc->queue)) {
+		list_move(dwc->queue.next, &dwc->active_list);
+		dwc_dostart(dwc, dwc_first_active(dwc));
+	}
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		dwc_descriptor_complete(dwc, desc, true);
+}
+
+static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	dma_addr_t llp;
+	struct dw_desc *desc, *_desc;
+	struct dw_desc *child;
+	u32 status_xfer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	llp = channel_readl(dwc, LLP);
+	status_xfer = dma_readl(dw, RAW.XFER);
+
+	if (status_xfer & dwc->mask) {
+		/* Everything we've submitted is done */
+		dma_writel(dw, CLEAR.XFER, dwc->mask);
+		spin_unlock_irqrestore(&dwc->lock, flags);
+
+		dwc_complete_all(dw, dwc);
+		return;
+	}
+
+	if (list_empty(&dwc->active_list)) {
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		return;
+	}
+
+	dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp);
+
+	list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+		/* check first descriptors addr */
+		if (desc->txd.phys == llp) {
+			spin_unlock_irqrestore(&dwc->lock, flags);
+			return;
+		}
+
+		/* check first descriptors llp */
+		if (desc->lli.llp == llp) {
+			/* This one is currently in progress */
+			spin_unlock_irqrestore(&dwc->lock, flags);
+			return;
+		}
+
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			if (child->lli.llp == llp) {
+				/* Currently in progress */
+				spin_unlock_irqrestore(&dwc->lock, flags);
+				return;
+			}
+
+		/*
+		 * No descriptors so far seem to be in progress, i.e.
+		 * this one must be done.
+		 */
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		dwc_descriptor_complete(dwc, desc, true);
+		spin_lock_irqsave(&dwc->lock, flags);
+	}
+
+	dev_err(chan2dev(&dwc->chan),
+		"BUG: All descriptors done, but channel not idle!\n");
+
+	/* Try to continue after resetting the channel... */
+	channel_clear_bit(dw, CH_EN, dwc->mask);
+	while (dma_readl(dw, CH_EN) & dwc->mask)
+		cpu_relax();
+
+	if (!list_empty(&dwc->queue)) {
+		list_move(dwc->queue.next, &dwc->active_list);
+		dwc_dostart(dwc, dwc_first_active(dwc));
+	}
+	spin_unlock_irqrestore(&dwc->lock, flags);
+}
+
+static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
+{
+	dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
+			"  desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+			lli->sar, lli->dar, lli->llp,
+			lli->ctlhi, lli->ctllo);
+}
+
+static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	struct dw_desc *bad_desc;
+	struct dw_desc *child;
+	unsigned long flags;
+
+	dwc_scan_descriptors(dw, dwc);
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	/*
+	 * The descriptor currently at the head of the active list is
+	 * borked. Since we don't have any way to report errors, we'll
+	 * just have to scream loudly and try to carry on.
+	 */
+	bad_desc = dwc_first_active(dwc);
+	list_del_init(&bad_desc->desc_node);
+	list_move(dwc->queue.next, dwc->active_list.prev);
+
+	/* Clear the error flag and try to restart the controller */
+	dma_writel(dw, CLEAR.ERROR, dwc->mask);
+	if (!list_empty(&dwc->active_list))
+		dwc_dostart(dwc, dwc_first_active(dwc));
+
+	/*
+	 * KERN_CRITICAL may seem harsh, but since this only happens
+	 * when someone submits a bad physical address in a
+	 * descriptor, we should consider ourselves lucky that the
+	 * controller flagged an error instead of scribbling over
+	 * random memory locations.
+	 */
+	dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
+			"Bad descriptor submitted for DMA!\n");
+	dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
+			"  cookie: %d\n", bad_desc->txd.cookie);
+	dwc_dump_lli(dwc, &bad_desc->lli);
+	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+		dwc_dump_lli(dwc, &child->lli);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	/* Pretend the descriptor completed successfully */
+	dwc_descriptor_complete(dwc, bad_desc, true);
+}
+
+/* --------------------- Cyclic DMA API extensions -------------------- */
+
+inline dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+	return channel_readl(dwc, SAR);
+}
+EXPORT_SYMBOL(dw_dma_get_src_addr);
+
+inline dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+	return channel_readl(dwc, DAR);
+}
+EXPORT_SYMBOL(dw_dma_get_dst_addr);
+
+/* called with dwc->lock held and all DMAC interrupts disabled */
+static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
+		u32 status_err, u32 status_xfer)
+{
+	unsigned long flags;
+
+	if (dwc->mask) {
+		void (*callback)(void *param);
+		void *callback_param;
+
+		dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n",
+				channel_readl(dwc, LLP));
+
+		callback = dwc->cdesc->period_callback;
+		callback_param = dwc->cdesc->period_callback_param;
+
+		if (callback)
+			callback(callback_param);
+	}
+
+	/*
+	 * Error and transfer complete are highly unlikely, and will most
+	 * likely be due to a configuration error by the user.
+	 */
+	if (unlikely(status_err & dwc->mask) ||
+			unlikely(status_xfer & dwc->mask)) {
+		int i;
+
+		dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s "
+				"interrupt, stopping DMA transfer\n",
+				status_xfer ? "xfer" : "error");
+
+		spin_lock_irqsave(&dwc->lock, flags);
+
+		dev_err(chan2dev(&dwc->chan),
+			"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+			channel_readl(dwc, SAR),
+			channel_readl(dwc, DAR),
+			channel_readl(dwc, LLP),
+			channel_readl(dwc, CTL_HI),
+			channel_readl(dwc, CTL_LO));
+
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+		while (dma_readl(dw, CH_EN) & dwc->mask)
+			cpu_relax();
+
+		/* make sure DMA does not restart by loading a new list */
+		channel_writel(dwc, LLP, 0);
+		channel_writel(dwc, CTL_LO, 0);
+		channel_writel(dwc, CTL_HI, 0);
+
+		dma_writel(dw, CLEAR.ERROR, dwc->mask);
+		dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+		for (i = 0; i < dwc->cdesc->periods; i++)
+			dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
+
+		spin_unlock_irqrestore(&dwc->lock, flags);
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+
+static void dw_dma_tasklet(unsigned long data)
+{
+	struct dw_dma *dw = (struct dw_dma *)data;
+	struct dw_dma_chan *dwc;
+	u32 status_xfer;
+	u32 status_err;
+	int i;
+
+	status_xfer = dma_readl(dw, RAW.XFER);
+	status_err = dma_readl(dw, RAW.ERROR);
+
+	dev_vdbg(dw->dma.dev, "tasklet: status_err=%x\n", status_err);
+
+	for (i = 0; i < dw->dma.chancnt; i++) {
+		dwc = &dw->chan[i];
+		if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
+			dwc_handle_cyclic(dw, dwc, status_err, status_xfer);
+		else if (status_err & (1 << i))
+			dwc_handle_error(dw, dwc);
+		else if (status_xfer & (1 << i))
+			dwc_scan_descriptors(dw, dwc);
+	}
+
+	/*
+	 * Re-enable interrupts.
+	 */
+	channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
+}
+
+static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+{
+	struct dw_dma *dw = dev_id;
+	u32 status;
+
+	dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n",
+			dma_readl(dw, STATUS_INT));
+
+	/*
+	 * Just disable the interrupts. We'll turn them back on in the
+	 * softirq handler.
+	 */
+	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+	status = dma_readl(dw, STATUS_INT);
+	if (status) {
+		dev_err(dw->dma.dev,
+			"BUG: Unexpected interrupts pending: 0x%x\n",
+			status);
+
+		/* Try to recover */
+		channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
+	}
+
+	tasklet_schedule(&dw->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct dw_desc		*desc = txd_to_dw_desc(tx);
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(tx->chan);
+	dma_cookie_t		cookie;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	/*
+	 * REVISIT: We should attempt to chain as many descriptors as
+	 * possible, perhaps even appending to those already submitted
+	 * for DMA. But this is hard to do in a race-free manner.
+	 */
+	if (list_empty(&dwc->active_list)) {
+		dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
+				desc->txd.cookie);
+		list_add_tail(&desc->desc_node, &dwc->active_list);
+		dwc_dostart(dwc, dwc_first_active(dwc));
+	} else {
+		dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
+				desc->txd.cookie);
+
+		list_add_tail(&desc->desc_node, &dwc->queue);
+	}
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_desc		*desc;
+	struct dw_desc		*first;
+	struct dw_desc		*prev;
+	size_t			xfer_count;
+	size_t			offset;
+	unsigned int		src_width;
+	unsigned int		dst_width;
+	u32			ctllo;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n",
+			dest, src, len, flags);
+
+	if (unlikely(!len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+		return NULL;
+	}
+
+	/*
+	 * We can be a lot more clever here, but this should take care
+	 * of the most common optimization.
+	 */
+	if (!((src | dest  | len) & 7))
+		src_width = dst_width = 3;
+	else if (!((src | dest  | len) & 3))
+		src_width = dst_width = 2;
+	else if (!((src | dest | len) & 1))
+		src_width = dst_width = 1;
+	else
+		src_width = dst_width = 0;
+
+	ctllo = DWC_DEFAULT_CTLLO(chan)
+			| DWC_CTLL_DST_WIDTH(dst_width)
+			| DWC_CTLL_SRC_WIDTH(src_width)
+			| DWC_CTLL_DST_INC
+			| DWC_CTLL_SRC_INC
+			| DWC_CTLL_FC_M2M;
+	prev = first = NULL;
+
+	for (offset = 0; offset < len; offset += xfer_count << src_width) {
+		xfer_count = min_t(size_t, (len - offset) >> src_width,
+				DWC_MAX_COUNT);
+
+		desc = dwc_desc_get(dwc);
+		if (!desc)
+			goto err_desc_get;
+
+		desc->lli.sar = src + offset;
+		desc->lli.dar = dest + offset;
+		desc->lli.ctllo = ctllo;
+		desc->lli.ctlhi = xfer_count;
+
+		if (!first) {
+			first = desc;
+		} else {
+			prev->lli.llp = desc->txd.phys;
+			dma_sync_single_for_device(chan2parent(chan),
+					prev->txd.phys, sizeof(prev->lli),
+					DMA_TO_DEVICE);
+			list_add_tail(&desc->desc_node,
+					&first->tx_list);
+		}
+		prev = desc;
+	}
+
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last block */
+		prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+	prev->lli.llp = 0;
+	dma_sync_single_for_device(chan2parent(chan),
+			prev->txd.phys, sizeof(prev->lli),
+			DMA_TO_DEVICE);
+
+	first->txd.flags = flags;
+	first->len = len;
+
+	return &first->txd;
+
+err_desc_get:
+	dwc_desc_put(dwc, first);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma_slave	*dws = chan->private;
+	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
+	struct dw_desc		*prev;
+	struct dw_desc		*first;
+	u32			ctllo;
+	dma_addr_t		reg;
+	unsigned int		reg_width;
+	unsigned int		mem_width;
+	unsigned int		i;
+	struct scatterlist	*sg;
+	size_t			total_len = 0;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_slave\n");
+
+	if (unlikely(!dws || !sg_len))
+		return NULL;
+
+	prev = first = NULL;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		reg_width = __fls(sconfig->dst_addr_width);
+		reg = sconfig->dst_addr;
+		ctllo = (DWC_DEFAULT_CTLLO(chan)
+				| DWC_CTLL_DST_WIDTH(reg_width)
+				| DWC_CTLL_DST_FIX
+				| DWC_CTLL_SRC_INC);
+
+		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct dw_desc	*desc;
+			u32		len, dlen, mem;
+
+			mem = sg_phys(sg);
+			len = sg_dma_len(sg);
+
+			if (!((mem | len) & 7))
+				mem_width = 3;
+			else if (!((mem | len) & 3))
+				mem_width = 2;
+			else if (!((mem | len) & 1))
+				mem_width = 1;
+			else
+				mem_width = 0;
+
+slave_sg_todev_fill_desc:
+			desc = dwc_desc_get(dwc);
+			if (!desc) {
+				dev_err(chan2dev(chan),
+					"not enough descriptors available\n");
+				goto err_desc_get;
+			}
+
+			desc->lli.sar = mem;
+			desc->lli.dar = reg;
+			desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
+			if ((len >> mem_width) > DWC_MAX_COUNT) {
+				dlen = DWC_MAX_COUNT << mem_width;
+				mem += dlen;
+				len -= dlen;
+			} else {
+				dlen = len;
+				len = 0;
+			}
+
+			desc->lli.ctlhi = dlen >> mem_width;
+
+			if (!first) {
+				first = desc;
+			} else {
+				prev->lli.llp = desc->txd.phys;
+				dma_sync_single_for_device(chan2parent(chan),
+						prev->txd.phys,
+						sizeof(prev->lli),
+						DMA_TO_DEVICE);
+				list_add_tail(&desc->desc_node,
+						&first->tx_list);
+			}
+			prev = desc;
+			total_len += dlen;
+
+			if (len)
+				goto slave_sg_todev_fill_desc;
+		}
+		break;
+	case DMA_DEV_TO_MEM:
+		reg_width = __fls(sconfig->src_addr_width);
+		reg = sconfig->src_addr;
+		ctllo = (DWC_DEFAULT_CTLLO(chan)
+				| DWC_CTLL_SRC_WIDTH(reg_width)
+				| DWC_CTLL_DST_INC
+				| DWC_CTLL_SRC_FIX);
+
+		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct dw_desc	*desc;
+			u32		len, dlen, mem;
+
+			mem = sg_phys(sg);
+			len = sg_dma_len(sg);
+
+			if (!((mem | len) & 7))
+				mem_width = 3;
+			else if (!((mem | len) & 3))
+				mem_width = 2;
+			else if (!((mem | len) & 1))
+				mem_width = 1;
+			else
+				mem_width = 0;
+
+slave_sg_fromdev_fill_desc:
+			desc = dwc_desc_get(dwc);
+			if (!desc) {
+				dev_err(chan2dev(chan),
+						"not enough descriptors available\n");
+				goto err_desc_get;
+			}
+
+			desc->lli.sar = reg;
+			desc->lli.dar = mem;
+			desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
+			if ((len >> reg_width) > DWC_MAX_COUNT) {
+				dlen = DWC_MAX_COUNT << reg_width;
+				mem += dlen;
+				len -= dlen;
+			} else {
+				dlen = len;
+				len = 0;
+			}
+			desc->lli.ctlhi = dlen >> reg_width;
+
+			if (!first) {
+				first = desc;
+			} else {
+				prev->lli.llp = desc->txd.phys;
+				dma_sync_single_for_device(chan2parent(chan),
+						prev->txd.phys,
+						sizeof(prev->lli),
+						DMA_TO_DEVICE);
+				list_add_tail(&desc->desc_node,
+						&first->tx_list);
+			}
+			prev = desc;
+			total_len += dlen;
+
+			if (len)
+				goto slave_sg_fromdev_fill_desc;
+		}
+		break;
+	default:
+		return NULL;
+	}
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last block */
+		prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+	prev->lli.llp = 0;
+	dma_sync_single_for_device(chan2parent(chan),
+			prev->txd.phys, sizeof(prev->lli),
+			DMA_TO_DEVICE);
+
+	first->len = total_len;
+
+	return &first->txd;
+
+err_desc_get:
+	dwc_desc_put(dwc, first);
+	return NULL;
+}
+
+/*
+ * Fix sconfig's burst size according to dw_dmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+ *
+ * NOTE: burst size 2 is not supported by controller.
+ *
+ * This can be done by finding least significant bit set: n & (n - 1)
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+	if (*maxburst > 1)
+		*maxburst = fls(*maxburst) - 2;
+	else
+		*maxburst = 0;
+}
+
+static int
+set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+
+	/* Check if it is chan is configured for slave transfers */
+	if (!chan->private)
+		return -EINVAL;
+
+	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
+
+	convert_burst(&dwc->dma_sconfig.src_maxburst);
+	convert_burst(&dwc->dma_sconfig.dst_maxburst);
+
+	return 0;
+}
+
+static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		       unsigned long arg)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dw_desc		*desc, *_desc;
+	unsigned long		flags;
+	u32			cfglo;
+	LIST_HEAD(list);
+
+	if (cmd == DMA_PAUSE) {
+		spin_lock_irqsave(&dwc->lock, flags);
+
+		cfglo = channel_readl(dwc, CFG_LO);
+		channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+		while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY))
+			cpu_relax();
+
+		dwc->paused = true;
+		spin_unlock_irqrestore(&dwc->lock, flags);
+	} else if (cmd == DMA_RESUME) {
+		if (!dwc->paused)
+			return 0;
+
+		spin_lock_irqsave(&dwc->lock, flags);
+
+		cfglo = channel_readl(dwc, CFG_LO);
+		channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+		dwc->paused = false;
+
+		spin_unlock_irqrestore(&dwc->lock, flags);
+	} else if (cmd == DMA_TERMINATE_ALL) {
+		spin_lock_irqsave(&dwc->lock, flags);
+
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+		while (dma_readl(dw, CH_EN) & dwc->mask)
+			cpu_relax();
+
+		dwc->paused = false;
+
+		/* active_list entries will end up before queued entries */
+		list_splice_init(&dwc->queue, &list);
+		list_splice_init(&dwc->active_list, &list);
+
+		spin_unlock_irqrestore(&dwc->lock, flags);
+
+		/* Flush all pending and queued descriptors */
+		list_for_each_entry_safe(desc, _desc, &list, desc_node)
+			dwc_descriptor_complete(dwc, desc, false);
+	} else if (cmd == DMA_SLAVE_CONFIG) {
+		return set_runtime_config(chan, (struct dma_slave_config *)arg);
+	} else {
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static enum dma_status
+dwc_tx_status(struct dma_chan *chan,
+	      dma_cookie_t cookie,
+	      struct dma_tx_state *txstate)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	enum dma_status		ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_SUCCESS) {
+		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+		ret = dma_cookie_status(chan, cookie, txstate);
+	}
+
+	if (ret != DMA_SUCCESS)
+		dma_set_residue(txstate, dwc_first_active(dwc)->len);
+
+	if (dwc->paused)
+		return DMA_PAUSED;
+
+	return ret;
+}
+
+static void dwc_issue_pending(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+
+	if (!list_empty(&dwc->queue))
+		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+}
+
+static int dwc_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dw_desc		*desc;
+	int			i;
+	unsigned long		flags;
+
+	dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+	/* ASSERT:  channel is idle */
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+		return -EIO;
+	}
+
+	dma_cookie_init(chan);
+
+	/*
+	 * NOTE: some controllers may have additional features that we
+	 * need to initialize here, like "scatter-gather" (which
+	 * doesn't mean what you think it means), and status writeback.
+	 */
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	i = dwc->descs_allocated;
+	while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
+		spin_unlock_irqrestore(&dwc->lock, flags);
+
+		desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL);
+		if (!desc) {
+			dev_info(chan2dev(chan),
+				"only allocated %d descriptors\n", i);
+			spin_lock_irqsave(&dwc->lock, flags);
+			break;
+		}
+
+		INIT_LIST_HEAD(&desc->tx_list);
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		desc->txd.tx_submit = dwc_tx_submit;
+		desc->txd.flags = DMA_CTRL_ACK;
+		desc->txd.phys = dma_map_single(chan2parent(chan), &desc->lli,
+				sizeof(desc->lli), DMA_TO_DEVICE);
+		dwc_desc_put(dwc, desc);
+
+		spin_lock_irqsave(&dwc->lock, flags);
+		i = ++dwc->descs_allocated;
+	}
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	dev_dbg(chan2dev(chan),
+		"alloc_chan_resources allocated %d descriptors\n", i);
+
+	return i;
+}
+
+static void dwc_free_chan_resources(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dw_desc		*desc, *_desc;
+	unsigned long		flags;
+	LIST_HEAD(list);
+
+	dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
+			dwc->descs_allocated);
+
+	/* ASSERT:  channel is idle */
+	BUG_ON(!list_empty(&dwc->active_list));
+	BUG_ON(!list_empty(&dwc->queue));
+	BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	list_splice_init(&dwc->free_list, &list);
+	dwc->descs_allocated = 0;
+	dwc->initialized = false;
+
+	/* Disable interrupts */
+	channel_clear_bit(dw, MASK.XFER, dwc->mask);
+	channel_clear_bit(dw, MASK.ERROR, dwc->mask);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+		dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
+		dma_unmap_single(chan2parent(chan), desc->txd.phys,
+				sizeof(desc->lli), DMA_TO_DEVICE);
+		kfree(desc);
+	}
+
+	dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
+}
+
+/* --------------------- Cyclic DMA API extensions -------------------- */
+
+/**
+ * dw_dma_cyclic_start - start the cyclic DMA transfer
+ * @chan: the DMA channel to start
+ *
+ * Must be called with soft interrupts disabled. Returns zero on success or
+ * -errno on failure.
+ */
+int dw_dma_cyclic_start(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
+	unsigned long		flags;
+
+	if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) {
+		dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n");
+		return -ENODEV;
+	}
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	/* assert channel is idle */
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_err(chan2dev(&dwc->chan),
+			"BUG: Attempted to start non-idle channel\n");
+		dev_err(chan2dev(&dwc->chan),
+			"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+			channel_readl(dwc, SAR),
+			channel_readl(dwc, DAR),
+			channel_readl(dwc, LLP),
+			channel_readl(dwc, CTL_HI),
+			channel_readl(dwc, CTL_LO));
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		return -EBUSY;
+	}
+
+	dma_writel(dw, CLEAR.ERROR, dwc->mask);
+	dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+	/* setup DMAC channel registers */
+	channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys);
+	channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+	channel_writel(dwc, CTL_HI, 0);
+
+	channel_set_bit(dw, CH_EN, dwc->mask);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(dw_dma_cyclic_start);
+
+/**
+ * dw_dma_cyclic_stop - stop the cyclic DMA transfer
+ * @chan: the DMA channel to stop
+ *
+ * Must be called with soft interrupts disabled.
+ */
+void dw_dma_cyclic_stop(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
+	unsigned long		flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	channel_clear_bit(dw, CH_EN, dwc->mask);
+	while (dma_readl(dw, CH_EN) & dwc->mask)
+		cpu_relax();
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+}
+EXPORT_SYMBOL(dw_dma_cyclic_stop);
+
+/**
+ * dw_dma_cyclic_prep - prepare the cyclic DMA transfer
+ * @chan: the DMA channel to prepare
+ * @buf_addr: physical DMA address where the buffer starts
+ * @buf_len: total number of bytes for the entire buffer
+ * @period_len: number of bytes for each period
+ * @direction: transfer direction, to or from device
+ *
+ * Must be called before trying to start the transfer. Returns a valid struct
+ * dw_cyclic_desc if successful or an ERR_PTR(-errno) if not successful.
+ */
+struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
+		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
+		enum dma_transfer_direction direction)
+{
+	struct dw_dma_chan		*dwc = to_dw_dma_chan(chan);
+	struct dma_slave_config		*sconfig = &dwc->dma_sconfig;
+	struct dw_cyclic_desc		*cdesc;
+	struct dw_cyclic_desc		*retval = NULL;
+	struct dw_desc			*desc;
+	struct dw_desc			*last = NULL;
+	unsigned long			was_cyclic;
+	unsigned int			reg_width;
+	unsigned int			periods;
+	unsigned int			i;
+	unsigned long			flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) {
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		dev_dbg(chan2dev(&dwc->chan),
+				"queue and/or active list are not empty\n");
+		return ERR_PTR(-EBUSY);
+	}
+
+	was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+	spin_unlock_irqrestore(&dwc->lock, flags);
+	if (was_cyclic) {
+		dev_dbg(chan2dev(&dwc->chan),
+				"channel already prepared for cyclic DMA\n");
+		return ERR_PTR(-EBUSY);
+	}
+
+	retval = ERR_PTR(-EINVAL);
+
+	if (direction == DMA_MEM_TO_DEV)
+		reg_width = __ffs(sconfig->dst_addr_width);
+	else
+		reg_width = __ffs(sconfig->src_addr_width);
+
+	periods = buf_len / period_len;
+
+	/* Check for too big/unaligned periods and unaligned DMA buffer. */
+	if (period_len > (DWC_MAX_COUNT << reg_width))
+		goto out_err;
+	if (unlikely(period_len & ((1 << reg_width) - 1)))
+		goto out_err;
+	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
+		goto out_err;
+	if (unlikely(!(direction & (DMA_MEM_TO_DEV | DMA_DEV_TO_MEM))))
+		goto out_err;
+
+	retval = ERR_PTR(-ENOMEM);
+
+	if (periods > NR_DESCS_PER_CHANNEL)
+		goto out_err;
+
+	cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
+	if (!cdesc)
+		goto out_err;
+
+	cdesc->desc = kzalloc(sizeof(struct dw_desc *) * periods, GFP_KERNEL);
+	if (!cdesc->desc)
+		goto out_err_alloc;
+
+	for (i = 0; i < periods; i++) {
+		desc = dwc_desc_get(dwc);
+		if (!desc)
+			goto out_err_desc_get;
+
+		switch (direction) {
+		case DMA_MEM_TO_DEV:
+			desc->lli.dar = sconfig->dst_addr;
+			desc->lli.sar = buf_addr + (period_len * i);
+			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
+					| DWC_CTLL_DST_WIDTH(reg_width)
+					| DWC_CTLL_SRC_WIDTH(reg_width)
+					| DWC_CTLL_DST_FIX
+					| DWC_CTLL_SRC_INC
+					| DWC_CTLL_INT_EN);
+
+			desc->lli.ctllo |= sconfig->device_fc ?
+				DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+				DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
+			break;
+		case DMA_DEV_TO_MEM:
+			desc->lli.dar = buf_addr + (period_len * i);
+			desc->lli.sar = sconfig->src_addr;
+			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
+					| DWC_CTLL_SRC_WIDTH(reg_width)
+					| DWC_CTLL_DST_WIDTH(reg_width)
+					| DWC_CTLL_DST_INC
+					| DWC_CTLL_SRC_FIX
+					| DWC_CTLL_INT_EN);
+
+			desc->lli.ctllo |= sconfig->device_fc ?
+				DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+				DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
+			break;
+		default:
+			break;
+		}
+
+		desc->lli.ctlhi = (period_len >> reg_width);
+		cdesc->desc[i] = desc;
+
+		if (last) {
+			last->lli.llp = desc->txd.phys;
+			dma_sync_single_for_device(chan2parent(chan),
+					last->txd.phys, sizeof(last->lli),
+					DMA_TO_DEVICE);
+		}
+
+		last = desc;
+	}
+
+	/* lets make a cyclic list */
+	last->lli.llp = cdesc->desc[0]->txd.phys;
+	dma_sync_single_for_device(chan2parent(chan), last->txd.phys,
+			sizeof(last->lli), DMA_TO_DEVICE);
+
+	dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf 0x%08x len %zu "
+			"period %zu periods %d\n", buf_addr, buf_len,
+			period_len, periods);
+
+	cdesc->periods = periods;
+	dwc->cdesc = cdesc;
+
+	return cdesc;
+
+out_err_desc_get:
+	while (i--)
+		dwc_desc_put(dwc, cdesc->desc[i]);
+out_err_alloc:
+	kfree(cdesc);
+out_err:
+	clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+	return (struct dw_cyclic_desc *)retval;
+}
+EXPORT_SYMBOL(dw_dma_cyclic_prep);
+
+/**
+ * dw_dma_cyclic_free - free a prepared cyclic DMA transfer
+ * @chan: the DMA channel to free
+ */
+void dw_dma_cyclic_free(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(dwc->chan.device);
+	struct dw_cyclic_desc	*cdesc = dwc->cdesc;
+	int			i;
+	unsigned long		flags;
+
+	dev_dbg(chan2dev(&dwc->chan), "cyclic free\n");
+
+	if (!cdesc)
+		return;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	channel_clear_bit(dw, CH_EN, dwc->mask);
+	while (dma_readl(dw, CH_EN) & dwc->mask)
+		cpu_relax();
+
+	dma_writel(dw, CLEAR.ERROR, dwc->mask);
+	dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	for (i = 0; i < cdesc->periods; i++)
+		dwc_desc_put(dwc, cdesc->desc[i]);
+
+	kfree(cdesc->desc);
+	kfree(cdesc);
+
+	clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+}
+EXPORT_SYMBOL(dw_dma_cyclic_free);
+
+/*----------------------------------------------------------------------*/
+
+static void dw_dma_off(struct dw_dma *dw)
+{
+	int i;
+
+	dma_writel(dw, CFG, 0);
+
+	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+	while (dma_readl(dw, CFG) & DW_CFG_DMA_EN)
+		cpu_relax();
+
+	for (i = 0; i < dw->dma.chancnt; i++)
+		dw->chan[i].initialized = false;
+}
+
+static int __init dw_probe(struct platform_device *pdev)
+{
+	struct dw_dma_platform_data *pdata;
+	struct resource		*io;
+	struct dw_dma		*dw;
+	size_t			size;
+	int			irq;
+	int			err;
+	int			i;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
+		return -EINVAL;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!io)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	size = sizeof(struct dw_dma);
+	size += pdata->nr_channels * sizeof(struct dw_dma_chan);
+	dw = kzalloc(size, GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) {
+		err = -EBUSY;
+		goto err_kfree;
+	}
+
+	dw->regs = ioremap(io->start, DW_REGLEN);
+	if (!dw->regs) {
+		err = -ENOMEM;
+		goto err_release_r;
+	}
+
+	dw->clk = clk_get(&pdev->dev, "hclk");
+	if (IS_ERR(dw->clk)) {
+		err = PTR_ERR(dw->clk);
+		goto err_clk;
+	}
+	clk_enable(dw->clk);
+
+	/* force dma off, just in case */
+	dw_dma_off(dw);
+
+	err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
+	if (err)
+		goto err_irq;
+
+	platform_set_drvdata(pdev, dw);
+
+	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
+
+	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+	INIT_LIST_HEAD(&dw->dma.channels);
+	for (i = 0; i < pdata->nr_channels; i++) {
+		struct dw_dma_chan	*dwc = &dw->chan[i];
+
+		dwc->chan.device = &dw->dma;
+		dma_cookie_init(&dwc->chan);
+		if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING)
+			list_add_tail(&dwc->chan.device_node,
+					&dw->dma.channels);
+		else
+			list_add(&dwc->chan.device_node, &dw->dma.channels);
+
+		/* 7 is highest priority & 0 is lowest. */
+		if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
+			dwc->priority = pdata->nr_channels - i - 1;
+		else
+			dwc->priority = i;
+
+		dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
+		spin_lock_init(&dwc->lock);
+		dwc->mask = 1 << i;
+
+		INIT_LIST_HEAD(&dwc->active_list);
+		INIT_LIST_HEAD(&dwc->queue);
+		INIT_LIST_HEAD(&dwc->free_list);
+
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+	}
+
+	/* Clear/disable all interrupts on all channels. */
+	dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
+
+	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+	if (pdata->is_private)
+		dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
+	dw->dma.dev = &pdev->dev;
+	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
+	dw->dma.device_free_chan_resources = dwc_free_chan_resources;
+
+	dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;
+
+	dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
+	dw->dma.device_control = dwc_control;
+
+	dw->dma.device_tx_status = dwc_tx_status;
+	dw->dma.device_issue_pending = dwc_issue_pending;
+
+	dma_writel(dw, CFG, DW_CFG_DMA_EN);
+
+	printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n",
+			dev_name(&pdev->dev), pdata->nr_channels);
+
+	dma_async_device_register(&dw->dma);
+
+	return 0;
+
+err_irq:
+	clk_disable(dw->clk);
+	clk_put(dw->clk);
+err_clk:
+	iounmap(dw->regs);
+	dw->regs = NULL;
+err_release_r:
+	release_resource(io);
+err_kfree:
+	kfree(dw);
+	return err;
+}
+
+static int __exit dw_remove(struct platform_device *pdev)
+{
+	struct dw_dma		*dw = platform_get_drvdata(pdev);
+	struct dw_dma_chan	*dwc, *_dwc;
+	struct resource		*io;
+
+	dw_dma_off(dw);
+	dma_async_device_unregister(&dw->dma);
+
+	free_irq(platform_get_irq(pdev, 0), dw);
+	tasklet_kill(&dw->tasklet);
+
+	list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
+			chan.device_node) {
+		list_del(&dwc->chan.device_node);
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+	}
+
+	clk_disable(dw->clk);
+	clk_put(dw->clk);
+
+	iounmap(dw->regs);
+	dw->regs = NULL;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(io->start, DW_REGLEN);
+
+	kfree(dw);
+
+	return 0;
+}
+
+static void dw_shutdown(struct platform_device *pdev)
+{
+	struct dw_dma	*dw = platform_get_drvdata(pdev);
+
+	dw_dma_off(platform_get_drvdata(pdev));
+	clk_disable(dw->clk);
+}
+
+static int dw_suspend_noirq(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct dw_dma	*dw = platform_get_drvdata(pdev);
+
+	dw_dma_off(platform_get_drvdata(pdev));
+	clk_disable(dw->clk);
+
+	return 0;
+}
+
+static int dw_resume_noirq(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct dw_dma	*dw = platform_get_drvdata(pdev);
+
+	clk_enable(dw->clk);
+	dma_writel(dw, CFG, DW_CFG_DMA_EN);
+	return 0;
+}
+
+static const struct dev_pm_ops dw_dev_pm_ops = {
+	.suspend_noirq = dw_suspend_noirq,
+	.resume_noirq = dw_resume_noirq,
+	.freeze_noirq = dw_suspend_noirq,
+	.thaw_noirq = dw_resume_noirq,
+	.restore_noirq = dw_resume_noirq,
+	.poweroff_noirq = dw_suspend_noirq,
+};
+
+static struct platform_driver dw_driver = {
+	.remove		= __exit_p(dw_remove),
+	.shutdown	= dw_shutdown,
+	.driver = {
+		.name	= "dw_dmac",
+		.pm	= &dw_dev_pm_ops,
+	},
+};
+
+static int __init dw_init(void)
+{
+	return platform_driver_probe(&dw_driver, dw_probe);
+}
+subsys_initcall(dw_init);
+
+static void __exit dw_exit(void)
+{
+	platform_driver_unregister(&dw_driver);
+}
+module_exit(dw_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver");
+MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
new file mode 100644
index 00000000..f298f69e
--- /dev/null
+++ b/drivers/dma/dw_dmac_regs.h
@@ -0,0 +1,250 @@
+/*
+ * Driver for the Synopsys DesignWare AHB DMA Controller
+ *
+ * Copyright (C) 2005-2007 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dw_dmac.h>
+
+#define DW_DMA_MAX_NR_CHANNELS	8
+
+/* flow controller */
+enum dw_dma_fc {
+	DW_DMA_FC_D_M2M,
+	DW_DMA_FC_D_M2P,
+	DW_DMA_FC_D_P2M,
+	DW_DMA_FC_D_P2P,
+	DW_DMA_FC_P_P2M,
+	DW_DMA_FC_SP_P2P,
+	DW_DMA_FC_P_M2P,
+	DW_DMA_FC_DP_P2P,
+};
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#define DW_REG(name)		u32 name; u32 __pad_##name
+
+/* Hardware register definitions. */
+struct dw_dma_chan_regs {
+	DW_REG(SAR);		/* Source Address Register */
+	DW_REG(DAR);		/* Destination Address Register */
+	DW_REG(LLP);		/* Linked List Pointer */
+	u32	CTL_LO;		/* Control Register Low */
+	u32	CTL_HI;		/* Control Register High */
+	DW_REG(SSTAT);
+	DW_REG(DSTAT);
+	DW_REG(SSTATAR);
+	DW_REG(DSTATAR);
+	u32	CFG_LO;		/* Configuration Register Low */
+	u32	CFG_HI;		/* Configuration Register High */
+	DW_REG(SGR);
+	DW_REG(DSR);
+};
+
+struct dw_dma_irq_regs {
+	DW_REG(XFER);
+	DW_REG(BLOCK);
+	DW_REG(SRC_TRAN);
+	DW_REG(DST_TRAN);
+	DW_REG(ERROR);
+};
+
+struct dw_dma_regs {
+	/* per-channel registers */
+	struct dw_dma_chan_regs	CHAN[DW_DMA_MAX_NR_CHANNELS];
+
+	/* irq handling */
+	struct dw_dma_irq_regs	RAW;		/* r */
+	struct dw_dma_irq_regs	STATUS;		/* r (raw & mask) */
+	struct dw_dma_irq_regs	MASK;		/* rw (set = irq enabled) */
+	struct dw_dma_irq_regs	CLEAR;		/* w (ack, affects "raw") */
+
+	DW_REG(STATUS_INT);			/* r */
+
+	/* software handshaking */
+	DW_REG(REQ_SRC);
+	DW_REG(REQ_DST);
+	DW_REG(SGL_REQ_SRC);
+	DW_REG(SGL_REQ_DST);
+	DW_REG(LAST_SRC);
+	DW_REG(LAST_DST);
+
+	/* miscellaneous */
+	DW_REG(CFG);
+	DW_REG(CH_EN);
+	DW_REG(ID);
+	DW_REG(TEST);
+
+	/* optional encoded params, 0x3c8..0x3 */
+};
+
+/* Bitfields in CTL_LO */
+#define DWC_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
+#define DWC_CTLL_DST_WIDTH(n)	((n)<<1)	/* bytes per element */
+#define DWC_CTLL_SRC_WIDTH(n)	((n)<<4)
+#define DWC_CTLL_DST_INC	(0<<7)		/* DAR update/not */
+#define DWC_CTLL_DST_DEC	(1<<7)
+#define DWC_CTLL_DST_FIX	(2<<7)
+#define DWC_CTLL_SRC_INC	(0<<7)		/* SAR update/not */
+#define DWC_CTLL_SRC_DEC	(1<<9)
+#define DWC_CTLL_SRC_FIX	(2<<9)
+#define DWC_CTLL_DST_MSIZE(n)	((n)<<11)	/* burst, #elements */
+#define DWC_CTLL_SRC_MSIZE(n)	((n)<<14)
+#define DWC_CTLL_S_GATH_EN	(1 << 17)	/* src gather, !FIX */
+#define DWC_CTLL_D_SCAT_EN	(1 << 18)	/* dst scatter, !FIX */
+#define DWC_CTLL_FC(n)		((n) << 20)
+#define DWC_CTLL_FC_M2M		(0 << 20)	/* mem-to-mem */
+#define DWC_CTLL_FC_M2P		(1 << 20)	/* mem-to-periph */
+#define DWC_CTLL_FC_P2M		(2 << 20)	/* periph-to-mem */
+#define DWC_CTLL_FC_P2P		(3 << 20)	/* periph-to-periph */
+/* plus 4 transfer types for peripheral-as-flow-controller */
+#define DWC_CTLL_DMS(n)		((n)<<23)	/* dst master select */
+#define DWC_CTLL_SMS(n)		((n)<<25)	/* src master select */
+#define DWC_CTLL_LLP_D_EN	(1 << 27)	/* dest block chain */
+#define DWC_CTLL_LLP_S_EN	(1 << 28)	/* src block chain */
+
+/* Bitfields in CTL_HI */
+#define DWC_CTLH_DONE		0x00001000
+#define DWC_CTLH_BLOCK_TS_MASK	0x00000fff
+
+/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGL_CH_PRIOR_MASK	(0x7 << 5)	/* priority mask */
+#define DWC_CFGL_CH_PRIOR(x)	((x) << 5)	/* priority */
+#define DWC_CFGL_CH_SUSP	(1 << 8)	/* pause xfer */
+#define DWC_CFGL_FIFO_EMPTY	(1 << 9)	/* pause xfer */
+#define DWC_CFGL_HS_DST		(1 << 10)	/* handshake w/dst */
+#define DWC_CFGL_HS_SRC		(1 << 11)	/* handshake w/src */
+#define DWC_CFGL_MAX_BURST(x)	((x) << 20)
+#define DWC_CFGL_RELOAD_SAR	(1 << 30)
+#define DWC_CFGL_RELOAD_DAR	(1 << 31)
+
+/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGH_DS_UPD_EN	(1 << 5)
+#define DWC_CFGH_SS_UPD_EN	(1 << 6)
+
+/* Bitfields in SGR */
+#define DWC_SGR_SGI(x)		((x) << 0)
+#define DWC_SGR_SGC(x)		((x) << 20)
+
+/* Bitfields in DSR */
+#define DWC_DSR_DSI(x)		((x) << 0)
+#define DWC_DSR_DSC(x)		((x) << 20)
+
+/* Bitfields in CFG */
+#define DW_CFG_DMA_EN		(1 << 0)
+
+#define DW_REGLEN		0x400
+
+enum dw_dmac_flags {
+	DW_DMA_IS_CYCLIC = 0,
+};
+
+struct dw_dma_chan {
+	struct dma_chan		chan;
+	void __iomem		*ch_regs;
+	u8			mask;
+	u8			priority;
+	bool			paused;
+	bool			initialized;
+
+	spinlock_t		lock;
+
+	/* these other elements are all protected by lock */
+	unsigned long		flags;
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	struct dw_cyclic_desc	*cdesc;
+
+	unsigned int		descs_allocated;
+
+	/* configuration passed via DMA_SLAVE_CONFIG */
+	struct dma_slave_config dma_sconfig;
+};
+
+static inline struct dw_dma_chan_regs __iomem *
+__dwc_regs(struct dw_dma_chan *dwc)
+{
+	return dwc->ch_regs;
+}
+
+#define channel_readl(dwc, name) \
+	readl(&(__dwc_regs(dwc)->name))
+#define channel_writel(dwc, name, val) \
+	writel((val), &(__dwc_regs(dwc)->name))
+
+static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct dw_dma_chan, chan);
+}
+
+struct dw_dma {
+	struct dma_device	dma;
+	void __iomem		*regs;
+	struct tasklet_struct	tasklet;
+	struct clk		*clk;
+
+	u8			all_chan_mask;
+
+	struct dw_dma_chan	chan[0];
+};
+
+static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+{
+	return dw->regs;
+}
+
+#define dma_readl(dw, name) \
+	readl(&(__dw_regs(dw)->name))
+#define dma_writel(dw, name, val) \
+	writel((val), &(__dw_regs(dw)->name))
+
+#define channel_set_bit(dw, reg, mask) \
+	dma_writel(dw, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(dw, reg, mask) \
+	dma_writel(dw, reg, ((mask) << 8) | 0)
+
+static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
+{
+	return container_of(ddev, struct dw_dma, dma);
+}
+
+/* LLI == Linked List Item; a.k.a. DMA block descriptor */
+struct dw_lli {
+	/* values that are not changed by hardware */
+	dma_addr_t	sar;
+	dma_addr_t	dar;
+	dma_addr_t	llp;		/* chain to next lli */
+	u32		ctllo;
+	/* values that may get written back: */
+	u32		ctlhi;
+	/* sstat and dstat can snapshot peripheral register state.
+	 * silicon config may discard either or both...
+	 */
+	u32		sstat;
+	u32		dstat;
+};
+
+struct dw_desc {
+	/* FIRST values the hardware uses */
+	struct dw_lli			lli;
+
+	/* THEN values for driver housekeeping */
+	struct list_head		desc_node;
+	struct list_head		tx_list;
+	struct dma_async_tx_descriptor	txd;
+	size_t				len;
+};
+
+static inline struct dw_desc *
+txd_to_dw_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct dw_desc, txd);
+}
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
new file mode 100644
index 00000000..f6e9b572
--- /dev/null
+++ b/drivers/dma/ep93xx_dma.c
@@ -0,0 +1,1383 @@
+/*
+ * Driver for the Cirrus Logic EP93xx DMA Controller
+ *
+ * Copyright (C) 2011 Mika Westerberg
+ *
+ * DMA M2P implementation is based on the original
+ * arch/arm/mach-ep93xx/dma-m2p.c which has following copyrights:
+ *
+ *   Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org>
+ *   Copyright (C) 2006 Applied Data Systems
+ *   Copyright (C) 2009 Ryan Mallon <rmallon@gmail.com>
+ *
+ * This driver is based on dw_dmac and amba-pl08x drivers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <mach/dma.h>
+
+#include "dmaengine.h"
+
+/* M2P registers */
+#define M2P_CONTROL			0x0000
+#define M2P_CONTROL_STALLINT		BIT(0)
+#define M2P_CONTROL_NFBINT		BIT(1)
+#define M2P_CONTROL_CH_ERROR_INT	BIT(3)
+#define M2P_CONTROL_ENABLE		BIT(4)
+#define M2P_CONTROL_ICE			BIT(6)
+
+#define M2P_INTERRUPT			0x0004
+#define M2P_INTERRUPT_STALL		BIT(0)
+#define M2P_INTERRUPT_NFB		BIT(1)
+#define M2P_INTERRUPT_ERROR		BIT(3)
+
+#define M2P_PPALLOC			0x0008
+#define M2P_STATUS			0x000c
+
+#define M2P_MAXCNT0			0x0020
+#define M2P_BASE0			0x0024
+#define M2P_MAXCNT1			0x0030
+#define M2P_BASE1			0x0034
+
+#define M2P_STATE_IDLE			0
+#define M2P_STATE_STALL			1
+#define M2P_STATE_ON			2
+#define M2P_STATE_NEXT			3
+
+/* M2M registers */
+#define M2M_CONTROL			0x0000
+#define M2M_CONTROL_DONEINT		BIT(2)
+#define M2M_CONTROL_ENABLE		BIT(3)
+#define M2M_CONTROL_START		BIT(4)
+#define M2M_CONTROL_DAH			BIT(11)
+#define M2M_CONTROL_SAH			BIT(12)
+#define M2M_CONTROL_PW_SHIFT		9
+#define M2M_CONTROL_PW_8		(0 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_16		(1 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_32		(2 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_MASK		(3 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_TM_SHIFT		13
+#define M2M_CONTROL_TM_TX		(1 << M2M_CONTROL_TM_SHIFT)
+#define M2M_CONTROL_TM_RX		(2 << M2M_CONTROL_TM_SHIFT)
+#define M2M_CONTROL_RSS_SHIFT		22
+#define M2M_CONTROL_RSS_SSPRX		(1 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_RSS_SSPTX		(2 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_RSS_IDE		(3 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_NO_HDSK		BIT(24)
+#define M2M_CONTROL_PWSC_SHIFT		25
+
+#define M2M_INTERRUPT			0x0004
+#define M2M_INTERRUPT_DONEINT		BIT(1)
+
+#define M2M_BCR0			0x0010
+#define M2M_BCR1			0x0014
+#define M2M_SAR_BASE0			0x0018
+#define M2M_SAR_BASE1			0x001c
+#define M2M_DAR_BASE0			0x002c
+#define M2M_DAR_BASE1			0x0030
+
+#define DMA_MAX_CHAN_BYTES		0xffff
+#define DMA_MAX_CHAN_DESCRIPTORS	32
+
+struct ep93xx_dma_engine;
+
+/**
+ * struct ep93xx_dma_desc - EP93xx specific transaction descriptor
+ * @src_addr: source address of the transaction
+ * @dst_addr: destination address of the transaction
+ * @size: size of the transaction (in bytes)
+ * @complete: this descriptor is completed
+ * @txd: dmaengine API descriptor
+ * @tx_list: list of linked descriptors
+ * @node: link used for putting this into a channel queue
+ */
+struct ep93xx_dma_desc {
+	u32				src_addr;
+	u32				dst_addr;
+	size_t				size;
+	bool				complete;
+	struct dma_async_tx_descriptor	txd;
+	struct list_head		tx_list;
+	struct list_head		node;
+};
+
+/**
+ * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel
+ * @chan: dmaengine API channel
+ * @edma: pointer to to the engine device
+ * @regs: memory mapped registers
+ * @irq: interrupt number of the channel
+ * @clk: clock used by this channel
+ * @tasklet: channel specific tasklet used for callbacks
+ * @lock: lock protecting the fields following
+ * @flags: flags for the channel
+ * @buffer: which buffer to use next (0/1)
+ * @active: flattened chain of descriptors currently being processed
+ * @queue: pending descriptors which are handled next
+ * @free_list: list of free descriptors which can be used
+ * @runtime_addr: physical address currently used as dest/src (M2M only). This
+ *                is set via %DMA_SLAVE_CONFIG before slave operation is
+ *                prepared
+ * @runtime_ctrl: M2M runtime values for the control register.
+ *
+ * As EP93xx DMA controller doesn't support real chained DMA descriptors we
+ * will have slightly different scheme here: @active points to a head of
+ * flattened DMA descriptor chain.
+ *
+ * @queue holds pending transactions. These are linked through the first
+ * descriptor in the chain. When a descriptor is moved to the @active queue,
+ * the first and chained descriptors are flattened into a single list.
+ *
+ * @chan.private holds pointer to &struct ep93xx_dma_data which contains
+ * necessary channel configuration information. For memcpy channels this must
+ * be %NULL.
+ */
+struct ep93xx_dma_chan {
+	struct dma_chan			chan;
+	const struct ep93xx_dma_engine	*edma;
+	void __iomem			*regs;
+	int				irq;
+	struct clk			*clk;
+	struct tasklet_struct		tasklet;
+	/* protects the fields following */
+	spinlock_t			lock;
+	unsigned long			flags;
+/* Channel is configured for cyclic transfers */
+#define EP93XX_DMA_IS_CYCLIC		0
+
+	int				buffer;
+	struct list_head		active;
+	struct list_head		queue;
+	struct list_head		free_list;
+	u32				runtime_addr;
+	u32				runtime_ctrl;
+};
+
+/**
+ * struct ep93xx_dma_engine - the EP93xx DMA engine instance
+ * @dma_dev: holds the dmaengine device
+ * @m2m: is this an M2M or M2P device
+ * @hw_setup: method which sets the channel up for operation
+ * @hw_shutdown: shuts the channel down and flushes whatever is left
+ * @hw_submit: pushes active descriptor(s) to the hardware
+ * @hw_interrupt: handle the interrupt
+ * @num_channels: number of channels for this instance
+ * @channels: array of channels
+ *
+ * There is one instance of this struct for the M2P channels and one for the
+ * M2M channels. hw_xxx() methods are used to perform operations which are
+ * different on M2M and M2P channels. These methods are called with channel
+ * lock held and interrupts disabled so they cannot sleep.
+ */
+struct ep93xx_dma_engine {
+	struct dma_device	dma_dev;
+	bool			m2m;
+	int			(*hw_setup)(struct ep93xx_dma_chan *);
+	void			(*hw_shutdown)(struct ep93xx_dma_chan *);
+	void			(*hw_submit)(struct ep93xx_dma_chan *);
+	int			(*hw_interrupt)(struct ep93xx_dma_chan *);
+#define INTERRUPT_UNKNOWN	0
+#define INTERRUPT_DONE		1
+#define INTERRUPT_NEXT_BUFFER	2
+
+	size_t			num_channels;
+	struct ep93xx_dma_chan	channels[];
+};
+
+static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac)
+{
+	return &edmac->chan.dev->device;
+}
+
+static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct ep93xx_dma_chan, chan);
+}
+
+/**
+ * ep93xx_dma_set_active - set new active descriptor chain
+ * @edmac: channel
+ * @desc: head of the new active descriptor chain
+ *
+ * Sets @desc to be the head of the new active descriptor chain. This is the
+ * chain which is processed next. The active list must be empty before calling
+ * this function.
+ *
+ * Called with @edmac->lock held and interrupts disabled.
+ */
+static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac,
+				  struct ep93xx_dma_desc *desc)
+{
+	BUG_ON(!list_empty(&edmac->active));
+
+	list_add_tail(&desc->node, &edmac->active);
+
+	/* Flatten the @desc->tx_list chain into @edmac->active list */
+	while (!list_empty(&desc->tx_list)) {
+		struct ep93xx_dma_desc *d = list_first_entry(&desc->tx_list,
+			struct ep93xx_dma_desc, node);
+
+		/*
+		 * We copy the callback parameters from the first descriptor
+		 * to all the chained descriptors. This way we can call the
+		 * callback without having to find out the first descriptor in
+		 * the chain. Useful for cyclic transfers.
+		 */
+		d->txd.callback = desc->txd.callback;
+		d->txd.callback_param = desc->txd.callback_param;
+
+		list_move_tail(&d->node, &edmac->active);
+	}
+}
+
+/* Called with @edmac->lock held and interrupts disabled */
+static struct ep93xx_dma_desc *
+ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac)
+{
+	if (list_empty(&edmac->active))
+		return NULL;
+
+	return list_first_entry(&edmac->active, struct ep93xx_dma_desc, node);
+}
+
+/**
+ * ep93xx_dma_advance_active - advances to the next active descriptor
+ * @edmac: channel
+ *
+ * Function advances active descriptor to the next in the @edmac->active and
+ * returns %true if we still have descriptors in the chain to process.
+ * Otherwise returns %false.
+ *
+ * When the channel is in cyclic mode always returns %true.
+ *
+ * Called with @edmac->lock held and interrupts disabled.
+ */
+static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc;
+
+	list_rotate_left(&edmac->active);
+
+	if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+		return true;
+
+	desc = ep93xx_dma_get_active(edmac);
+	if (!desc)
+		return false;
+
+	/*
+	 * If txd.cookie is set it means that we are back in the first
+	 * descriptor in the chain and hence done with it.
+	 */
+	return !desc->txd.cookie;
+}
+
+/*
+ * M2P DMA implementation
+ */
+
+static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control)
+{
+	writel(control, edmac->regs + M2P_CONTROL);
+	/*
+	 * EP93xx User's Guide states that we must perform a dummy read after
+	 * write to the control register.
+	 */
+	readl(edmac->regs + M2P_CONTROL);
+}
+
+static int m2p_hw_setup(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_data *data = edmac->chan.private;
+	u32 control;
+
+	writel(data->port & 0xf, edmac->regs + M2P_PPALLOC);
+
+	control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE
+		| M2P_CONTROL_ENABLE;
+	m2p_set_control(edmac, control);
+
+	return 0;
+}
+
+static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac)
+{
+	return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3;
+}
+
+static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
+	u32 control;
+
+	control = readl(edmac->regs + M2P_CONTROL);
+	control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
+	m2p_set_control(edmac, control);
+
+	while (m2p_channel_state(edmac) >= M2P_STATE_ON)
+		cpu_relax();
+
+	m2p_set_control(edmac, 0);
+
+	while (m2p_channel_state(edmac) == M2P_STATE_STALL)
+		cpu_relax();
+}
+
+static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc;
+	u32 bus_addr;
+
+	desc = ep93xx_dma_get_active(edmac);
+	if (!desc) {
+		dev_warn(chan2dev(edmac), "M2P: empty descriptor list\n");
+		return;
+	}
+
+	if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV)
+		bus_addr = desc->src_addr;
+	else
+		bus_addr = desc->dst_addr;
+
+	if (edmac->buffer == 0) {
+		writel(desc->size, edmac->regs + M2P_MAXCNT0);
+		writel(bus_addr, edmac->regs + M2P_BASE0);
+	} else {
+		writel(desc->size, edmac->regs + M2P_MAXCNT1);
+		writel(bus_addr, edmac->regs + M2P_BASE1);
+	}
+
+	edmac->buffer ^= 1;
+}
+
+static void m2p_hw_submit(struct ep93xx_dma_chan *edmac)
+{
+	u32 control = readl(edmac->regs + M2P_CONTROL);
+
+	m2p_fill_desc(edmac);
+	control |= M2P_CONTROL_STALLINT;
+
+	if (ep93xx_dma_advance_active(edmac)) {
+		m2p_fill_desc(edmac);
+		control |= M2P_CONTROL_NFBINT;
+	}
+
+	m2p_set_control(edmac, control);
+}
+
+static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac)
+{
+	u32 irq_status = readl(edmac->regs + M2P_INTERRUPT);
+	u32 control;
+
+	if (irq_status & M2P_INTERRUPT_ERROR) {
+		struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac);
+
+		/* Clear the error interrupt */
+		writel(1, edmac->regs + M2P_INTERRUPT);
+
+		/*
+		 * It seems that there is no easy way of reporting errors back
+		 * to client so we just report the error here and continue as
+		 * usual.
+		 *
+		 * Revisit this when there is a mechanism to report back the
+		 * errors.
+		 */
+		dev_err(chan2dev(edmac),
+			"DMA transfer failed! Details:\n"
+			"\tcookie	: %d\n"
+			"\tsrc_addr	: 0x%08x\n"
+			"\tdst_addr	: 0x%08x\n"
+			"\tsize		: %zu\n",
+			desc->txd.cookie, desc->src_addr, desc->dst_addr,
+			desc->size);
+	}
+
+	switch (irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) {
+	case M2P_INTERRUPT_STALL:
+		/* Disable interrupts */
+		control = readl(edmac->regs + M2P_CONTROL);
+		control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
+		m2p_set_control(edmac, control);
+
+		return INTERRUPT_DONE;
+
+	case M2P_INTERRUPT_NFB:
+		if (ep93xx_dma_advance_active(edmac))
+			m2p_fill_desc(edmac);
+
+		return INTERRUPT_NEXT_BUFFER;
+	}
+
+	return INTERRUPT_UNKNOWN;
+}
+
+/*
+ * M2M DMA implementation
+ *
+ * For the M2M transfers we don't use NFB at all. This is because it simply
+ * doesn't work well with memcpy transfers. When you submit both buffers it is
+ * extremely unlikely that you get an NFB interrupt, but it instead reports
+ * DONE interrupt and both buffers are already transferred which means that we
+ * weren't able to update the next buffer.
+ *
+ * So for now we "simulate" NFB by just submitting buffer after buffer
+ * without double buffering.
+ */
+
+static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
+{
+	const struct ep93xx_dma_data *data = edmac->chan.private;
+	u32 control = 0;
+
+	if (!data) {
+		/* This is memcpy channel, nothing to configure */
+		writel(control, edmac->regs + M2M_CONTROL);
+		return 0;
+	}
+
+	switch (data->port) {
+	case EP93XX_DMA_SSP:
+		/*
+		 * This was found via experimenting - anything less than 5
+		 * causes the channel to perform only a partial transfer which
+		 * leads to problems since we don't get DONE interrupt then.
+		 */
+		control = (5 << M2M_CONTROL_PWSC_SHIFT);
+		control |= M2M_CONTROL_NO_HDSK;
+
+		if (data->direction == DMA_MEM_TO_DEV) {
+			control |= M2M_CONTROL_DAH;
+			control |= M2M_CONTROL_TM_TX;
+			control |= M2M_CONTROL_RSS_SSPTX;
+		} else {
+			control |= M2M_CONTROL_SAH;
+			control |= M2M_CONTROL_TM_RX;
+			control |= M2M_CONTROL_RSS_SSPRX;
+		}
+		break;
+
+	case EP93XX_DMA_IDE:
+		/*
+		 * This IDE part is totally untested. Values below are taken
+		 * from the EP93xx Users's Guide and might not be correct.
+		 */
+		if (data->direction == DMA_MEM_TO_DEV) {
+			/* Worst case from the UG */
+			control = (3 << M2M_CONTROL_PWSC_SHIFT);
+			control |= M2M_CONTROL_DAH;
+			control |= M2M_CONTROL_TM_TX;
+		} else {
+			control = (2 << M2M_CONTROL_PWSC_SHIFT);
+			control |= M2M_CONTROL_SAH;
+			control |= M2M_CONTROL_TM_RX;
+		}
+
+		control |= M2M_CONTROL_NO_HDSK;
+		control |= M2M_CONTROL_RSS_IDE;
+		control |= M2M_CONTROL_PW_16;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	writel(control, edmac->regs + M2M_CONTROL);
+	return 0;
+}
+
+static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
+	/* Just disable the channel */
+	writel(0, edmac->regs + M2M_CONTROL);
+}
+
+static void m2m_fill_desc(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc;
+
+	desc = ep93xx_dma_get_active(edmac);
+	if (!desc) {
+		dev_warn(chan2dev(edmac), "M2M: empty descriptor list\n");
+		return;
+	}
+
+	if (edmac->buffer == 0) {
+		writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0);
+		writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE0);
+		writel(desc->size, edmac->regs + M2M_BCR0);
+	} else {
+		writel(desc->src_addr, edmac->regs + M2M_SAR_BASE1);
+		writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE1);
+		writel(desc->size, edmac->regs + M2M_BCR1);
+	}
+
+	edmac->buffer ^= 1;
+}
+
+static void m2m_hw_submit(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_data *data = edmac->chan.private;
+	u32 control = readl(edmac->regs + M2M_CONTROL);
+
+	/*
+	 * Since we allow clients to configure PW (peripheral width) we always
+	 * clear PW bits here and then set them according what is given in
+	 * the runtime configuration.
+	 */
+	control &= ~M2M_CONTROL_PW_MASK;
+	control |= edmac->runtime_ctrl;
+
+	m2m_fill_desc(edmac);
+	control |= M2M_CONTROL_DONEINT;
+
+	/*
+	 * Now we can finally enable the channel. For M2M channel this must be
+	 * done _after_ the BCRx registers are programmed.
+	 */
+	control |= M2M_CONTROL_ENABLE;
+	writel(control, edmac->regs + M2M_CONTROL);
+
+	if (!data) {
+		/*
+		 * For memcpy channels the software trigger must be asserted
+		 * in order to start the memcpy operation.
+		 */
+		control |= M2M_CONTROL_START;
+		writel(control, edmac->regs + M2M_CONTROL);
+	}
+}
+
+static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac)
+{
+	u32 control;
+
+	if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_DONEINT))
+		return INTERRUPT_UNKNOWN;
+
+	/* Clear the DONE bit */
+	writel(0, edmac->regs + M2M_INTERRUPT);
+
+	/* Disable interrupts and the channel */
+	control = readl(edmac->regs + M2M_CONTROL);
+	control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_ENABLE);
+	writel(control, edmac->regs + M2M_CONTROL);
+
+	/*
+	 * Since we only get DONE interrupt we have to find out ourselves
+	 * whether there still is something to process. So we try to advance
+	 * the chain an see whether it succeeds.
+	 */
+	if (ep93xx_dma_advance_active(edmac)) {
+		edmac->edma->hw_submit(edmac);
+		return INTERRUPT_NEXT_BUFFER;
+	}
+
+	return INTERRUPT_DONE;
+}
+
+/*
+ * DMA engine API implementation
+ */
+
+static struct ep93xx_dma_desc *
+ep93xx_dma_desc_get(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc, *_desc;
+	struct ep93xx_dma_desc *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	list_for_each_entry_safe(desc, _desc, &edmac->free_list, node) {
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del_init(&desc->node);
+
+			/* Re-initialize the descriptor */
+			desc->src_addr = 0;
+			desc->dst_addr = 0;
+			desc->size = 0;
+			desc->complete = false;
+			desc->txd.cookie = 0;
+			desc->txd.callback = NULL;
+			desc->txd.callback_param = NULL;
+
+			ret = desc;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&edmac->lock, flags);
+	return ret;
+}
+
+static void ep93xx_dma_desc_put(struct ep93xx_dma_chan *edmac,
+				struct ep93xx_dma_desc *desc)
+{
+	if (desc) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&edmac->lock, flags);
+		list_splice_init(&desc->tx_list, &edmac->free_list);
+		list_add(&desc->node, &edmac->free_list);
+		spin_unlock_irqrestore(&edmac->lock, flags);
+	}
+}
+
+/**
+ * ep93xx_dma_advance_work - start processing the next pending transaction
+ * @edmac: channel
+ *
+ * If we have pending transactions queued and we are currently idling, this
+ * function takes the next queued transaction from the @edmac->queue and
+ * pushes it to the hardware for execution.
+ */
+static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *new;
+	unsigned long flags;
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	if (!list_empty(&edmac->active) || list_empty(&edmac->queue)) {
+		spin_unlock_irqrestore(&edmac->lock, flags);
+		return;
+	}
+
+	/* Take the next descriptor from the pending queue */
+	new = list_first_entry(&edmac->queue, struct ep93xx_dma_desc, node);
+	list_del_init(&new->node);
+
+	ep93xx_dma_set_active(edmac, new);
+
+	/* Push it to the hardware */
+	edmac->edma->hw_submit(edmac);
+	spin_unlock_irqrestore(&edmac->lock, flags);
+}
+
+static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc)
+{
+	struct device *dev = desc->txd.chan->device->dev;
+
+	if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+			dma_unmap_single(dev, desc->src_addr, desc->size,
+					 DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dev, desc->src_addr, desc->size,
+				       DMA_TO_DEVICE);
+	}
+	if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+			dma_unmap_single(dev, desc->dst_addr, desc->size,
+					 DMA_FROM_DEVICE);
+		else
+			dma_unmap_page(dev, desc->dst_addr, desc->size,
+				       DMA_FROM_DEVICE);
+	}
+}
+
+static void ep93xx_dma_tasklet(unsigned long data)
+{
+	struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data;
+	struct ep93xx_dma_desc *desc, *d;
+	dma_async_tx_callback callback = NULL;
+	void *callback_param = NULL;
+	LIST_HEAD(list);
+
+	spin_lock_irq(&edmac->lock);
+	/*
+	 * If dma_terminate_all() was called before we get to run, the active
+	 * list has become empty. If that happens we aren't supposed to do
+	 * anything more than call ep93xx_dma_advance_work().
+	 */
+	desc = ep93xx_dma_get_active(edmac);
+	if (desc) {
+		if (desc->complete) {
+			/* mark descriptor complete for non cyclic case only */
+			if (!test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+				dma_cookie_complete(&desc->txd);
+			list_splice_init(&edmac->active, &list);
+		}
+		callback = desc->txd.callback;
+		callback_param = desc->txd.callback_param;
+	}
+	spin_unlock_irq(&edmac->lock);
+
+	/* Pick up the next descriptor from the queue */
+	ep93xx_dma_advance_work(edmac);
+
+	/* Now we can release all the chained descriptors */
+	list_for_each_entry_safe(desc, d, &list, node) {
+		/*
+		 * For the memcpy channels the API requires us to unmap the
+		 * buffers unless requested otherwise.
+		 */
+		if (!edmac->chan.private)
+			ep93xx_dma_unmap_buffers(desc);
+
+		ep93xx_dma_desc_put(edmac, desc);
+	}
+
+	if (callback)
+		callback(callback_param);
+}
+
+static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id)
+{
+	struct ep93xx_dma_chan *edmac = dev_id;
+	struct ep93xx_dma_desc *desc;
+	irqreturn_t ret = IRQ_HANDLED;
+
+	spin_lock(&edmac->lock);
+
+	desc = ep93xx_dma_get_active(edmac);
+	if (!desc) {
+		dev_warn(chan2dev(edmac),
+			 "got interrupt while active list is empty\n");
+		spin_unlock(&edmac->lock);
+		return IRQ_NONE;
+	}
+
+	switch (edmac->edma->hw_interrupt(edmac)) {
+	case INTERRUPT_DONE:
+		desc->complete = true;
+		tasklet_schedule(&edmac->tasklet);
+		break;
+
+	case INTERRUPT_NEXT_BUFFER:
+		if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+			tasklet_schedule(&edmac->tasklet);
+		break;
+
+	default:
+		dev_warn(chan2dev(edmac), "unknown interrupt!\n");
+		ret = IRQ_NONE;
+		break;
+	}
+
+	spin_unlock(&edmac->lock);
+	return ret;
+}
+
+/**
+ * ep93xx_dma_tx_submit - set the prepared descriptor(s) to be executed
+ * @tx: descriptor to be executed
+ *
+ * Function will execute given descriptor on the hardware or if the hardware
+ * is busy, queue the descriptor to be executed later on. Returns cookie which
+ * can be used to poll the status of the descriptor.
+ */
+static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(tx->chan);
+	struct ep93xx_dma_desc *desc;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	desc = container_of(tx, struct ep93xx_dma_desc, txd);
+
+	/*
+	 * If nothing is currently prosessed, we push this descriptor
+	 * directly to the hardware. Otherwise we put the descriptor
+	 * to the pending queue.
+	 */
+	if (list_empty(&edmac->active)) {
+		ep93xx_dma_set_active(edmac, desc);
+		edmac->edma->hw_submit(edmac);
+	} else {
+		list_add_tail(&desc->node, &edmac->queue);
+	}
+
+	spin_unlock_irqrestore(&edmac->lock, flags);
+	return cookie;
+}
+
+/**
+ * ep93xx_dma_alloc_chan_resources - allocate resources for the channel
+ * @chan: channel to allocate resources
+ *
+ * Function allocates necessary resources for the given DMA channel and
+ * returns number of allocated descriptors for the channel. Negative errno
+ * is returned in case of failure.
+ */
+static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_data *data = chan->private;
+	const char *name = dma_chan_name(chan);
+	int ret, i;
+
+	/* Sanity check the channel parameters */
+	if (!edmac->edma->m2m) {
+		if (!data)
+			return -EINVAL;
+		if (data->port < EP93XX_DMA_I2S1 ||
+		    data->port > EP93XX_DMA_IRDA)
+			return -EINVAL;
+		if (data->direction != ep93xx_dma_chan_direction(chan))
+			return -EINVAL;
+	} else {
+		if (data) {
+			switch (data->port) {
+			case EP93XX_DMA_SSP:
+			case EP93XX_DMA_IDE:
+				if (data->direction != DMA_MEM_TO_DEV &&
+				    data->direction != DMA_DEV_TO_MEM)
+					return -EINVAL;
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (data && data->name)
+		name = data->name;
+
+	ret = clk_enable(edmac->clk);
+	if (ret)
+		return ret;
+
+	ret = request_irq(edmac->irq, ep93xx_dma_interrupt, 0, name, edmac);
+	if (ret)
+		goto fail_clk_disable;
+
+	spin_lock_irq(&edmac->lock);
+	dma_cookie_init(&edmac->chan);
+	ret = edmac->edma->hw_setup(edmac);
+	spin_unlock_irq(&edmac->lock);
+
+	if (ret)
+		goto fail_free_irq;
+
+	for (i = 0; i < DMA_MAX_CHAN_DESCRIPTORS; i++) {
+		struct ep93xx_dma_desc *desc;
+
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "not enough descriptors\n");
+			break;
+		}
+
+		INIT_LIST_HEAD(&desc->tx_list);
+
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		desc->txd.flags = DMA_CTRL_ACK;
+		desc->txd.tx_submit = ep93xx_dma_tx_submit;
+
+		ep93xx_dma_desc_put(edmac, desc);
+	}
+
+	return i;
+
+fail_free_irq:
+	free_irq(edmac->irq, edmac);
+fail_clk_disable:
+	clk_disable(edmac->clk);
+
+	return ret;
+}
+
+/**
+ * ep93xx_dma_free_chan_resources - release resources for the channel
+ * @chan: channel
+ *
+ * Function releases all the resources allocated for the given channel.
+ * The channel must be idle when this is called.
+ */
+static void ep93xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *d;
+	unsigned long flags;
+	LIST_HEAD(list);
+
+	BUG_ON(!list_empty(&edmac->active));
+	BUG_ON(!list_empty(&edmac->queue));
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	edmac->edma->hw_shutdown(edmac);
+	edmac->runtime_addr = 0;
+	edmac->runtime_ctrl = 0;
+	edmac->buffer = 0;
+	list_splice_init(&edmac->free_list, &list);
+	spin_unlock_irqrestore(&edmac->lock, flags);
+
+	list_for_each_entry_safe(desc, d, &list, node)
+		kfree(desc);
+
+	clk_disable(edmac->clk);
+	free_irq(edmac->irq, edmac);
+}
+
+/**
+ * ep93xx_dma_prep_dma_memcpy - prepare a memcpy DMA operation
+ * @chan: channel
+ * @dest: destination bus address
+ * @src: source bus address
+ * @len: size of the transaction
+ * @flags: flags for the descriptor
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+			   dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	size_t bytes, offset;
+
+	first = NULL;
+	for (offset = 0; offset < len; offset += bytes) {
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couln't get descriptor\n");
+			goto fail;
+		}
+
+		bytes = min_t(size_t, len - offset, DMA_MAX_CHAN_BYTES);
+
+		desc->src_addr = src + offset;
+		desc->dst_addr = dest + offset;
+		desc->size = bytes;
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+	first->txd.flags = flags;
+
+	return &first->txd;
+fail:
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_prep_slave_sg - prepare a slave DMA operation
+ * @chan: channel
+ * @sgl: list of buffers to transfer
+ * @sg_len: number of entries in @sgl
+ * @dir: direction of tha DMA transfer
+ * @flags: flags for the descriptor
+ * @context: operation context (ignored)
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			 unsigned int sg_len, enum dma_transfer_direction dir,
+			 unsigned long flags, void *context)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	struct scatterlist *sg;
+	int i;
+
+	if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) {
+		dev_warn(chan2dev(edmac),
+			 "channel was configured with different direction\n");
+		return NULL;
+	}
+
+	if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) {
+		dev_warn(chan2dev(edmac),
+			 "channel is already used for cyclic transfers\n");
+		return NULL;
+	}
+
+	first = NULL;
+	for_each_sg(sgl, sg, sg_len, i) {
+		size_t sg_len = sg_dma_len(sg);
+
+		if (sg_len > DMA_MAX_CHAN_BYTES) {
+			dev_warn(chan2dev(edmac), "too big transfer size %d\n",
+				 sg_len);
+			goto fail;
+		}
+
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couln't get descriptor\n");
+			goto fail;
+		}
+
+		if (dir == DMA_MEM_TO_DEV) {
+			desc->src_addr = sg_dma_address(sg);
+			desc->dst_addr = edmac->runtime_addr;
+		} else {
+			desc->src_addr = edmac->runtime_addr;
+			desc->dst_addr = sg_dma_address(sg);
+		}
+		desc->size = sg_len;
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+	first->txd.flags = flags;
+
+	return &first->txd;
+
+fail:
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_prep_dma_cyclic - prepare a cyclic DMA operation
+ * @chan: channel
+ * @dma_addr: DMA mapped address of the buffer
+ * @buf_len: length of the buffer (in bytes)
+ * @period_len: lenght of a single period
+ * @dir: direction of the operation
+ * @context: operation context (ignored)
+ *
+ * Prepares a descriptor for cyclic DMA operation. This means that once the
+ * descriptor is submitted, we will be submitting in a @period_len sized
+ * buffers and calling callback once the period has been elapsed. Transfer
+ * terminates only when client calls dmaengine_terminate_all() for this
+ * channel.
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
+			   size_t buf_len, size_t period_len,
+			   enum dma_transfer_direction dir, void *context)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	size_t offset = 0;
+
+	if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) {
+		dev_warn(chan2dev(edmac),
+			 "channel was configured with different direction\n");
+		return NULL;
+	}
+
+	if (test_and_set_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) {
+		dev_warn(chan2dev(edmac),
+			 "channel is already used for cyclic transfers\n");
+		return NULL;
+	}
+
+	if (period_len > DMA_MAX_CHAN_BYTES) {
+		dev_warn(chan2dev(edmac), "too big period length %d\n",
+			 period_len);
+		return NULL;
+	}
+
+	/* Split the buffer into period size chunks */
+	first = NULL;
+	for (offset = 0; offset < buf_len; offset += period_len) {
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couln't get descriptor\n");
+			goto fail;
+		}
+
+		if (dir == DMA_MEM_TO_DEV) {
+			desc->src_addr = dma_addr + offset;
+			desc->dst_addr = edmac->runtime_addr;
+		} else {
+			desc->src_addr = edmac->runtime_addr;
+			desc->dst_addr = dma_addr + offset;
+		}
+
+		desc->size = period_len;
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+
+	return &first->txd;
+
+fail:
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_terminate_all - terminate all transactions
+ * @edmac: channel
+ *
+ * Stops all DMA transactions. All descriptors are put back to the
+ * @edmac->free_list and callbacks are _not_ called.
+ */
+static int ep93xx_dma_terminate_all(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc, *_d;
+	unsigned long flags;
+	LIST_HEAD(list);
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	/* First we disable and flush the DMA channel */
+	edmac->edma->hw_shutdown(edmac);
+	clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags);
+	list_splice_init(&edmac->active, &list);
+	list_splice_init(&edmac->queue, &list);
+	/*
+	 * We then re-enable the channel. This way we can continue submitting
+	 * the descriptors by just calling ->hw_submit() again.
+	 */
+	edmac->edma->hw_setup(edmac);
+	spin_unlock_irqrestore(&edmac->lock, flags);
+
+	list_for_each_entry_safe(desc, _d, &list, node)
+		ep93xx_dma_desc_put(edmac, desc);
+
+	return 0;
+}
+
+static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac,
+				   struct dma_slave_config *config)
+{
+	enum dma_slave_buswidth width;
+	unsigned long flags;
+	u32 addr, ctrl;
+
+	if (!edmac->edma->m2m)
+		return -EINVAL;
+
+	switch (config->direction) {
+	case DMA_DEV_TO_MEM:
+		width = config->src_addr_width;
+		addr = config->src_addr;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		width = config->dst_addr_width;
+		addr = config->dst_addr;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		ctrl = 0;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		ctrl = M2M_CONTROL_PW_16;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		ctrl = M2M_CONTROL_PW_32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	edmac->runtime_addr = addr;
+	edmac->runtime_ctrl = ctrl;
+	spin_unlock_irqrestore(&edmac->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ep93xx_dma_control - manipulate all pending operations on a channel
+ * @chan: channel
+ * @cmd: control command to perform
+ * @arg: optional argument
+ *
+ * Controls the channel. Function returns %0 in case of success or negative
+ * error in case of failure.
+ */
+static int ep93xx_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			      unsigned long arg)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct dma_slave_config *config;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		return ep93xx_dma_terminate_all(edmac);
+
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+		return ep93xx_dma_slave_config(edmac, config);
+
+	default:
+		break;
+	}
+
+	return -ENOSYS;
+}
+
+/**
+ * ep93xx_dma_tx_status - check if a transaction is completed
+ * @chan: channel
+ * @cookie: transaction specific cookie
+ * @state: state of the transaction is stored here if given
+ *
+ * This function can be used to query state of a given transaction.
+ */
+static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *state)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	enum dma_status ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	ret = dma_cookie_status(chan, cookie, state);
+	spin_unlock_irqrestore(&edmac->lock, flags);
+
+	return ret;
+}
+
+/**
+ * ep93xx_dma_issue_pending - push pending transactions to the hardware
+ * @chan: channel
+ *
+ * When this function is called, all pending transactions are pushed to the
+ * hardware and executed.
+ */
+static void ep93xx_dma_issue_pending(struct dma_chan *chan)
+{
+	ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan));
+}
+
+static int __init ep93xx_dma_probe(struct platform_device *pdev)
+{
+	struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct ep93xx_dma_engine *edma;
+	struct dma_device *dma_dev;
+	size_t edma_size;
+	int ret, i;
+
+	edma_size = pdata->num_channels * sizeof(struct ep93xx_dma_chan);
+	edma = kzalloc(sizeof(*edma) + edma_size, GFP_KERNEL);
+	if (!edma)
+		return -ENOMEM;
+
+	dma_dev = &edma->dma_dev;
+	edma->m2m = platform_get_device_id(pdev)->driver_data;
+	edma->num_channels = pdata->num_channels;
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+	for (i = 0; i < pdata->num_channels; i++) {
+		const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i];
+		struct ep93xx_dma_chan *edmac = &edma->channels[i];
+
+		edmac->chan.device = dma_dev;
+		edmac->regs = cdata->base;
+		edmac->irq = cdata->irq;
+		edmac->edma = edma;
+
+		edmac->clk = clk_get(NULL, cdata->name);
+		if (IS_ERR(edmac->clk)) {
+			dev_warn(&pdev->dev, "failed to get clock for %s\n",
+				 cdata->name);
+			continue;
+		}
+
+		spin_lock_init(&edmac->lock);
+		INIT_LIST_HEAD(&edmac->active);
+		INIT_LIST_HEAD(&edmac->queue);
+		INIT_LIST_HEAD(&edmac->free_list);
+		tasklet_init(&edmac->tasklet, ep93xx_dma_tasklet,
+			     (unsigned long)edmac);
+
+		list_add_tail(&edmac->chan.device_node,
+			      &dma_dev->channels);
+	}
+
+	dma_cap_zero(dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask);
+
+	dma_dev->dev = &pdev->dev;
+	dma_dev->device_alloc_chan_resources = ep93xx_dma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = ep93xx_dma_free_chan_resources;
+	dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg;
+	dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic;
+	dma_dev->device_control = ep93xx_dma_control;
+	dma_dev->device_issue_pending = ep93xx_dma_issue_pending;
+	dma_dev->device_tx_status = ep93xx_dma_tx_status;
+
+	dma_set_max_seg_size(dma_dev->dev, DMA_MAX_CHAN_BYTES);
+
+	if (edma->m2m) {
+		dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+		dma_dev->device_prep_dma_memcpy = ep93xx_dma_prep_dma_memcpy;
+
+		edma->hw_setup = m2m_hw_setup;
+		edma->hw_shutdown = m2m_hw_shutdown;
+		edma->hw_submit = m2m_hw_submit;
+		edma->hw_interrupt = m2m_hw_interrupt;
+	} else {
+		dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+		edma->hw_setup = m2p_hw_setup;
+		edma->hw_shutdown = m2p_hw_shutdown;
+		edma->hw_submit = m2p_hw_submit;
+		edma->hw_interrupt = m2p_hw_interrupt;
+	}
+
+	ret = dma_async_device_register(dma_dev);
+	if (unlikely(ret)) {
+		for (i = 0; i < edma->num_channels; i++) {
+			struct ep93xx_dma_chan *edmac = &edma->channels[i];
+			if (!IS_ERR_OR_NULL(edmac->clk))
+				clk_put(edmac->clk);
+		}
+		kfree(edma);
+	} else {
+		dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n",
+			 edma->m2m ? "M" : "P");
+	}
+
+	return ret;
+}
+
+static struct platform_device_id ep93xx_dma_driver_ids[] = {
+	{ "ep93xx-dma-m2p", 0 },
+	{ "ep93xx-dma-m2m", 1 },
+	{ },
+};
+
+static struct platform_driver ep93xx_dma_driver = {
+	.driver		= {
+		.name	= "ep93xx-dma",
+	},
+	.id_table	= ep93xx_dma_driver_ids,
+};
+
+static int __init ep93xx_dma_module_init(void)
+{
+	return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe);
+}
+subsys_initcall(ep93xx_dma_module_init);
+
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@iki.fi>");
+MODULE_DESCRIPTION("EP93xx DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
new file mode 100644
index 00000000..8f84761f
--- /dev/null
+++ b/drivers/dma/fsldma.c
@@ -0,0 +1,1472 @@
+/*
+ * Freescale MPC85xx, MPC83xx DMA Engine support
+ *
+ * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ *   Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ *   Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * Description:
+ *   DMA engine driver for Freescale MPC8540 DMA controller, which is
+ *   also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc.
+ *   The support for MPC8349 DMA controller is also added.
+ *
+ * This driver instructs the DMA controller to issue the PCI Read Multiple
+ * command for PCI read operations, instead of using the default PCI Read Line
+ * command. Please be aware that this setting may result in read pre-fetching
+ * on some platforms.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/of_platform.h>
+
+#include "dmaengine.h"
+#include "fsldma.h"
+
+#define chan_dbg(chan, fmt, arg...)					\
+	dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg)
+#define chan_err(chan, fmt, arg...)					\
+	dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
+
+static const char msg_ld_oom[] = "No free memory for link descriptor";
+
+/*
+ * Register Helpers
+ */
+
+static void set_sr(struct fsldma_chan *chan, u32 val)
+{
+	DMA_OUT(chan, &chan->regs->sr, val, 32);
+}
+
+static u32 get_sr(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->sr, 32);
+}
+
+static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
+{
+	DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
+}
+
+static dma_addr_t get_cdar(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
+}
+
+static u32 get_bcr(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->bcr, 32);
+}
+
+/*
+ * Descriptor Helpers
+ */
+
+static void set_desc_cnt(struct fsldma_chan *chan,
+				struct fsl_dma_ld_hw *hw, u32 count)
+{
+	hw->count = CPU_TO_DMA(chan, count, 32);
+}
+
+static u32 get_desc_cnt(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
+{
+	return DMA_TO_CPU(chan, desc->hw.count, 32);
+}
+
+static void set_desc_src(struct fsldma_chan *chan,
+			 struct fsl_dma_ld_hw *hw, dma_addr_t src)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
+	hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64);
+}
+
+static dma_addr_t get_desc_src(struct fsldma_chan *chan,
+			       struct fsl_desc_sw *desc)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
+	return DMA_TO_CPU(chan, desc->hw.src_addr, 64) & ~snoop_bits;
+}
+
+static void set_desc_dst(struct fsldma_chan *chan,
+			 struct fsl_dma_ld_hw *hw, dma_addr_t dst)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
+	hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64);
+}
+
+static dma_addr_t get_desc_dst(struct fsldma_chan *chan,
+			       struct fsl_desc_sw *desc)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
+	return DMA_TO_CPU(chan, desc->hw.dst_addr, 64) & ~snoop_bits;
+}
+
+static void set_desc_next(struct fsldma_chan *chan,
+			  struct fsl_dma_ld_hw *hw, dma_addr_t next)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+		? FSL_DMA_SNEN : 0;
+	hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64);
+}
+
+static void set_ld_eol(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+		? FSL_DMA_SNEN : 0;
+
+	desc->hw.next_ln_addr = CPU_TO_DMA(chan,
+		DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
+			| snoop_bits, 64);
+}
+
+/*
+ * DMA Engine Hardware Control Helpers
+ */
+
+static void dma_init(struct fsldma_chan *chan)
+{
+	/* Reset the channel */
+	DMA_OUT(chan, &chan->regs->mr, 0, 32);
+
+	switch (chan->feature & FSL_DMA_IP_MASK) {
+	case FSL_DMA_IP_85XX:
+		/* Set the channel to below modes:
+		 * EIE - Error interrupt enable
+		 * EOLNIE - End of links interrupt enable
+		 * BWC - Bandwidth sharing among channels
+		 */
+		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC
+				| FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE, 32);
+		break;
+	case FSL_DMA_IP_83XX:
+		/* Set the channel to below modes:
+		 * EOTIE - End-of-transfer interrupt enable
+		 * PRC_RM - PCI read multiple
+		 */
+		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE
+				| FSL_DMA_MR_PRC_RM, 32);
+		break;
+	}
+}
+
+static int dma_is_idle(struct fsldma_chan *chan)
+{
+	u32 sr = get_sr(chan);
+	return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH);
+}
+
+/*
+ * Start the DMA controller
+ *
+ * Preconditions:
+ * - the CDAR register must point to the start descriptor
+ * - the MRn[CS] bit must be cleared
+ */
+static void dma_start(struct fsldma_chan *chan)
+{
+	u32 mode;
+
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+
+	if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
+		DMA_OUT(chan, &chan->regs->bcr, 0, 32);
+		mode |= FSL_DMA_MR_EMP_EN;
+	} else {
+		mode &= ~FSL_DMA_MR_EMP_EN;
+	}
+
+	if (chan->feature & FSL_DMA_CHAN_START_EXT) {
+		mode |= FSL_DMA_MR_EMS_EN;
+	} else {
+		mode &= ~FSL_DMA_MR_EMS_EN;
+		mode |= FSL_DMA_MR_CS;
+	}
+
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+}
+
+static void dma_halt(struct fsldma_chan *chan)
+{
+	u32 mode;
+	int i;
+
+	/* read the mode register */
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+
+	/*
+	 * The 85xx controller supports channel abort, which will stop
+	 * the current transfer. On 83xx, this bit is the transfer error
+	 * mask bit, which should not be changed.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		mode |= FSL_DMA_MR_CA;
+		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+
+		mode &= ~FSL_DMA_MR_CA;
+	}
+
+	/* stop the DMA controller */
+	mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN);
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+
+	/* wait for the DMA controller to become idle */
+	for (i = 0; i < 100; i++) {
+		if (dma_is_idle(chan))
+			return;
+
+		udelay(10);
+	}
+
+	if (!dma_is_idle(chan))
+		chan_err(chan, "DMA halt timeout!\n");
+}
+
+/**
+ * fsl_chan_set_src_loop_size - Set source address hold transfer size
+ * @chan : Freescale DMA channel
+ * @size     : Address loop size, 0 for disable loop
+ *
+ * The set source address hold transfer size. The source
+ * address hold or loop transfer size is when the DMA transfer
+ * data from source address (SA), if the loop size is 4, the DMA will
+ * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA,
+ * SA + 1 ... and so on.
+ */
+static void fsl_chan_set_src_loop_size(struct fsldma_chan *chan, int size)
+{
+	u32 mode;
+
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+
+	switch (size) {
+	case 0:
+		mode &= ~FSL_DMA_MR_SAHE;
+		break;
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		mode |= FSL_DMA_MR_SAHE | (__ilog2(size) << 14);
+		break;
+	}
+
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+}
+
+/**
+ * fsl_chan_set_dst_loop_size - Set destination address hold transfer size
+ * @chan : Freescale DMA channel
+ * @size     : Address loop size, 0 for disable loop
+ *
+ * The set destination address hold transfer size. The destination
+ * address hold or loop transfer size is when the DMA transfer
+ * data to destination address (TA), if the loop size is 4, the DMA will
+ * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA,
+ * TA + 1 ... and so on.
+ */
+static void fsl_chan_set_dst_loop_size(struct fsldma_chan *chan, int size)
+{
+	u32 mode;
+
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+
+	switch (size) {
+	case 0:
+		mode &= ~FSL_DMA_MR_DAHE;
+		break;
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		mode |= FSL_DMA_MR_DAHE | (__ilog2(size) << 16);
+		break;
+	}
+
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+}
+
+/**
+ * fsl_chan_set_request_count - Set DMA Request Count for external control
+ * @chan : Freescale DMA channel
+ * @size     : Number of bytes to transfer in a single request
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA request count is how many bytes are allowed to transfer before
+ * pausing the channel, after which a new assertion of DREQ# resumes channel
+ * operation.
+ *
+ * A size of 0 disables external pause control. The maximum size is 1024.
+ */
+static void fsl_chan_set_request_count(struct fsldma_chan *chan, int size)
+{
+	u32 mode;
+
+	BUG_ON(size > 1024);
+
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode |= (__ilog2(size) << 24) & 0x0f000000;
+
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+}
+
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @chan : Freescale DMA channel
+ * @enable   : 0 is disabled, 1 is enabled.
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA Request Count feature should be used in addition to this feature
+ * to set the number of bytes to transfer before pausing the channel.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsldma_chan *chan, int enable)
+{
+	if (enable)
+		chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
+	else
+		chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
+}
+
+/**
+ * fsl_chan_toggle_ext_start - Toggle channel external start status
+ * @chan : Freescale DMA channel
+ * @enable   : 0 is disabled, 1 is enabled.
+ *
+ * If enable the external start, the channel can be started by an
+ * external DMA start pin. So the dma_start() does not start the
+ * transfer immediately. The DMA channel will wait for the
+ * control pin asserted.
+ */
+static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable)
+{
+	if (enable)
+		chan->feature |= FSL_DMA_CHAN_START_EXT;
+	else
+		chan->feature &= ~FSL_DMA_CHAN_START_EXT;
+}
+
+static void append_ld_queue(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
+{
+	struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev);
+
+	if (list_empty(&chan->ld_pending))
+		goto out_splice;
+
+	/*
+	 * Add the hardware descriptor to the chain of hardware descriptors
+	 * that already exists in memory.
+	 *
+	 * This will un-set the EOL bit of the existing transaction, and the
+	 * last link in this transaction will become the EOL descriptor.
+	 */
+	set_desc_next(chan, &tail->hw, desc->async_tx.phys);
+
+	/*
+	 * Add the software descriptor and all children to the list
+	 * of pending transactions
+	 */
+out_splice:
+	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
+}
+
+static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
+	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+	struct fsl_desc_sw *child;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	/*
+	 * assign cookies to all of the software descriptors
+	 * that make up this transaction
+	 */
+	list_for_each_entry(child, &desc->tx_list, node) {
+		cookie = dma_cookie_assign(&child->async_tx);
+	}
+
+	/* put this transaction onto the tail of the pending queue */
+	append_ld_queue(chan, desc);
+
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	return cookie;
+}
+
+/**
+ * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
+ * @chan : Freescale DMA channel
+ *
+ * Return - The descriptor allocated. NULL for failed.
+ */
+static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+	dma_addr_t pdesc;
+
+	desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+	if (!desc) {
+		chan_dbg(chan, "out of memory for link descriptor\n");
+		return NULL;
+	}
+
+	memset(desc, 0, sizeof(*desc));
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = fsl_dma_tx_submit;
+	desc->async_tx.phys = pdesc;
+
+#ifdef FSL_DMA_LD_DEBUG
+	chan_dbg(chan, "LD %p allocated\n", desc);
+#endif
+
+	return desc;
+}
+
+/**
+ * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
+ * @chan : Freescale DMA channel
+ *
+ * This function will create a dma pool for descriptor allocation.
+ *
+ * Return - The number of descriptors allocated.
+ */
+static int fsl_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+
+	/* Has this channel already been allocated? */
+	if (chan->desc_pool)
+		return 1;
+
+	/*
+	 * We need the descriptor to be aligned to 32bytes
+	 * for meeting FSL DMA specification requirement.
+	 */
+	chan->desc_pool = dma_pool_create(chan->name, chan->dev,
+					  sizeof(struct fsl_desc_sw),
+					  __alignof__(struct fsl_desc_sw), 0);
+	if (!chan->desc_pool) {
+		chan_err(chan, "unable to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	/* there is at least one descriptor free to be allocated */
+	return 1;
+}
+
+/**
+ * fsldma_free_desc_list - Free all descriptors in a queue
+ * @chan: Freescae DMA channel
+ * @list: the list to free
+ *
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsldma_free_desc_list(struct fsldma_chan *chan,
+				  struct list_head *list)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, list, node) {
+		list_del(&desc->node);
+#ifdef FSL_DMA_LD_DEBUG
+		chan_dbg(chan, "LD %p free\n", desc);
+#endif
+		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+	}
+}
+
+static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
+					  struct list_head *list)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe_reverse(desc, _desc, list, node) {
+		list_del(&desc->node);
+#ifdef FSL_DMA_LD_DEBUG
+		chan_dbg(chan, "LD %p free\n", desc);
+#endif
+		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+	}
+}
+
+/**
+ * fsl_dma_free_chan_resources - Free all resources of the channel.
+ * @chan : Freescale DMA channel
+ */
+static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	unsigned long flags;
+
+	chan_dbg(chan, "free all channel resources\n");
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	fsldma_free_desc_list(chan, &chan->ld_pending);
+	fsldma_free_desc_list(chan, &chan->ld_running);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
+{
+	struct fsldma_chan *chan;
+	struct fsl_desc_sw *new;
+
+	if (!dchan)
+		return NULL;
+
+	chan = to_fsl_chan(dchan);
+
+	new = fsl_dma_alloc_descriptor(chan);
+	if (!new) {
+		chan_err(chan, "%s\n", msg_ld_oom);
+		return NULL;
+	}
+
+	new->async_tx.cookie = -EBUSY;
+	new->async_tx.flags = flags;
+
+	/* Insert the link descriptor to the LD ring */
+	list_add_tail(&new->node, &new->tx_list);
+
+	/* Set End-of-link to the last link descriptor of new list */
+	set_ld_eol(chan, new);
+
+	return &new->async_tx;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_dma_prep_memcpy(struct dma_chan *dchan,
+	dma_addr_t dma_dst, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct fsldma_chan *chan;
+	struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
+	size_t copy;
+
+	if (!dchan)
+		return NULL;
+
+	if (!len)
+		return NULL;
+
+	chan = to_fsl_chan(dchan);
+
+	do {
+
+		/* Allocate the link descriptor from DMA pool */
+		new = fsl_dma_alloc_descriptor(chan);
+		if (!new) {
+			chan_err(chan, "%s\n", msg_ld_oom);
+			goto fail;
+		}
+
+		copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT);
+
+		set_desc_cnt(chan, &new->hw, copy);
+		set_desc_src(chan, &new->hw, dma_src);
+		set_desc_dst(chan, &new->hw, dma_dst);
+
+		if (!first)
+			first = new;
+		else
+			set_desc_next(chan, &prev->hw, new->async_tx.phys);
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= copy;
+		dma_src += copy;
+		dma_dst += copy;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	new->async_tx.flags = flags; /* client is in control of this ack */
+	new->async_tx.cookie = -EBUSY;
+
+	/* Set End-of-link to the last link descriptor of new list */
+	set_ld_eol(chan, new);
+
+	return &first->async_tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	fsldma_free_desc_list_reverse(chan, &first->tx_list);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan,
+	struct scatterlist *dst_sg, unsigned int dst_nents,
+	struct scatterlist *src_sg, unsigned int src_nents,
+	unsigned long flags)
+{
+	struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	size_t dst_avail, src_avail;
+	dma_addr_t dst, src;
+	size_t len;
+
+	/* basic sanity checks */
+	if (dst_nents == 0 || src_nents == 0)
+		return NULL;
+
+	if (dst_sg == NULL || src_sg == NULL)
+		return NULL;
+
+	/*
+	 * TODO: should we check that both scatterlists have the same
+	 * TODO: number of bytes in total? Is that really an error?
+	 */
+
+	/* get prepared for the loop */
+	dst_avail = sg_dma_len(dst_sg);
+	src_avail = sg_dma_len(src_sg);
+
+	/* run until we are out of scatterlist entries */
+	while (true) {
+
+		/* create the largest transaction possible */
+		len = min_t(size_t, src_avail, dst_avail);
+		len = min_t(size_t, len, FSL_DMA_BCR_MAX_CNT);
+		if (len == 0)
+			goto fetch;
+
+		dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail;
+		src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail;
+
+		/* allocate and populate the descriptor */
+		new = fsl_dma_alloc_descriptor(chan);
+		if (!new) {
+			chan_err(chan, "%s\n", msg_ld_oom);
+			goto fail;
+		}
+
+		set_desc_cnt(chan, &new->hw, len);
+		set_desc_src(chan, &new->hw, src);
+		set_desc_dst(chan, &new->hw, dst);
+
+		if (!first)
+			first = new;
+		else
+			set_desc_next(chan, &prev->hw, new->async_tx.phys);
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+		prev = new;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+
+		/* update metadata */
+		dst_avail -= len;
+		src_avail -= len;
+
+fetch:
+		/* fetch the next dst scatterlist entry */
+		if (dst_avail == 0) {
+
+			/* no more entries: we're done */
+			if (dst_nents == 0)
+				break;
+
+			/* fetch the next entry: if there are no more: done */
+			dst_sg = sg_next(dst_sg);
+			if (dst_sg == NULL)
+				break;
+
+			dst_nents--;
+			dst_avail = sg_dma_len(dst_sg);
+		}
+
+		/* fetch the next src scatterlist entry */
+		if (src_avail == 0) {
+
+			/* no more entries: we're done */
+			if (src_nents == 0)
+				break;
+
+			/* fetch the next entry: if there are no more: done */
+			src_sg = sg_next(src_sg);
+			if (src_sg == NULL)
+				break;
+
+			src_nents--;
+			src_avail = sg_dma_len(src_sg);
+		}
+	}
+
+	new->async_tx.flags = flags; /* client is in control of this ack */
+	new->async_tx.cookie = -EBUSY;
+
+	/* Set End-of-link to the last link descriptor of new list */
+	set_ld_eol(chan, new);
+
+	return &first->async_tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	fsldma_free_desc_list_reverse(chan, &first->tx_list);
+	return NULL;
+}
+
+/**
+ * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
+ * @flags: DMAEngine flags
+ * @context: transaction context (ignored)
+ *
+ * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
+ * DMA_SLAVE API, this gets the device-specific information from the
+ * chan->private variable.
+ */
+static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
+	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	/*
+	 * This operation is not supported on the Freescale DMA controller
+	 *
+	 * However, we need to provide the function pointer to allow the
+	 * device_control() method to work.
+	 */
+	return NULL;
+}
+
+static int fsl_dma_device_control(struct dma_chan *dchan,
+				  enum dma_ctrl_cmd cmd, unsigned long arg)
+{
+	struct dma_slave_config *config;
+	struct fsldma_chan *chan;
+	unsigned long flags;
+	int size;
+
+	if (!dchan)
+		return -EINVAL;
+
+	chan = to_fsl_chan(dchan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		spin_lock_irqsave(&chan->desc_lock, flags);
+
+		/* Halt the DMA engine */
+		dma_halt(chan);
+
+		/* Remove and free all of the descriptors in the LD queue */
+		fsldma_free_desc_list(chan, &chan->ld_pending);
+		fsldma_free_desc_list(chan, &chan->ld_running);
+		chan->idle = true;
+
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		return 0;
+
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+
+		/* make sure the channel supports setting burst size */
+		if (!chan->set_request_count)
+			return -ENXIO;
+
+		/* we set the controller burst size depending on direction */
+		if (config->direction == DMA_MEM_TO_DEV)
+			size = config->dst_addr_width * config->dst_maxburst;
+		else
+			size = config->src_addr_width * config->src_maxburst;
+
+		chan->set_request_count(chan, size);
+		return 0;
+
+	case FSLDMA_EXTERNAL_START:
+
+		/* make sure the channel supports external start */
+		if (!chan->toggle_ext_start)
+			return -ENXIO;
+
+		chan->toggle_ext_start(chan, arg);
+		return 0;
+
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+/**
+ * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, and then
+ * free the descriptor.
+ */
+static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
+				      struct fsl_desc_sw *desc)
+{
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+	struct device *dev = chan->common.device->dev;
+	dma_addr_t src = get_desc_src(chan, desc);
+	dma_addr_t dst = get_desc_dst(chan, desc);
+	u32 len = get_desc_cnt(chan, desc);
+
+	/* Run the link descriptor callback function */
+	if (txd->callback) {
+#ifdef FSL_DMA_LD_DEBUG
+		chan_dbg(chan, "LD %p callback\n", desc);
+#endif
+		txd->callback(txd->callback_param);
+	}
+
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
+
+	/* Unmap the dst buffer, if requested */
+	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
+		else
+			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
+	}
+
+	/* Unmap the src buffer, if requested */
+	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
+	}
+
+#ifdef FSL_DMA_LD_DEBUG
+	chan_dbg(chan, "LD %p free\n", desc);
+#endif
+	dma_pool_free(chan->desc_pool, desc, txd->phys);
+}
+
+/**
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
+	 */
+	if (list_empty(&chan->ld_pending)) {
+		chan_dbg(chan, "no pending LDs\n");
+		return;
+	}
+
+	/*
+	 * The DMA controller is not idle, which means that the interrupt
+	 * handler will start any queued transactions when it runs after
+	 * this transaction finishes
+	 */
+	if (!chan->idle) {
+		chan_dbg(chan, "DMA controller still busy\n");
+		return;
+	}
+
+	/*
+	 * If there are some link descriptors which have not been
+	 * transferred, we need to start the controller
+	 */
+
+	/*
+	 * Move all elements from the queue of pending transactions
+	 * onto the list of running transactions
+	 */
+	chan_dbg(chan, "idle, starting controller\n");
+	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+	/*
+	 * The 85xx DMA controller doesn't clear the channel start bit
+	 * automatically at the end of a transfer. Therefore we must clear
+	 * it in software before starting the transfer.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		u32 mode;
+
+		mode = DMA_IN(chan, &chan->regs->mr, 32);
+		mode &= ~FSL_DMA_MR_CS;
+		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	}
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_cdar(chan, desc->async_tx.phys);
+	get_cdar(chan);
+
+	dma_start(chan);
+	chan->idle = false;
+}
+
+/**
+ * fsl_dma_memcpy_issue_pending - Issue the DMA start command
+ * @chan : Freescale DMA channel
+ */
+static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	fsl_chan_xfer_ld_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+}
+
+/**
+ * fsl_tx_status - Determine the DMA status
+ * @chan : Freescale DMA channel
+ */
+static enum dma_status fsl_tx_status(struct dma_chan *dchan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	enum dma_status ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	return ret;
+}
+
+/*----------------------------------------------------------------------------*/
+/* Interrupt Handling                                                         */
+/*----------------------------------------------------------------------------*/
+
+static irqreturn_t fsldma_chan_irq(int irq, void *data)
+{
+	struct fsldma_chan *chan = data;
+	u32 stat;
+
+	/* save and clear the status register */
+	stat = get_sr(chan);
+	set_sr(chan, stat);
+	chan_dbg(chan, "irq: stat = 0x%x\n", stat);
+
+	/* check that this was really our device */
+	stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH);
+	if (!stat)
+		return IRQ_NONE;
+
+	if (stat & FSL_DMA_SR_TE)
+		chan_err(chan, "Transfer Error!\n");
+
+	/*
+	 * Programming Error
+	 * The DMA_INTERRUPT async_tx is a NULL transfer, which will
+	 * triger a PE interrupt.
+	 */
+	if (stat & FSL_DMA_SR_PE) {
+		chan_dbg(chan, "irq: Programming Error INT\n");
+		stat &= ~FSL_DMA_SR_PE;
+		if (get_bcr(chan) != 0)
+			chan_err(chan, "Programming Error!\n");
+	}
+
+	/*
+	 * For MPC8349, EOCDI event need to update cookie
+	 * and start the next transfer if it exist.
+	 */
+	if (stat & FSL_DMA_SR_EOCDI) {
+		chan_dbg(chan, "irq: End-of-Chain link INT\n");
+		stat &= ~FSL_DMA_SR_EOCDI;
+	}
+
+	/*
+	 * If it current transfer is the end-of-transfer,
+	 * we should clear the Channel Start bit for
+	 * prepare next transfer.
+	 */
+	if (stat & FSL_DMA_SR_EOLNI) {
+		chan_dbg(chan, "irq: End-of-link INT\n");
+		stat &= ~FSL_DMA_SR_EOLNI;
+	}
+
+	/* check that the DMA controller is really idle */
+	if (!dma_is_idle(chan))
+		chan_err(chan, "irq: controller not idle!\n");
+
+	/* check that we handled all of the bits */
+	if (stat)
+		chan_err(chan, "irq: unhandled sr 0x%08x\n", stat);
+
+	/*
+	 * Schedule the tasklet to handle all cleanup of the current
+	 * transaction. It will start a new transaction if there is
+	 * one pending.
+	 */
+	tasklet_schedule(&chan->tasklet);
+	chan_dbg(chan, "irq: Exit\n");
+	return IRQ_HANDLED;
+}
+
+static void dma_do_tasklet(unsigned long data)
+{
+	struct fsldma_chan *chan = (struct fsldma_chan *)data;
+	struct fsl_desc_sw *desc, *_desc;
+	LIST_HEAD(ld_cleanup);
+	unsigned long flags;
+
+	chan_dbg(chan, "tasklet entry\n");
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	/* update the cookie if we have some descriptors to cleanup */
+	if (!list_empty(&chan->ld_running)) {
+		dma_cookie_t cookie;
+
+		desc = to_fsl_desc(chan->ld_running.prev);
+		cookie = desc->async_tx.cookie;
+		dma_cookie_complete(&desc->async_tx);
+
+		chan_dbg(chan, "completed_cookie=%d\n", cookie);
+	}
+
+	/*
+	 * move the descriptors to a temporary list so we can drop the lock
+	 * during the entire cleanup operation
+	 */
+	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
+
+	/* the hardware is now idle and ready for more */
+	chan->idle = true;
+
+	/*
+	 * Start any pending transactions automatically
+	 *
+	 * In the ideal case, we keep the DMA controller busy while we go
+	 * ahead and free the descriptors below.
+	 */
+	fsl_chan_xfer_ld_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
+
+		/* Remove from the list of transactions */
+		list_del(&desc->node);
+
+		/* Run all cleanup for this descriptor */
+		fsldma_cleanup_descriptor(chan, desc);
+	}
+
+	chan_dbg(chan, "tasklet exit\n");
+}
+
+static irqreturn_t fsldma_ctrl_irq(int irq, void *data)
+{
+	struct fsldma_device *fdev = data;
+	struct fsldma_chan *chan;
+	unsigned int handled = 0;
+	u32 gsr, mask;
+	int i;
+
+	gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->regs)
+						   : in_le32(fdev->regs);
+	mask = 0xff000000;
+	dev_dbg(fdev->dev, "IRQ: gsr 0x%.8x\n", gsr);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		if (gsr & mask) {
+			dev_dbg(fdev->dev, "IRQ: chan %d\n", chan->id);
+			fsldma_chan_irq(irq, chan);
+			handled++;
+		}
+
+		gsr &= ~mask;
+		mask >>= 8;
+	}
+
+	return IRQ_RETVAL(handled);
+}
+
+static void fsldma_free_irqs(struct fsldma_device *fdev)
+{
+	struct fsldma_chan *chan;
+	int i;
+
+	if (fdev->irq != NO_IRQ) {
+		dev_dbg(fdev->dev, "free per-controller IRQ\n");
+		free_irq(fdev->irq, fdev);
+		return;
+	}
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (chan && chan->irq != NO_IRQ) {
+			chan_dbg(chan, "free per-channel IRQ\n");
+			free_irq(chan->irq, chan);
+		}
+	}
+}
+
+static int fsldma_request_irqs(struct fsldma_device *fdev)
+{
+	struct fsldma_chan *chan;
+	int ret;
+	int i;
+
+	/* if we have a per-controller IRQ, use that */
+	if (fdev->irq != NO_IRQ) {
+		dev_dbg(fdev->dev, "request per-controller IRQ\n");
+		ret = request_irq(fdev->irq, fsldma_ctrl_irq, IRQF_SHARED,
+				  "fsldma-controller", fdev);
+		return ret;
+	}
+
+	/* no per-controller IRQ, use the per-channel IRQs */
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		if (chan->irq == NO_IRQ) {
+			chan_err(chan, "interrupts property missing in device tree\n");
+			ret = -ENODEV;
+			goto out_unwind;
+		}
+
+		chan_dbg(chan, "request per-channel IRQ\n");
+		ret = request_irq(chan->irq, fsldma_chan_irq, IRQF_SHARED,
+				  "fsldma-chan", chan);
+		if (ret) {
+			chan_err(chan, "unable to request per-channel IRQ\n");
+			goto out_unwind;
+		}
+	}
+
+	return 0;
+
+out_unwind:
+	for (/* none */; i >= 0; i--) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		if (chan->irq == NO_IRQ)
+			continue;
+
+		free_irq(chan->irq, chan);
+	}
+
+	return ret;
+}
+
+/*----------------------------------------------------------------------------*/
+/* OpenFirmware Subsystem                                                     */
+/*----------------------------------------------------------------------------*/
+
+static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
+	struct device_node *node, u32 feature, const char *compatible)
+{
+	struct fsldma_chan *chan;
+	struct resource res;
+	int err;
+
+	/* alloc channel */
+	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+	if (!chan) {
+		dev_err(fdev->dev, "no free memory for DMA channels!\n");
+		err = -ENOMEM;
+		goto out_return;
+	}
+
+	/* ioremap registers for use */
+	chan->regs = of_iomap(node, 0);
+	if (!chan->regs) {
+		dev_err(fdev->dev, "unable to ioremap registers\n");
+		err = -ENOMEM;
+		goto out_free_chan;
+	}
+
+	err = of_address_to_resource(node, 0, &res);
+	if (err) {
+		dev_err(fdev->dev, "unable to find 'reg' property\n");
+		goto out_iounmap_regs;
+	}
+
+	chan->feature = feature;
+	if (!fdev->feature)
+		fdev->feature = chan->feature;
+
+	/*
+	 * If the DMA device's feature is different than the feature
+	 * of its channels, report the bug
+	 */
+	WARN_ON(fdev->feature != chan->feature);
+
+	chan->dev = fdev->dev;
+	chan->id = ((res.start - 0x100) & 0xfff) >> 7;
+	if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
+		dev_err(fdev->dev, "too many channels for device\n");
+		err = -EINVAL;
+		goto out_iounmap_regs;
+	}
+
+	fdev->chan[chan->id] = chan;
+	tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan);
+	snprintf(chan->name, sizeof(chan->name), "chan%d", chan->id);
+
+	/* Initialize the channel */
+	dma_init(chan);
+
+	/* Clear cdar registers */
+	set_cdar(chan, 0);
+
+	switch (chan->feature & FSL_DMA_IP_MASK) {
+	case FSL_DMA_IP_85XX:
+		chan->toggle_ext_pause = fsl_chan_toggle_ext_pause;
+	case FSL_DMA_IP_83XX:
+		chan->toggle_ext_start = fsl_chan_toggle_ext_start;
+		chan->set_src_loop_size = fsl_chan_set_src_loop_size;
+		chan->set_dst_loop_size = fsl_chan_set_dst_loop_size;
+		chan->set_request_count = fsl_chan_set_request_count;
+	}
+
+	spin_lock_init(&chan->desc_lock);
+	INIT_LIST_HEAD(&chan->ld_pending);
+	INIT_LIST_HEAD(&chan->ld_running);
+	chan->idle = true;
+
+	chan->common.device = &fdev->common;
+	dma_cookie_init(&chan->common);
+
+	/* find the IRQ line, if it exists in the device tree */
+	chan->irq = irq_of_parse_and_map(node, 0);
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&chan->common.device_node, &fdev->common.channels);
+	fdev->common.chancnt++;
+
+	dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible,
+		 chan->irq != NO_IRQ ? chan->irq : fdev->irq);
+
+	return 0;
+
+out_iounmap_regs:
+	iounmap(chan->regs);
+out_free_chan:
+	kfree(chan);
+out_return:
+	return err;
+}
+
+static void fsl_dma_chan_remove(struct fsldma_chan *chan)
+{
+	irq_dispose_mapping(chan->irq);
+	list_del(&chan->common.device_node);
+	iounmap(chan->regs);
+	kfree(chan);
+}
+
+static int __devinit fsldma_of_probe(struct platform_device *op)
+{
+	struct fsldma_device *fdev;
+	struct device_node *child;
+	int err;
+
+	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
+	if (!fdev) {
+		dev_err(&op->dev, "No enough memory for 'priv'\n");
+		err = -ENOMEM;
+		goto out_return;
+	}
+
+	fdev->dev = &op->dev;
+	INIT_LIST_HEAD(&fdev->common.channels);
+
+	/* ioremap the registers for use */
+	fdev->regs = of_iomap(op->dev.of_node, 0);
+	if (!fdev->regs) {
+		dev_err(&op->dev, "unable to ioremap registers\n");
+		err = -ENOMEM;
+		goto out_free_fdev;
+	}
+
+	/* map the channel IRQ if it exists, but don't hookup the handler yet */
+	fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0);
+
+	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
+	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+	dma_cap_set(DMA_SG, fdev->common.cap_mask);
+	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
+	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
+	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
+	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
+	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+	fdev->common.device_prep_dma_sg = fsl_dma_prep_sg;
+	fdev->common.device_tx_status = fsl_tx_status;
+	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+	fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
+	fdev->common.device_control = fsl_dma_device_control;
+	fdev->common.dev = &op->dev;
+
+	dma_set_mask(&(op->dev), DMA_BIT_MASK(36));
+
+	dev_set_drvdata(&op->dev, fdev);
+
+	/*
+	 * We cannot use of_platform_bus_probe() because there is no
+	 * of_platform_bus_remove(). Instead, we manually instantiate every DMA
+	 * channel object.
+	 */
+	for_each_child_of_node(op->dev.of_node, child) {
+		if (of_device_is_compatible(child, "fsl,eloplus-dma-channel")) {
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+				"fsl,eloplus-dma-channel");
+		}
+
+		if (of_device_is_compatible(child, "fsl,elo-dma-channel")) {
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+				"fsl,elo-dma-channel");
+		}
+	}
+
+	/*
+	 * Hookup the IRQ handler(s)
+	 *
+	 * If we have a per-controller interrupt, we prefer that to the
+	 * per-channel interrupts to reduce the number of shared interrupt
+	 * handlers on the same IRQ line
+	 */
+	err = fsldma_request_irqs(fdev);
+	if (err) {
+		dev_err(fdev->dev, "unable to request IRQs\n");
+		goto out_free_fdev;
+	}
+
+	dma_async_device_register(&fdev->common);
+	return 0;
+
+out_free_fdev:
+	irq_dispose_mapping(fdev->irq);
+	kfree(fdev);
+out_return:
+	return err;
+}
+
+static int fsldma_of_remove(struct platform_device *op)
+{
+	struct fsldma_device *fdev;
+	unsigned int i;
+
+	fdev = dev_get_drvdata(&op->dev);
+	dma_async_device_unregister(&fdev->common);
+
+	fsldma_free_irqs(fdev);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		if (fdev->chan[i])
+			fsl_dma_chan_remove(fdev->chan[i]);
+	}
+
+	iounmap(fdev->regs);
+	dev_set_drvdata(&op->dev, NULL);
+	kfree(fdev);
+
+	return 0;
+}
+
+static const struct of_device_id fsldma_of_ids[] = {
+	{ .compatible = "fsl,eloplus-dma", },
+	{ .compatible = "fsl,elo-dma", },
+	{}
+};
+
+static struct platform_driver fsldma_of_driver = {
+	.driver = {
+		.name = "fsl-elo-dma",
+		.owner = THIS_MODULE,
+		.of_match_table = fsldma_of_ids,
+	},
+	.probe = fsldma_of_probe,
+	.remove = fsldma_of_remove,
+};
+
+/*----------------------------------------------------------------------------*/
+/* Module Init / Exit                                                         */
+/*----------------------------------------------------------------------------*/
+
+static __init int fsldma_init(void)
+{
+	pr_info("Freescale Elo / Elo Plus DMA driver\n");
+	return platform_driver_register(&fsldma_of_driver);
+}
+
+static void __exit fsldma_exit(void)
+{
+	platform_driver_unregister(&fsldma_of_driver);
+}
+
+subsys_initcall(fsldma_init);
+module_exit(fsldma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
new file mode 100644
index 00000000..f5c38791
--- /dev/null
+++ b/drivers/dma/fsldma.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ *   Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ *   Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_FSLDMA_H
+#define __DMA_FSLDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+/* Define data structures needed by Freescale
+ * MPC8540 and MPC8349 DMA controller.
+ */
+#define FSL_DMA_MR_CS		0x00000001
+#define FSL_DMA_MR_CC		0x00000002
+#define FSL_DMA_MR_CA		0x00000008
+#define FSL_DMA_MR_EIE		0x00000040
+#define FSL_DMA_MR_XFE		0x00000020
+#define FSL_DMA_MR_EOLNIE	0x00000100
+#define FSL_DMA_MR_EOLSIE	0x00000080
+#define FSL_DMA_MR_EOSIE	0x00000200
+#define FSL_DMA_MR_CDSM		0x00000010
+#define FSL_DMA_MR_CTM		0x00000004
+#define FSL_DMA_MR_EMP_EN	0x00200000
+#define FSL_DMA_MR_EMS_EN	0x00040000
+#define FSL_DMA_MR_DAHE		0x00002000
+#define FSL_DMA_MR_SAHE		0x00001000
+
+/*
+ * Bandwidth/pause control determines how many bytes a given
+ * channel is allowed to transfer before the DMA engine pauses
+ * the current channel and switches to the next channel
+ */
+#define FSL_DMA_MR_BWC         0x08000000
+
+/* Special MR definition for MPC8349 */
+#define FSL_DMA_MR_EOTIE	0x00000080
+#define FSL_DMA_MR_PRC_RM	0x00000800
+
+#define FSL_DMA_SR_CH		0x00000020
+#define FSL_DMA_SR_PE		0x00000010
+#define FSL_DMA_SR_CB		0x00000004
+#define FSL_DMA_SR_TE		0x00000080
+#define FSL_DMA_SR_EOSI		0x00000002
+#define FSL_DMA_SR_EOLSI	0x00000001
+#define FSL_DMA_SR_EOCDI	0x00000001
+#define FSL_DMA_SR_EOLNI	0x00000008
+
+#define FSL_DMA_SATR_SBPATMU			0x20000000
+#define FSL_DMA_SATR_STRANSINT_RIO		0x00c00000
+#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ	0x00050000
+#define FSL_DMA_SATR_SREADTYPE_BP_IORH		0x00020000
+#define FSL_DMA_SATR_SREADTYPE_BP_NREAD		0x00040000
+#define FSL_DMA_SATR_SREADTYPE_BP_MREAD		0x00070000
+
+#define FSL_DMA_DATR_DBPATMU			0x20000000
+#define FSL_DMA_DATR_DTRANSINT_RIO		0x00c00000
+#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE	0x00050000
+#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH	0x00010000
+
+#define FSL_DMA_EOL		((u64)0x1)
+#define FSL_DMA_SNEN		((u64)0x10)
+#define FSL_DMA_EOSIE		0x8
+#define FSL_DMA_NLDA_MASK	(~(u64)0x1f)
+
+#define FSL_DMA_BCR_MAX_CNT	0x03ffffffu
+
+#define FSL_DMA_DGSR_TE		0x80
+#define FSL_DMA_DGSR_CH		0x20
+#define FSL_DMA_DGSR_PE		0x10
+#define FSL_DMA_DGSR_EOLNI	0x08
+#define FSL_DMA_DGSR_CB		0x04
+#define FSL_DMA_DGSR_EOSI	0x02
+#define FSL_DMA_DGSR_EOLSI	0x01
+
+typedef u64 __bitwise v64;
+typedef u32 __bitwise v32;
+
+struct fsl_dma_ld_hw {
+	v64 src_addr;
+	v64 dst_addr;
+	v64 next_ln_addr;
+	v32 count;
+	v32 reserve;
+} __attribute__((aligned(32)));
+
+struct fsl_desc_sw {
+	struct fsl_dma_ld_hw hw;
+	struct list_head node;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor async_tx;
+} __attribute__((aligned(32)));
+
+struct fsldma_chan_regs {
+	u32 mr;		/* 0x00 - Mode Register */
+	u32 sr;		/* 0x04 - Status Register */
+	u64 cdar;	/* 0x08 - Current descriptor address register */
+	u64 sar;	/* 0x10 - Source Address Register */
+	u64 dar;	/* 0x18 - Destination Address Register */
+	u32 bcr;	/* 0x20 - Byte Count Register */
+	u64 ndar;	/* 0x24 - Next Descriptor Address Register */
+};
+
+struct fsldma_chan;
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 4
+
+struct fsldma_device {
+	void __iomem *regs;	/* DGSR register base */
+	struct device *dev;
+	struct dma_device common;
+	struct fsldma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
+	u32 feature;		/* The same as DMA channels */
+	int irq;		/* Channel IRQ */
+};
+
+/* Define macros for fsldma_chan->feature property */
+#define FSL_DMA_LITTLE_ENDIAN	0x00000000
+#define FSL_DMA_BIG_ENDIAN	0x00000001
+
+#define FSL_DMA_IP_MASK		0x00000ff0
+#define FSL_DMA_IP_85XX		0x00000010
+#define FSL_DMA_IP_83XX		0x00000020
+
+#define FSL_DMA_CHAN_PAUSE_EXT	0x00001000
+#define FSL_DMA_CHAN_START_EXT	0x00002000
+
+struct fsldma_chan {
+	char name[8];			/* Channel name */
+	struct fsldma_chan_regs __iomem *regs;
+	spinlock_t desc_lock;		/* Descriptor operation lock */
+	struct list_head ld_pending;	/* Link descriptors queue */
+	struct list_head ld_running;	/* Link descriptors queue */
+	struct dma_chan common;		/* DMA common channel */
+	struct dma_pool *desc_pool;	/* Descriptors pool */
+	struct device *dev;		/* Channel device */
+	int irq;			/* Channel IRQ */
+	int id;				/* Raw id of this channel */
+	struct tasklet_struct tasklet;
+	u32 feature;
+	bool idle;			/* DMA controller is idle */
+
+	void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable);
+	void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);
+	void (*set_src_loop_size)(struct fsldma_chan *fsl_chan, int size);
+	void (*set_dst_loop_size)(struct fsldma_chan *fsl_chan, int size);
+	void (*set_request_count)(struct fsldma_chan *fsl_chan, int size);
+};
+
+#define to_fsl_chan(chan) container_of(chan, struct fsldma_chan, common)
+#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
+#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
+
+#ifndef __powerpc64__
+static u64 in_be64(const u64 __iomem *addr)
+{
+	return ((u64)in_be32((u32 __iomem *)addr) << 32) |
+		(in_be32((u32 __iomem *)addr + 1));
+}
+
+static void out_be64(u64 __iomem *addr, u64 val)
+{
+	out_be32((u32 __iomem *)addr, val >> 32);
+	out_be32((u32 __iomem *)addr + 1, (u32)val);
+}
+
+/* There is no asm instructions for 64 bits reverse loads and stores */
+static u64 in_le64(const u64 __iomem *addr)
+{
+	return ((u64)in_le32((u32 __iomem *)addr + 1) << 32) |
+		(in_le32((u32 __iomem *)addr));
+}
+
+static void out_le64(u64 __iomem *addr, u64 val)
+{
+	out_le32((u32 __iomem *)addr + 1, val >> 32);
+	out_le32((u32 __iomem *)addr, (u32)val);
+}
+#endif
+
+#define DMA_IN(fsl_chan, addr, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			in_be##width(addr) : in_le##width(addr))
+#define DMA_OUT(fsl_chan, addr, val, width)				\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			out_be##width(addr, val) : out_le##width(addr, val))
+
+#define DMA_TO_CPU(fsl_chan, d, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			be##width##_to_cpu((__force __be##width)(v##width)d) : \
+			le##width##_to_cpu((__force __le##width)(v##width)d))
+#define CPU_TO_DMA(fsl_chan, c, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			(__force v##width)cpu_to_be##width(c) :		\
+			(__force v##width)cpu_to_le##width(c))
+
+#endif	/* __DMA_FSLDMA_H */
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
new file mode 100644
index 00000000..bb787d8e
--- /dev/null
+++ b/drivers/dma/imx-dma.c
@@ -0,0 +1,1137 @@
+/*
+ * drivers/dma/imx-dma.c
+ *
+ * This file contains a driver for the Freescale i.MX DMA engine
+ * found on i.MX1/21/27
+ *
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+
+#include <asm/irq.h>
+#include <mach/dma.h>
+#include <mach/hardware.h>
+
+#include "dmaengine.h"
+#define IMXDMA_MAX_CHAN_DESCRIPTORS	16
+#define IMX_DMA_CHANNELS  16
+
+#define IMX_DMA_2D_SLOTS	2
+#define IMX_DMA_2D_SLOT_A	0
+#define IMX_DMA_2D_SLOT_B	1
+
+#define IMX_DMA_LENGTH_LOOP	((unsigned int)-1)
+#define IMX_DMA_MEMSIZE_32	(0 << 4)
+#define IMX_DMA_MEMSIZE_8	(1 << 4)
+#define IMX_DMA_MEMSIZE_16	(2 << 4)
+#define IMX_DMA_TYPE_LINEAR	(0 << 10)
+#define IMX_DMA_TYPE_2D		(1 << 10)
+#define IMX_DMA_TYPE_FIFO	(2 << 10)
+
+#define IMX_DMA_ERR_BURST     (1 << 0)
+#define IMX_DMA_ERR_REQUEST   (1 << 1)
+#define IMX_DMA_ERR_TRANSFER  (1 << 2)
+#define IMX_DMA_ERR_BUFFER    (1 << 3)
+#define IMX_DMA_ERR_TIMEOUT   (1 << 4)
+
+#define DMA_DCR     0x00		/* Control Register */
+#define DMA_DISR    0x04		/* Interrupt status Register */
+#define DMA_DIMR    0x08		/* Interrupt mask Register */
+#define DMA_DBTOSR  0x0c		/* Burst timeout status Register */
+#define DMA_DRTOSR  0x10		/* Request timeout Register */
+#define DMA_DSESR   0x14		/* Transfer Error Status Register */
+#define DMA_DBOSR   0x18		/* Buffer overflow status Register */
+#define DMA_DBTOCR  0x1c		/* Burst timeout control Register */
+#define DMA_WSRA    0x40		/* W-Size Register A */
+#define DMA_XSRA    0x44		/* X-Size Register A */
+#define DMA_YSRA    0x48		/* Y-Size Register A */
+#define DMA_WSRB    0x4c		/* W-Size Register B */
+#define DMA_XSRB    0x50		/* X-Size Register B */
+#define DMA_YSRB    0x54		/* Y-Size Register B */
+#define DMA_SAR(x)  (0x80 + ((x) << 6))	/* Source Address Registers */
+#define DMA_DAR(x)  (0x84 + ((x) << 6))	/* Destination Address Registers */
+#define DMA_CNTR(x) (0x88 + ((x) << 6))	/* Count Registers */
+#define DMA_CCR(x)  (0x8c + ((x) << 6))	/* Control Registers */
+#define DMA_RSSR(x) (0x90 + ((x) << 6))	/* Request source select Registers */
+#define DMA_BLR(x)  (0x94 + ((x) << 6))	/* Burst length Registers */
+#define DMA_RTOR(x) (0x98 + ((x) << 6))	/* Request timeout Registers */
+#define DMA_BUCR(x) (0x98 + ((x) << 6))	/* Bus Utilization Registers */
+#define DMA_CCNR(x) (0x9C + ((x) << 6))	/* Channel counter Registers */
+
+#define DCR_DRST           (1<<1)
+#define DCR_DEN            (1<<0)
+#define DBTOCR_EN          (1<<15)
+#define DBTOCR_CNT(x)      ((x) & 0x7fff)
+#define CNTR_CNT(x)        ((x) & 0xffffff)
+#define CCR_ACRPT          (1<<14)
+#define CCR_DMOD_LINEAR    (0x0 << 12)
+#define CCR_DMOD_2D        (0x1 << 12)
+#define CCR_DMOD_FIFO      (0x2 << 12)
+#define CCR_DMOD_EOBFIFO   (0x3 << 12)
+#define CCR_SMOD_LINEAR    (0x0 << 10)
+#define CCR_SMOD_2D        (0x1 << 10)
+#define CCR_SMOD_FIFO      (0x2 << 10)
+#define CCR_SMOD_EOBFIFO   (0x3 << 10)
+#define CCR_MDIR_DEC       (1<<9)
+#define CCR_MSEL_B         (1<<8)
+#define CCR_DSIZ_32        (0x0 << 6)
+#define CCR_DSIZ_8         (0x1 << 6)
+#define CCR_DSIZ_16        (0x2 << 6)
+#define CCR_SSIZ_32        (0x0 << 4)
+#define CCR_SSIZ_8         (0x1 << 4)
+#define CCR_SSIZ_16        (0x2 << 4)
+#define CCR_REN            (1<<3)
+#define CCR_RPT            (1<<2)
+#define CCR_FRC            (1<<1)
+#define CCR_CEN            (1<<0)
+#define RTOR_EN            (1<<15)
+#define RTOR_CLK           (1<<14)
+#define RTOR_PSC           (1<<13)
+
+enum  imxdma_prep_type {
+	IMXDMA_DESC_MEMCPY,
+	IMXDMA_DESC_INTERLEAVED,
+	IMXDMA_DESC_SLAVE_SG,
+	IMXDMA_DESC_CYCLIC,
+};
+
+struct imx_dma_2d_config {
+	u16		xsr;
+	u16		ysr;
+	u16		wsr;
+	int		count;
+};
+
+struct imxdma_desc {
+	struct list_head		node;
+	struct dma_async_tx_descriptor	desc;
+	enum dma_status			status;
+	dma_addr_t			src;
+	dma_addr_t			dest;
+	size_t				len;
+	enum dma_transfer_direction	direction;
+	enum imxdma_prep_type		type;
+	/* For memcpy and interleaved */
+	unsigned int			config_port;
+	unsigned int			config_mem;
+	/* For interleaved transfers */
+	unsigned int			x;
+	unsigned int			y;
+	unsigned int			w;
+	/* For slave sg and cyclic */
+	struct scatterlist		*sg;
+	unsigned int			sgcount;
+};
+
+struct imxdma_channel {
+	int				hw_chaining;
+	struct timer_list		watchdog;
+	struct imxdma_engine		*imxdma;
+	unsigned int			channel;
+
+	struct tasklet_struct		dma_tasklet;
+	struct list_head		ld_free;
+	struct list_head		ld_queue;
+	struct list_head		ld_active;
+	int				descs_allocated;
+	enum dma_slave_buswidth		word_size;
+	dma_addr_t			per_address;
+	u32				watermark_level;
+	struct dma_chan			chan;
+	struct dma_async_tx_descriptor	desc;
+	enum dma_status			status;
+	int				dma_request;
+	struct scatterlist		*sg_list;
+	u32				ccr_from_device;
+	u32				ccr_to_device;
+	bool				enabled_2d;
+	int				slot_2d;
+};
+
+struct imxdma_engine {
+	struct device			*dev;
+	struct device_dma_parameters	dma_parms;
+	struct dma_device		dma_device;
+	void __iomem			*base;
+	struct clk			*dma_clk;
+	spinlock_t			lock;
+	struct imx_dma_2d_config	slots_2d[IMX_DMA_2D_SLOTS];
+	struct imxdma_channel		channel[IMX_DMA_CHANNELS];
+};
+
+static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct imxdma_channel, chan);
+}
+
+static inline bool imxdma_chan_is_doing_cyclic(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_desc *desc;
+
+	if (!list_empty(&imxdmac->ld_active)) {
+		desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc,
+					node);
+		if (desc->type == IMXDMA_DESC_CYCLIC)
+			return true;
+	}
+	return false;
+}
+
+
+
+static void imx_dmav1_writel(struct imxdma_engine *imxdma, unsigned val,
+			     unsigned offset)
+{
+	__raw_writel(val, imxdma->base + offset);
+}
+
+static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset)
+{
+	return __raw_readl(imxdma->base + offset);
+}
+
+static int imxdma_hw_chain(struct imxdma_channel *imxdmac)
+{
+	if (cpu_is_mx27())
+		return imxdmac->hw_chaining;
+	else
+		return 0;
+}
+
+/*
+ * imxdma_sg_next - prepare next chunk for scatter-gather DMA emulation
+ */
+static inline int imxdma_sg_next(struct imxdma_desc *d)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct scatterlist *sg = d->sg;
+	unsigned long now;
+
+	now = min(d->len, sg->length);
+	if (d->len != IMX_DMA_LENGTH_LOOP)
+		d->len -= now;
+
+	if (d->direction == DMA_DEV_TO_MEM)
+		imx_dmav1_writel(imxdma, sg->dma_address,
+				 DMA_DAR(imxdmac->channel));
+	else
+		imx_dmav1_writel(imxdma, sg->dma_address,
+				 DMA_SAR(imxdmac->channel));
+
+	imx_dmav1_writel(imxdma, now, DMA_CNTR(imxdmac->channel));
+
+	dev_dbg(imxdma->dev, " %s channel: %d dst 0x%08x, src 0x%08x, "
+		"size 0x%08x\n", __func__, imxdmac->channel,
+		 imx_dmav1_readl(imxdma, DMA_DAR(imxdmac->channel)),
+		 imx_dmav1_readl(imxdma, DMA_SAR(imxdmac->channel)),
+		 imx_dmav1_readl(imxdma, DMA_CNTR(imxdmac->channel)));
+
+	return now;
+}
+
+static void imxdma_enable_hw(struct imxdma_desc *d)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+	unsigned long flags;
+
+	dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+	local_irq_save(flags);
+
+	imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) &
+			 ~(1 << channel), DMA_DIMR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) |
+			 CCR_CEN | CCR_ACRPT, DMA_CCR(channel));
+
+	if ((cpu_is_mx21() || cpu_is_mx27()) &&
+			d->sg && imxdma_hw_chain(imxdmac)) {
+		d->sg = sg_next(d->sg);
+		if (d->sg) {
+			u32 tmp;
+			imxdma_sg_next(d);
+			tmp = imx_dmav1_readl(imxdma, DMA_CCR(channel));
+			imx_dmav1_writel(imxdma, tmp | CCR_RPT | CCR_ACRPT,
+					 DMA_CCR(channel));
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+static void imxdma_disable_hw(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+	unsigned long flags;
+
+	dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+	if (imxdma_hw_chain(imxdmac))
+		del_timer(&imxdmac->watchdog);
+
+	local_irq_save(flags);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) |
+			 (1 << channel), DMA_DIMR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) &
+			 ~CCR_CEN, DMA_CCR(channel));
+	imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+	local_irq_restore(flags);
+}
+
+static void imxdma_watchdog(unsigned long data)
+{
+	struct imxdma_channel *imxdmac = (struct imxdma_channel *)data;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+
+	imx_dmav1_writel(imxdma, 0, DMA_CCR(channel));
+
+	/* Tasklet watchdog error handler */
+	tasklet_schedule(&imxdmac->dma_tasklet);
+	dev_dbg(imxdma->dev, "channel %d: watchdog timeout!\n",
+		imxdmac->channel);
+}
+
+static irqreturn_t imxdma_err_handler(int irq, void *dev_id)
+{
+	struct imxdma_engine *imxdma = dev_id;
+	unsigned int err_mask;
+	int i, disr;
+	int errcode;
+
+	disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+	err_mask = imx_dmav1_readl(imxdma, DMA_DBTOSR) |
+		   imx_dmav1_readl(imxdma, DMA_DRTOSR) |
+		   imx_dmav1_readl(imxdma, DMA_DSESR)  |
+		   imx_dmav1_readl(imxdma, DMA_DBOSR);
+
+	if (!err_mask)
+		return IRQ_HANDLED;
+
+	imx_dmav1_writel(imxdma, disr & err_mask, DMA_DISR);
+
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		if (!(err_mask & (1 << i)))
+			continue;
+		errcode = 0;
+
+		if (imx_dmav1_readl(imxdma, DMA_DBTOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DBTOSR);
+			errcode |= IMX_DMA_ERR_BURST;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DRTOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DRTOSR);
+			errcode |= IMX_DMA_ERR_REQUEST;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DSESR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DSESR);
+			errcode |= IMX_DMA_ERR_TRANSFER;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DBOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DBOSR);
+			errcode |= IMX_DMA_ERR_BUFFER;
+		}
+		/* Tasklet error handler */
+		tasklet_schedule(&imxdma->channel[i].dma_tasklet);
+
+		printk(KERN_WARNING
+		       "DMA timeout on channel %d -%s%s%s%s\n", i,
+		       errcode & IMX_DMA_ERR_BURST ?    " burst" : "",
+		       errcode & IMX_DMA_ERR_REQUEST ?  " request" : "",
+		       errcode & IMX_DMA_ERR_TRANSFER ? " transfer" : "",
+		       errcode & IMX_DMA_ERR_BUFFER ?   " buffer" : "");
+	}
+	return IRQ_HANDLED;
+}
+
+static void dma_irq_handle_channel(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int chno = imxdmac->channel;
+	struct imxdma_desc *desc;
+
+	spin_lock(&imxdma->lock);
+	if (list_empty(&imxdmac->ld_active)) {
+		spin_unlock(&imxdma->lock);
+		goto out;
+	}
+
+	desc = list_first_entry(&imxdmac->ld_active,
+				struct imxdma_desc,
+				node);
+	spin_unlock(&imxdma->lock);
+
+	if (desc->sg) {
+		u32 tmp;
+		desc->sg = sg_next(desc->sg);
+
+		if (desc->sg) {
+			imxdma_sg_next(desc);
+
+			tmp = imx_dmav1_readl(imxdma, DMA_CCR(chno));
+
+			if (imxdma_hw_chain(imxdmac)) {
+				/* FIXME: The timeout should probably be
+				 * configurable
+				 */
+				mod_timer(&imxdmac->watchdog,
+					jiffies + msecs_to_jiffies(500));
+
+				tmp |= CCR_CEN | CCR_RPT | CCR_ACRPT;
+				imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+			} else {
+				imx_dmav1_writel(imxdma, tmp & ~CCR_CEN,
+						 DMA_CCR(chno));
+				tmp |= CCR_CEN;
+			}
+
+			imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+
+			if (imxdma_chan_is_doing_cyclic(imxdmac))
+				/* Tasklet progression */
+				tasklet_schedule(&imxdmac->dma_tasklet);
+
+			return;
+		}
+
+		if (imxdma_hw_chain(imxdmac)) {
+			del_timer(&imxdmac->watchdog);
+			return;
+		}
+	}
+
+out:
+	imx_dmav1_writel(imxdma, 0, DMA_CCR(chno));
+	/* Tasklet irq */
+	tasklet_schedule(&imxdmac->dma_tasklet);
+}
+
+static irqreturn_t dma_irq_handler(int irq, void *dev_id)
+{
+	struct imxdma_engine *imxdma = dev_id;
+	int i, disr;
+
+	if (cpu_is_mx21() || cpu_is_mx27())
+		imxdma_err_handler(irq, dev_id);
+
+	disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+	dev_dbg(imxdma->dev, "%s called, disr=0x%08x\n", __func__, disr);
+
+	imx_dmav1_writel(imxdma, disr, DMA_DISR);
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		if (disr & (1 << i))
+			dma_irq_handle_channel(&imxdma->channel[i]);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int imxdma_xfer_desc(struct imxdma_desc *d)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned long flags;
+	int slot = -1;
+	int i;
+
+	/* Configure and enable */
+	switch (d->type) {
+	case IMXDMA_DESC_INTERLEAVED:
+		/* Try to get a free 2D slot */
+		spin_lock_irqsave(&imxdma->lock, flags);
+		for (i = 0; i < IMX_DMA_2D_SLOTS; i++) {
+			if ((imxdma->slots_2d[i].count > 0) &&
+			((imxdma->slots_2d[i].xsr != d->x) ||
+			(imxdma->slots_2d[i].ysr != d->y) ||
+			(imxdma->slots_2d[i].wsr != d->w)))
+				continue;
+			slot = i;
+			break;
+		}
+		if (slot < 0)
+			return -EBUSY;
+
+		imxdma->slots_2d[slot].xsr = d->x;
+		imxdma->slots_2d[slot].ysr = d->y;
+		imxdma->slots_2d[slot].wsr = d->w;
+		imxdma->slots_2d[slot].count++;
+
+		imxdmac->slot_2d = slot;
+		imxdmac->enabled_2d = true;
+		spin_unlock_irqrestore(&imxdma->lock, flags);
+
+		if (slot == IMX_DMA_2D_SLOT_A) {
+			d->config_mem &= ~CCR_MSEL_B;
+			d->config_port &= ~CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRA);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRA);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRA);
+		} else {
+			d->config_mem |= CCR_MSEL_B;
+			d->config_port |= CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRB);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRB);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRB);
+		}
+		/*
+		 * We fall-through here intentionally, since a 2D transfer is
+		 * similar to MEMCPY just adding the 2D slot configuration.
+		 */
+	case IMXDMA_DESC_MEMCPY:
+		imx_dmav1_writel(imxdma, d->src, DMA_SAR(imxdmac->channel));
+		imx_dmav1_writel(imxdma, d->dest, DMA_DAR(imxdmac->channel));
+		imx_dmav1_writel(imxdma, d->config_mem | (d->config_port << 2),
+			 DMA_CCR(imxdmac->channel));
+
+		imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
+
+		dev_dbg(imxdma->dev, "%s channel: %d dest=0x%08x src=0x%08x "
+			"dma_length=%d\n", __func__, imxdmac->channel,
+			d->dest, d->src, d->len);
+
+		break;
+	/* Cyclic transfer is the same as slave_sg with special sg configuration. */
+	case IMXDMA_DESC_CYCLIC:
+	case IMXDMA_DESC_SLAVE_SG:
+		if (d->direction == DMA_DEV_TO_MEM) {
+			imx_dmav1_writel(imxdma, imxdmac->per_address,
+					 DMA_SAR(imxdmac->channel));
+			imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
+					 DMA_CCR(imxdmac->channel));
+
+			dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
+				"total length=%d dev_addr=0x%08x (dev2mem)\n",
+				__func__, imxdmac->channel, d->sg, d->sgcount,
+				d->len, imxdmac->per_address);
+		} else if (d->direction == DMA_MEM_TO_DEV) {
+			imx_dmav1_writel(imxdma, imxdmac->per_address,
+					 DMA_DAR(imxdmac->channel));
+			imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
+					 DMA_CCR(imxdmac->channel));
+
+			dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
+				"total length=%d dev_addr=0x%08x (mem2dev)\n",
+				__func__, imxdmac->channel, d->sg, d->sgcount,
+				d->len, imxdmac->per_address);
+		} else {
+			dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
+				__func__, imxdmac->channel);
+			return -EINVAL;
+		}
+
+		imxdma_sg_next(d);
+
+		break;
+	default:
+		return -EINVAL;
+	}
+	imxdma_enable_hw(d);
+	return 0;
+}
+
+static void imxdma_tasklet(unsigned long data)
+{
+	struct imxdma_channel *imxdmac = (void *)data;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+
+	spin_lock(&imxdma->lock);
+
+	if (list_empty(&imxdmac->ld_active)) {
+		/* Someone might have called terminate all */
+		goto out;
+	}
+	desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node);
+
+	if (desc->desc.callback)
+		desc->desc.callback(desc->desc.callback_param);
+
+	/* If we are dealing with a cyclic descriptor keep it on ld_active
+	 * and dont mark the descripor as complete.
+	 * Only in non-cyclic cases it would be marked as complete
+	 */
+	if (imxdma_chan_is_doing_cyclic(imxdmac))
+		goto out;
+	else
+		dma_cookie_complete(&desc->desc);
+
+	/* Free 2D slot if it was an interleaved transfer */
+	if (imxdmac->enabled_2d) {
+		imxdma->slots_2d[imxdmac->slot_2d].count--;
+		imxdmac->enabled_2d = false;
+	}
+
+	list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free);
+
+	if (!list_empty(&imxdmac->ld_queue)) {
+		desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc,
+					node);
+		list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active);
+		if (imxdma_xfer_desc(desc) < 0)
+			dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n",
+				 __func__, imxdmac->channel);
+	}
+out:
+	spin_unlock(&imxdma->lock);
+}
+
+static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct dma_slave_config *dmaengine_cfg = (void *)arg;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned long flags;
+	unsigned int mode = 0;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		imxdma_disable_hw(imxdmac);
+
+		spin_lock_irqsave(&imxdma->lock, flags);
+		list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+		list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
+		spin_unlock_irqrestore(&imxdma->lock, flags);
+		return 0;
+	case DMA_SLAVE_CONFIG:
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
+			imxdmac->per_address = dmaengine_cfg->src_addr;
+			imxdmac->watermark_level = dmaengine_cfg->src_maxburst;
+			imxdmac->word_size = dmaengine_cfg->src_addr_width;
+		} else {
+			imxdmac->per_address = dmaengine_cfg->dst_addr;
+			imxdmac->watermark_level = dmaengine_cfg->dst_maxburst;
+			imxdmac->word_size = dmaengine_cfg->dst_addr_width;
+		}
+
+		switch (imxdmac->word_size) {
+		case DMA_SLAVE_BUSWIDTH_1_BYTE:
+			mode = IMX_DMA_MEMSIZE_8;
+			break;
+		case DMA_SLAVE_BUSWIDTH_2_BYTES:
+			mode = IMX_DMA_MEMSIZE_16;
+			break;
+		default:
+		case DMA_SLAVE_BUSWIDTH_4_BYTES:
+			mode = IMX_DMA_MEMSIZE_32;
+			break;
+		}
+
+		imxdmac->hw_chaining = 1;
+		if (!imxdma_hw_chain(imxdmac))
+			return -EINVAL;
+		imxdmac->ccr_from_device = (mode | IMX_DMA_TYPE_FIFO) |
+			((IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) << 2) |
+			CCR_REN;
+		imxdmac->ccr_to_device =
+			(IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) |
+			((mode | IMX_DMA_TYPE_FIFO) << 2) | CCR_REN;
+		imx_dmav1_writel(imxdma, imxdmac->dma_request,
+				 DMA_RSSR(imxdmac->channel));
+
+		/* Set burst length */
+		imx_dmav1_writel(imxdma, imxdmac->watermark_level *
+				imxdmac->word_size, DMA_BLR(imxdmac->channel));
+
+		return 0;
+	default:
+		return -ENOSYS;
+	}
+
+	return -EINVAL;
+}
+
+static enum dma_status imxdma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+	list_move_tail(imxdmac->ld_free.next, &imxdmac->ld_queue);
+	cookie = dma_cookie_assign(tx);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+
+	return cookie;
+}
+
+static int imxdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imx_dma_data *data = chan->private;
+
+	if (data != NULL)
+		imxdmac->dma_request = data->dma_request;
+
+	while (imxdmac->descs_allocated < IMXDMA_MAX_CHAN_DESCRIPTORS) {
+		struct imxdma_desc *desc;
+
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc)
+			break;
+		__memzero(&desc->desc, sizeof(struct dma_async_tx_descriptor));
+		dma_async_tx_descriptor_init(&desc->desc, chan);
+		desc->desc.tx_submit = imxdma_tx_submit;
+		/* txd.flags will be overwritten in prep funcs */
+		desc->desc.flags = DMA_CTRL_ACK;
+		desc->status = DMA_SUCCESS;
+
+		list_add_tail(&desc->node, &imxdmac->ld_free);
+		imxdmac->descs_allocated++;
+	}
+
+	if (!imxdmac->descs_allocated)
+		return -ENOMEM;
+
+	return imxdmac->descs_allocated;
+}
+
+static void imxdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc, *_desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+
+	imxdma_disable_hw(imxdmac);
+	list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+	list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
+
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &imxdmac->ld_free, node) {
+		kfree(desc);
+		imxdmac->descs_allocated--;
+	}
+	INIT_LIST_HEAD(&imxdmac->ld_free);
+
+	if (imxdmac->sg_list) {
+		kfree(imxdmac->sg_list);
+		imxdmac->sg_list = NULL;
+	}
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct scatterlist *sg;
+	int i, dma_length = 0;
+	struct imxdma_desc *desc;
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_length += sg->length;
+	}
+
+	switch (imxdmac->word_size) {
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		if (sgl->length & 3 || sgl->dma_address & 3)
+			return NULL;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		if (sgl->length & 1 || sgl->dma_address & 1)
+			return NULL;
+		break;
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		break;
+	default:
+		return NULL;
+	}
+
+	desc->type = IMXDMA_DESC_SLAVE_SG;
+	desc->sg = sgl;
+	desc->sgcount = sg_len;
+	desc->len = dma_length;
+	desc->direction = direction;
+	if (direction == DMA_DEV_TO_MEM) {
+		desc->src = imxdmac->per_address;
+	} else {
+		desc->dest = imxdmac->per_address;
+	}
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+	int i;
+	unsigned int periods = buf_len / period_len;
+
+	dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n",
+			__func__, imxdmac->channel, buf_len, period_len);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	if (imxdmac->sg_list)
+		kfree(imxdmac->sg_list);
+
+	imxdmac->sg_list = kcalloc(periods + 1,
+			sizeof(struct scatterlist), GFP_KERNEL);
+	if (!imxdmac->sg_list)
+		return NULL;
+
+	sg_init_table(imxdmac->sg_list, periods);
+
+	for (i = 0; i < periods; i++) {
+		imxdmac->sg_list[i].page_link = 0;
+		imxdmac->sg_list[i].offset = 0;
+		imxdmac->sg_list[i].dma_address = dma_addr;
+		imxdmac->sg_list[i].length = period_len;
+		dma_addr += period_len;
+	}
+
+	/* close the loop */
+	imxdmac->sg_list[periods].offset = 0;
+	imxdmac->sg_list[periods].length = 0;
+	imxdmac->sg_list[periods].page_link =
+		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
+
+	desc->type = IMXDMA_DESC_CYCLIC;
+	desc->sg = imxdmac->sg_list;
+	desc->sgcount = periods;
+	desc->len = IMX_DMA_LENGTH_LOOP;
+	desc->direction = direction;
+	if (direction == DMA_DEV_TO_MEM) {
+		desc->src = imxdmac->per_address;
+	} else {
+		desc->dest = imxdmac->per_address;
+	}
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest,
+	dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+
+	dev_dbg(imxdma->dev, "%s channel: %d src=0x%x dst=0x%x len=%d\n",
+			__func__, imxdmac->channel, src, dest, len);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_MEMCPY;
+	desc->src = src;
+	desc->dest = dest;
+	desc->len = len;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+	desc->config_mem = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+
+	dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n"
+		"   src_sgl=%s dst_sgl=%s numf=%d frame_size=%d\n", __func__,
+		imxdmac->channel, xt->src_start, xt->dst_start,
+		xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
+		xt->numf, xt->frame_size);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	if (xt->frame_size != 1 || xt->numf <= 0 || xt->dir != DMA_MEM_TO_MEM)
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_INTERLEAVED;
+	desc->src = xt->src_start;
+	desc->dest = xt->dst_start;
+	desc->x = xt->sgl[0].size;
+	desc->y = xt->numf;
+	desc->w = xt->sgl[0].icg + desc->x;
+	desc->len = desc->x * desc->y;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32;
+	desc->config_mem = IMX_DMA_MEMSIZE_32;
+	if (xt->src_sgl)
+		desc->config_mem |= IMX_DMA_TYPE_2D;
+	if (xt->dst_sgl)
+		desc->config_port |= IMX_DMA_TYPE_2D;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static void imxdma_issue_pending(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+	if (list_empty(&imxdmac->ld_active) &&
+	    !list_empty(&imxdmac->ld_queue)) {
+		desc = list_first_entry(&imxdmac->ld_queue,
+					struct imxdma_desc, node);
+
+		if (imxdma_xfer_desc(desc) < 0) {
+			dev_warn(imxdma->dev,
+				 "%s: channel: %d couldn't issue DMA xfer\n",
+				 __func__, imxdmac->channel);
+		} else {
+			list_move_tail(imxdmac->ld_queue.next,
+				       &imxdmac->ld_active);
+		}
+	}
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+}
+
+static int __init imxdma_probe(struct platform_device *pdev)
+	{
+	struct imxdma_engine *imxdma;
+	int ret, i;
+
+
+	imxdma = kzalloc(sizeof(*imxdma), GFP_KERNEL);
+	if (!imxdma)
+		return -ENOMEM;
+
+	if (cpu_is_mx1()) {
+		imxdma->base = MX1_IO_ADDRESS(MX1_DMA_BASE_ADDR);
+	} else if (cpu_is_mx21()) {
+		imxdma->base = MX21_IO_ADDRESS(MX21_DMA_BASE_ADDR);
+	} else if (cpu_is_mx27()) {
+		imxdma->base = MX27_IO_ADDRESS(MX27_DMA_BASE_ADDR);
+	} else {
+		kfree(imxdma);
+		return 0;
+	}
+
+	imxdma->dma_clk = clk_get(NULL, "dma");
+	if (IS_ERR(imxdma->dma_clk))
+		return PTR_ERR(imxdma->dma_clk);
+	clk_enable(imxdma->dma_clk);
+
+	/* reset DMA module */
+	imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR);
+
+	if (cpu_is_mx1()) {
+		ret = request_irq(MX1_DMA_INT, dma_irq_handler, 0, "DMA", imxdma);
+		if (ret) {
+			dev_warn(imxdma->dev, "Can't register IRQ for DMA\n");
+			kfree(imxdma);
+			return ret;
+		}
+
+		ret = request_irq(MX1_DMA_ERR, imxdma_err_handler, 0, "DMA", imxdma);
+		if (ret) {
+			dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n");
+			free_irq(MX1_DMA_INT, NULL);
+			kfree(imxdma);
+			return ret;
+		}
+	}
+
+	/* enable DMA module */
+	imx_dmav1_writel(imxdma, DCR_DEN, DMA_DCR);
+
+	/* clear all interrupts */
+	imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DISR);
+
+	/* disable interrupts */
+	imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DIMR);
+
+	INIT_LIST_HEAD(&imxdma->dma_device.channels);
+
+	dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, imxdma->dma_device.cap_mask);
+
+	/* Initialize 2D global parameters */
+	for (i = 0; i < IMX_DMA_2D_SLOTS; i++)
+		imxdma->slots_2d[i].count = 0;
+
+	spin_lock_init(&imxdma->lock);
+
+	/* Initialize channel parameters */
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		struct imxdma_channel *imxdmac = &imxdma->channel[i];
+
+		if (cpu_is_mx21() || cpu_is_mx27()) {
+			ret = request_irq(MX2x_INT_DMACH0 + i,
+					dma_irq_handler, 0, "DMA", imxdma);
+			if (ret) {
+				dev_warn(imxdma->dev, "Can't register IRQ %d "
+					 "for DMA channel %d\n",
+					 MX2x_INT_DMACH0 + i, i);
+				goto err_init;
+			}
+			init_timer(&imxdmac->watchdog);
+			imxdmac->watchdog.function = &imxdma_watchdog;
+			imxdmac->watchdog.data = (unsigned long)imxdmac;
+		}
+
+		imxdmac->imxdma = imxdma;
+
+		INIT_LIST_HEAD(&imxdmac->ld_queue);
+		INIT_LIST_HEAD(&imxdmac->ld_free);
+		INIT_LIST_HEAD(&imxdmac->ld_active);
+
+		tasklet_init(&imxdmac->dma_tasklet, imxdma_tasklet,
+			     (unsigned long)imxdmac);
+		imxdmac->chan.device = &imxdma->dma_device;
+		dma_cookie_init(&imxdmac->chan);
+		imxdmac->channel = i;
+
+		/* Add the channel to the DMAC list */
+		list_add_tail(&imxdmac->chan.device_node,
+			      &imxdma->dma_device.channels);
+	}
+
+	imxdma->dev = &pdev->dev;
+	imxdma->dma_device.dev = &pdev->dev;
+
+	imxdma->dma_device.device_alloc_chan_resources = imxdma_alloc_chan_resources;
+	imxdma->dma_device.device_free_chan_resources = imxdma_free_chan_resources;
+	imxdma->dma_device.device_tx_status = imxdma_tx_status;
+	imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg;
+	imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic;
+	imxdma->dma_device.device_prep_dma_memcpy = imxdma_prep_dma_memcpy;
+	imxdma->dma_device.device_prep_interleaved_dma = imxdma_prep_dma_interleaved;
+	imxdma->dma_device.device_control = imxdma_control;
+	imxdma->dma_device.device_issue_pending = imxdma_issue_pending;
+
+	platform_set_drvdata(pdev, imxdma);
+
+	imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */
+	imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms;
+	dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff);
+
+	ret = dma_async_device_register(&imxdma->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto err_init;
+	}
+
+	return 0;
+
+err_init:
+
+	if (cpu_is_mx21() || cpu_is_mx27()) {
+		while (--i >= 0)
+			free_irq(MX2x_INT_DMACH0 + i, NULL);
+	} else if cpu_is_mx1() {
+		free_irq(MX1_DMA_INT, NULL);
+		free_irq(MX1_DMA_ERR, NULL);
+	}
+
+	kfree(imxdma);
+	return ret;
+}
+
+static int __exit imxdma_remove(struct platform_device *pdev)
+{
+	struct imxdma_engine *imxdma = platform_get_drvdata(pdev);
+	int i;
+
+        dma_async_device_unregister(&imxdma->dma_device);
+
+	if (cpu_is_mx21() || cpu_is_mx27()) {
+		for (i = 0; i < IMX_DMA_CHANNELS; i++)
+			free_irq(MX2x_INT_DMACH0 + i, NULL);
+	} else if cpu_is_mx1() {
+		free_irq(MX1_DMA_INT, NULL);
+		free_irq(MX1_DMA_ERR, NULL);
+	}
+
+        kfree(imxdma);
+
+        return 0;
+}
+
+static struct platform_driver imxdma_driver = {
+	.driver		= {
+		.name	= "imx-dma",
+	},
+	.remove		= __exit_p(imxdma_remove),
+};
+
+static int __init imxdma_module_init(void)
+{
+	return platform_driver_probe(&imxdma_driver, imxdma_probe);
+}
+subsys_initcall(imxdma_module_init);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX dma driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
new file mode 100644
index 00000000..d3e38e28
--- /dev/null
+++ b/drivers/dma/imx-sdma.c
@@ -0,0 +1,1460 @@
+/*
+ * drivers/dma/imx-sdma.c
+ *
+ * This file contains a driver for the Freescale Smart DMA engine
+ *
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ *
+ * Based on code from Freescale:
+ *
+ * Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/irq.h>
+#include <mach/sdma.h>
+#include <mach/dma.h>
+#include <mach/hardware.h>
+
+#include "dmaengine.h"
+
+/* SDMA registers */
+#define SDMA_H_C0PTR		0x000
+#define SDMA_H_INTR		0x004
+#define SDMA_H_STATSTOP		0x008
+#define SDMA_H_START		0x00c
+#define SDMA_H_EVTOVR		0x010
+#define SDMA_H_DSPOVR		0x014
+#define SDMA_H_HOSTOVR		0x018
+#define SDMA_H_EVTPEND		0x01c
+#define SDMA_H_DSPENBL		0x020
+#define SDMA_H_RESET		0x024
+#define SDMA_H_EVTERR		0x028
+#define SDMA_H_INTRMSK		0x02c
+#define SDMA_H_PSW		0x030
+#define SDMA_H_EVTERRDBG	0x034
+#define SDMA_H_CONFIG		0x038
+#define SDMA_ONCE_ENB		0x040
+#define SDMA_ONCE_DATA		0x044
+#define SDMA_ONCE_INSTR		0x048
+#define SDMA_ONCE_STAT		0x04c
+#define SDMA_ONCE_CMD		0x050
+#define SDMA_EVT_MIRROR		0x054
+#define SDMA_ILLINSTADDR	0x058
+#define SDMA_CHN0ADDR		0x05c
+#define SDMA_ONCE_RTB		0x060
+#define SDMA_XTRIG_CONF1	0x070
+#define SDMA_XTRIG_CONF2	0x074
+#define SDMA_CHNENBL0_IMX35	0x200
+#define SDMA_CHNENBL0_IMX31	0x080
+#define SDMA_CHNPRI_0		0x100
+
+/*
+ * Buffer descriptor status values.
+ */
+#define BD_DONE  0x01
+#define BD_WRAP  0x02
+#define BD_CONT  0x04
+#define BD_INTR  0x08
+#define BD_RROR  0x10
+#define BD_LAST  0x20
+#define BD_EXTD  0x80
+
+/*
+ * Data Node descriptor status values.
+ */
+#define DND_END_OF_FRAME  0x80
+#define DND_END_OF_XFER   0x40
+#define DND_DONE          0x20
+#define DND_UNUSED        0x01
+
+/*
+ * IPCV2 descriptor status values.
+ */
+#define BD_IPCV2_END_OF_FRAME  0x40
+
+#define IPCV2_MAX_NODES        50
+/*
+ * Error bit set in the CCB status field by the SDMA,
+ * in setbd routine, in case of a transfer error
+ */
+#define DATA_ERROR  0x10000000
+
+/*
+ * Buffer descriptor commands.
+ */
+#define C0_ADDR             0x01
+#define C0_LOAD             0x02
+#define C0_DUMP             0x03
+#define C0_SETCTX           0x07
+#define C0_GETCTX           0x03
+#define C0_SETDM            0x01
+#define C0_SETPM            0x04
+#define C0_GETDM            0x02
+#define C0_GETPM            0x08
+/*
+ * Change endianness indicator in the BD command field
+ */
+#define CHANGE_ENDIANNESS   0x80
+
+/*
+ * Mode/Count of data node descriptors - IPCv2
+ */
+struct sdma_mode_count {
+	u32 count   : 16; /* size of the buffer pointed by this BD */
+	u32 status  :  8; /* E,R,I,C,W,D status bits stored here */
+	u32 command :  8; /* command mostlky used for channel 0 */
+};
+
+/*
+ * Buffer descriptor
+ */
+struct sdma_buffer_descriptor {
+	struct sdma_mode_count  mode;
+	u32 buffer_addr;	/* address of the buffer described */
+	u32 ext_buffer_addr;	/* extended buffer address */
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_channel_control - Channel control Block
+ *
+ * @current_bd_ptr	current buffer descriptor processed
+ * @base_bd_ptr		first element of buffer descriptor array
+ * @unused		padding. The SDMA engine expects an array of 128 byte
+ *			control blocks
+ */
+struct sdma_channel_control {
+	u32 current_bd_ptr;
+	u32 base_bd_ptr;
+	u32 unused[2];
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_state_registers - SDMA context for a channel
+ *
+ * @pc:		program counter
+ * @t:		test bit: status of arithmetic & test instruction
+ * @rpc:	return program counter
+ * @sf:		source fault while loading data
+ * @spc:	loop start program counter
+ * @df:		destination fault while storing data
+ * @epc:	loop end program counter
+ * @lm:		loop mode
+ */
+struct sdma_state_registers {
+	u32 pc     :14;
+	u32 unused1: 1;
+	u32 t      : 1;
+	u32 rpc    :14;
+	u32 unused0: 1;
+	u32 sf     : 1;
+	u32 spc    :14;
+	u32 unused2: 1;
+	u32 df     : 1;
+	u32 epc    :14;
+	u32 lm     : 2;
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_context_data - sdma context specific to a channel
+ *
+ * @channel_state:	channel state bits
+ * @gReg:		general registers
+ * @mda:		burst dma destination address register
+ * @msa:		burst dma source address register
+ * @ms:			burst dma status register
+ * @md:			burst dma data register
+ * @pda:		peripheral dma destination address register
+ * @psa:		peripheral dma source address register
+ * @ps:			peripheral dma status register
+ * @pd:			peripheral dma data register
+ * @ca:			CRC polynomial register
+ * @cs:			CRC accumulator register
+ * @dda:		dedicated core destination address register
+ * @dsa:		dedicated core source address register
+ * @ds:			dedicated core status register
+ * @dd:			dedicated core data register
+ */
+struct sdma_context_data {
+	struct sdma_state_registers  channel_state;
+	u32  gReg[8];
+	u32  mda;
+	u32  msa;
+	u32  ms;
+	u32  md;
+	u32  pda;
+	u32  psa;
+	u32  ps;
+	u32  pd;
+	u32  ca;
+	u32  cs;
+	u32  dda;
+	u32  dsa;
+	u32  ds;
+	u32  dd;
+	u32  scratch0;
+	u32  scratch1;
+	u32  scratch2;
+	u32  scratch3;
+	u32  scratch4;
+	u32  scratch5;
+	u32  scratch6;
+	u32  scratch7;
+} __attribute__ ((packed));
+
+#define NUM_BD (int)(PAGE_SIZE / sizeof(struct sdma_buffer_descriptor))
+
+struct sdma_engine;
+
+/**
+ * struct sdma_channel - housekeeping for a SDMA channel
+ *
+ * @sdma		pointer to the SDMA engine for this channel
+ * @channel		the channel number, matches dmaengine chan_id + 1
+ * @direction		transfer type. Needed for setting SDMA script
+ * @peripheral_type	Peripheral type. Needed for setting SDMA script
+ * @event_id0		aka dma request line
+ * @event_id1		for channels that use 2 events
+ * @word_size		peripheral access size
+ * @buf_tail		ID of the buffer that was processed
+ * @done		channel completion
+ * @num_bd		max NUM_BD. number of descriptors currently handling
+ */
+struct sdma_channel {
+	struct sdma_engine		*sdma;
+	unsigned int			channel;
+	enum dma_transfer_direction		direction;
+	enum sdma_peripheral_type	peripheral_type;
+	unsigned int			event_id0;
+	unsigned int			event_id1;
+	enum dma_slave_buswidth		word_size;
+	unsigned int			buf_tail;
+	struct completion		done;
+	unsigned int			num_bd;
+	struct sdma_buffer_descriptor	*bd;
+	dma_addr_t			bd_phys;
+	unsigned int			pc_from_device, pc_to_device;
+	unsigned long			flags;
+	dma_addr_t			per_address;
+	unsigned long			event_mask[2];
+	unsigned long			watermark_level;
+	u32				shp_addr, per_addr;
+	struct dma_chan			chan;
+	spinlock_t			lock;
+	struct dma_async_tx_descriptor	desc;
+	enum dma_status			status;
+	unsigned int			chn_count;
+	unsigned int			chn_real_count;
+};
+
+#define IMX_DMA_SG_LOOP		BIT(0)
+
+#define MAX_DMA_CHANNELS 32
+#define MXC_SDMA_DEFAULT_PRIORITY 1
+#define MXC_SDMA_MIN_PRIORITY 1
+#define MXC_SDMA_MAX_PRIORITY 7
+
+#define SDMA_FIRMWARE_MAGIC 0x414d4453
+
+/**
+ * struct sdma_firmware_header - Layout of the firmware image
+ *
+ * @magic		"SDMA"
+ * @version_major	increased whenever layout of struct sdma_script_start_addrs
+ *			changes.
+ * @version_minor	firmware minor version (for binary compatible changes)
+ * @script_addrs_start	offset of struct sdma_script_start_addrs in this image
+ * @num_script_addrs	Number of script addresses in this image
+ * @ram_code_start	offset of SDMA ram image in this firmware image
+ * @ram_code_size	size of SDMA ram image
+ * @script_addrs	Stores the start address of the SDMA scripts
+ *			(in SDMA memory space)
+ */
+struct sdma_firmware_header {
+	u32	magic;
+	u32	version_major;
+	u32	version_minor;
+	u32	script_addrs_start;
+	u32	num_script_addrs;
+	u32	ram_code_start;
+	u32	ram_code_size;
+};
+
+enum sdma_devtype {
+	IMX31_SDMA,	/* runs on i.mx31 */
+	IMX35_SDMA,	/* runs on i.mx35 and later */
+};
+
+struct sdma_engine {
+	struct device			*dev;
+	struct device_dma_parameters	dma_parms;
+	struct sdma_channel		channel[MAX_DMA_CHANNELS];
+	struct sdma_channel_control	*channel_control;
+	void __iomem			*regs;
+	enum sdma_devtype		devtype;
+	unsigned int			num_events;
+	struct sdma_context_data	*context;
+	dma_addr_t			context_phys;
+	struct dma_device		dma_device;
+	struct clk			*clk;
+	struct mutex			channel_0_lock;
+	struct sdma_script_start_addrs	*script_addrs;
+};
+
+static struct platform_device_id sdma_devtypes[] = {
+	{
+		.name = "imx31-sdma",
+		.driver_data = IMX31_SDMA,
+	}, {
+		.name = "imx35-sdma",
+		.driver_data = IMX35_SDMA,
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(platform, sdma_devtypes);
+
+static const struct of_device_id sdma_dt_ids[] = {
+	{ .compatible = "fsl,imx31-sdma", .data = &sdma_devtypes[IMX31_SDMA], },
+	{ .compatible = "fsl,imx35-sdma", .data = &sdma_devtypes[IMX35_SDMA], },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sdma_dt_ids);
+
+#define SDMA_H_CONFIG_DSPDMA	BIT(12) /* indicates if the DSPDMA is used */
+#define SDMA_H_CONFIG_RTD_PINS	BIT(11) /* indicates if Real-Time Debug pins are enabled */
+#define SDMA_H_CONFIG_ACR	BIT(4)  /* indicates if AHB freq /core freq = 2 or 1 */
+#define SDMA_H_CONFIG_CSM	(3)       /* indicates which context switch mode is selected*/
+
+static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
+{
+	u32 chnenbl0 = (sdma->devtype == IMX31_SDMA ? SDMA_CHNENBL0_IMX31 :
+						      SDMA_CHNENBL0_IMX35);
+	return chnenbl0 + event * 4;
+}
+
+static int sdma_config_ownership(struct sdma_channel *sdmac,
+		bool event_override, bool mcu_override, bool dsp_override)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	unsigned long evt, mcu, dsp;
+
+	if (event_override && mcu_override && dsp_override)
+		return -EINVAL;
+
+	evt = readl_relaxed(sdma->regs + SDMA_H_EVTOVR);
+	mcu = readl_relaxed(sdma->regs + SDMA_H_HOSTOVR);
+	dsp = readl_relaxed(sdma->regs + SDMA_H_DSPOVR);
+
+	if (dsp_override)
+		__clear_bit(channel, &dsp);
+	else
+		__set_bit(channel, &dsp);
+
+	if (event_override)
+		__clear_bit(channel, &evt);
+	else
+		__set_bit(channel, &evt);
+
+	if (mcu_override)
+		__clear_bit(channel, &mcu);
+	else
+		__set_bit(channel, &mcu);
+
+	writel_relaxed(evt, sdma->regs + SDMA_H_EVTOVR);
+	writel_relaxed(mcu, sdma->regs + SDMA_H_HOSTOVR);
+	writel_relaxed(dsp, sdma->regs + SDMA_H_DSPOVR);
+
+	return 0;
+}
+
+static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
+{
+	writel(BIT(channel), sdma->regs + SDMA_H_START);
+}
+
+/*
+ * sdma_run_channel - run a channel and wait till it's done
+ */
+static int sdma_run_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	int ret;
+
+	init_completion(&sdmac->done);
+
+	sdma_enable_channel(sdma, channel);
+
+	ret = wait_for_completion_timeout(&sdmac->done, HZ);
+
+	return ret ? 0 : -ETIMEDOUT;
+}
+
+static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
+		u32 address)
+{
+	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
+	void *buf_virt;
+	dma_addr_t buf_phys;
+	int ret;
+
+	mutex_lock(&sdma->channel_0_lock);
+
+	buf_virt = dma_alloc_coherent(NULL,
+			size,
+			&buf_phys, GFP_KERNEL);
+	if (!buf_virt) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+
+	bd0->mode.command = C0_SETPM;
+	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+	bd0->mode.count = size / 2;
+	bd0->buffer_addr = buf_phys;
+	bd0->ext_buffer_addr = address;
+
+	memcpy(buf_virt, buf, size);
+
+	ret = sdma_run_channel(&sdma->channel[0]);
+
+	dma_free_coherent(NULL, size, buf_virt, buf_phys);
+
+err_out:
+	mutex_unlock(&sdma->channel_0_lock);
+
+	return ret;
+}
+
+static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	unsigned long val;
+	u32 chnenbl = chnenbl_ofs(sdma, event);
+
+	val = readl_relaxed(sdma->regs + chnenbl);
+	__set_bit(channel, &val);
+	writel_relaxed(val, sdma->regs + chnenbl);
+}
+
+static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	u32 chnenbl = chnenbl_ofs(sdma, event);
+	unsigned long val;
+
+	val = readl_relaxed(sdma->regs + chnenbl);
+	__clear_bit(channel, &val);
+	writel_relaxed(val, sdma->regs + chnenbl);
+}
+
+static void sdma_handle_channel_loop(struct sdma_channel *sdmac)
+{
+	struct sdma_buffer_descriptor *bd;
+
+	/*
+	 * loop mode. Iterate over descriptors, re-setup them and
+	 * call callback function.
+	 */
+	while (1) {
+		bd = &sdmac->bd[sdmac->buf_tail];
+
+		if (bd->mode.status & BD_DONE)
+			break;
+
+		if (bd->mode.status & BD_RROR)
+			sdmac->status = DMA_ERROR;
+		else
+			sdmac->status = DMA_IN_PROGRESS;
+
+		bd->mode.status |= BD_DONE;
+		sdmac->buf_tail++;
+		sdmac->buf_tail %= sdmac->num_bd;
+
+		if (sdmac->desc.callback)
+			sdmac->desc.callback(sdmac->desc.callback_param);
+	}
+}
+
+static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
+{
+	struct sdma_buffer_descriptor *bd;
+	int i, error = 0;
+
+	sdmac->chn_real_count = 0;
+	/*
+	 * non loop mode. Iterate over all descriptors, collect
+	 * errors and call callback function
+	 */
+	for (i = 0; i < sdmac->num_bd; i++) {
+		bd = &sdmac->bd[i];
+
+		 if (bd->mode.status & (BD_DONE | BD_RROR))
+			error = -EIO;
+		 sdmac->chn_real_count += bd->mode.count;
+	}
+
+	if (error)
+		sdmac->status = DMA_ERROR;
+	else
+		sdmac->status = DMA_SUCCESS;
+
+	dma_cookie_complete(&sdmac->desc);
+	if (sdmac->desc.callback)
+		sdmac->desc.callback(sdmac->desc.callback_param);
+}
+
+static void mxc_sdma_handle_channel(struct sdma_channel *sdmac)
+{
+	complete(&sdmac->done);
+
+	/* not interested in channel 0 interrupts */
+	if (sdmac->channel == 0)
+		return;
+
+	if (sdmac->flags & IMX_DMA_SG_LOOP)
+		sdma_handle_channel_loop(sdmac);
+	else
+		mxc_sdma_handle_channel_normal(sdmac);
+}
+
+static irqreturn_t sdma_int_handler(int irq, void *dev_id)
+{
+	struct sdma_engine *sdma = dev_id;
+	unsigned long stat;
+
+	stat = readl_relaxed(sdma->regs + SDMA_H_INTR);
+	writel_relaxed(stat, sdma->regs + SDMA_H_INTR);
+
+	while (stat) {
+		int channel = fls(stat) - 1;
+		struct sdma_channel *sdmac = &sdma->channel[channel];
+
+		mxc_sdma_handle_channel(sdmac);
+
+		__clear_bit(channel, &stat);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * sets the pc of SDMA script according to the peripheral type
+ */
+static void sdma_get_pc(struct sdma_channel *sdmac,
+		enum sdma_peripheral_type peripheral_type)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int per_2_emi = 0, emi_2_per = 0;
+	/*
+	 * These are needed once we start to support transfers between
+	 * two peripherals or memory-to-memory transfers
+	 */
+	int per_2_per = 0, emi_2_emi = 0;
+
+	sdmac->pc_from_device = 0;
+	sdmac->pc_to_device = 0;
+
+	switch (peripheral_type) {
+	case IMX_DMATYPE_MEMORY:
+		emi_2_emi = sdma->script_addrs->ap_2_ap_addr;
+		break;
+	case IMX_DMATYPE_DSP:
+		emi_2_per = sdma->script_addrs->bp_2_ap_addr;
+		per_2_emi = sdma->script_addrs->ap_2_bp_addr;
+		break;
+	case IMX_DMATYPE_FIRI:
+		per_2_emi = sdma->script_addrs->firi_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_firi_addr;
+		break;
+	case IMX_DMATYPE_UART:
+		per_2_emi = sdma->script_addrs->uart_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+		break;
+	case IMX_DMATYPE_UART_SP:
+		per_2_emi = sdma->script_addrs->uartsh_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		break;
+	case IMX_DMATYPE_ATA:
+		per_2_emi = sdma->script_addrs->ata_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_ata_addr;
+		break;
+	case IMX_DMATYPE_CSPI:
+	case IMX_DMATYPE_EXT:
+	case IMX_DMATYPE_SSI:
+		per_2_emi = sdma->script_addrs->app_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+		break;
+	case IMX_DMATYPE_SSI_SP:
+	case IMX_DMATYPE_MMC:
+	case IMX_DMATYPE_SDHC:
+	case IMX_DMATYPE_CSPI_SP:
+	case IMX_DMATYPE_ESAI:
+	case IMX_DMATYPE_MSHC_SP:
+		per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		break;
+	case IMX_DMATYPE_ASRC:
+		per_2_emi = sdma->script_addrs->asrc_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->asrc_2_mcu_addr;
+		per_2_per = sdma->script_addrs->per_2_per_addr;
+		break;
+	case IMX_DMATYPE_MSHC:
+		per_2_emi = sdma->script_addrs->mshc_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_mshc_addr;
+		break;
+	case IMX_DMATYPE_CCM:
+		per_2_emi = sdma->script_addrs->dptc_dvfs_addr;
+		break;
+	case IMX_DMATYPE_SPDIF:
+		per_2_emi = sdma->script_addrs->spdif_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_spdif_addr;
+		break;
+	case IMX_DMATYPE_IPU_MEMORY:
+		emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr;
+		break;
+	default:
+		break;
+	}
+
+	sdmac->pc_from_device = per_2_emi;
+	sdmac->pc_to_device = emi_2_per;
+}
+
+static int sdma_load_context(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	int load_address;
+	struct sdma_context_data *context = sdma->context;
+	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
+	int ret;
+
+	if (sdmac->direction == DMA_DEV_TO_MEM) {
+		load_address = sdmac->pc_from_device;
+	} else {
+		load_address = sdmac->pc_to_device;
+	}
+
+	if (load_address < 0)
+		return load_address;
+
+	dev_dbg(sdma->dev, "load_address = %d\n", load_address);
+	dev_dbg(sdma->dev, "wml = 0x%08x\n", (u32)sdmac->watermark_level);
+	dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr);
+	dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr);
+	dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", (u32)sdmac->event_mask[0]);
+	dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", (u32)sdmac->event_mask[1]);
+
+	mutex_lock(&sdma->channel_0_lock);
+
+	memset(context, 0, sizeof(*context));
+	context->channel_state.pc = load_address;
+
+	/* Send by context the event mask,base address for peripheral
+	 * and watermark level
+	 */
+	context->gReg[0] = sdmac->event_mask[1];
+	context->gReg[1] = sdmac->event_mask[0];
+	context->gReg[2] = sdmac->per_addr;
+	context->gReg[6] = sdmac->shp_addr;
+	context->gReg[7] = sdmac->watermark_level;
+
+	bd0->mode.command = C0_SETDM;
+	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+	bd0->mode.count = sizeof(*context) / 4;
+	bd0->buffer_addr = sdma->context_phys;
+	bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel;
+
+	ret = sdma_run_channel(&sdma->channel[0]);
+
+	mutex_unlock(&sdma->channel_0_lock);
+
+	return ret;
+}
+
+static void sdma_disable_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
+	sdmac->status = DMA_ERROR;
+}
+
+static int sdma_config_channel(struct sdma_channel *sdmac)
+{
+	int ret;
+
+	sdma_disable_channel(sdmac);
+
+	sdmac->event_mask[0] = 0;
+	sdmac->event_mask[1] = 0;
+	sdmac->shp_addr = 0;
+	sdmac->per_addr = 0;
+
+	if (sdmac->event_id0) {
+		if (sdmac->event_id0 >= sdmac->sdma->num_events)
+			return -EINVAL;
+		sdma_event_enable(sdmac, sdmac->event_id0);
+	}
+
+	switch (sdmac->peripheral_type) {
+	case IMX_DMATYPE_DSP:
+		sdma_config_ownership(sdmac, false, true, true);
+		break;
+	case IMX_DMATYPE_MEMORY:
+		sdma_config_ownership(sdmac, false, true, false);
+		break;
+	default:
+		sdma_config_ownership(sdmac, true, true, false);
+		break;
+	}
+
+	sdma_get_pc(sdmac, sdmac->peripheral_type);
+
+	if ((sdmac->peripheral_type != IMX_DMATYPE_MEMORY) &&
+			(sdmac->peripheral_type != IMX_DMATYPE_DSP)) {
+		/* Handle multiple event channels differently */
+		if (sdmac->event_id1) {
+			sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32);
+			if (sdmac->event_id1 > 31)
+				__set_bit(31, &sdmac->watermark_level);
+			sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32);
+			if (sdmac->event_id0 > 31)
+				__set_bit(30, &sdmac->watermark_level);
+		} else {
+			__set_bit(sdmac->event_id0, sdmac->event_mask);
+		}
+		/* Watermark Level */
+		sdmac->watermark_level |= sdmac->watermark_level;
+		/* Address */
+		sdmac->shp_addr = sdmac->per_address;
+	} else {
+		sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */
+	}
+
+	ret = sdma_load_context(sdmac);
+
+	return ret;
+}
+
+static int sdma_set_channel_priority(struct sdma_channel *sdmac,
+		unsigned int priority)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	if (priority < MXC_SDMA_MIN_PRIORITY
+	    || priority > MXC_SDMA_MAX_PRIORITY) {
+		return -EINVAL;
+	}
+
+	writel_relaxed(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
+
+	return 0;
+}
+
+static int sdma_request_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	int ret = -EBUSY;
+
+	sdmac->bd = dma_alloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys, GFP_KERNEL);
+	if (!sdmac->bd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memset(sdmac->bd, 0, PAGE_SIZE);
+
+	sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys;
+	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+
+	sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY);
+
+	init_completion(&sdmac->done);
+
+	sdmac->buf_tail = 0;
+
+	return 0;
+out:
+
+	return ret;
+}
+
+static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sdma_channel, chan);
+}
+
+static dma_cookie_t sdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	unsigned long flags;
+	struct sdma_channel *sdmac = to_sdma_chan(tx->chan);
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&sdmac->lock, flags);
+
+	cookie = dma_cookie_assign(tx);
+
+	spin_unlock_irqrestore(&sdmac->lock, flags);
+
+	return cookie;
+}
+
+static int sdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct imx_dma_data *data = chan->private;
+	int prio, ret;
+
+	if (!data)
+		return -EINVAL;
+
+	switch (data->priority) {
+	case DMA_PRIO_HIGH:
+		prio = 3;
+		break;
+	case DMA_PRIO_MEDIUM:
+		prio = 2;
+		break;
+	case DMA_PRIO_LOW:
+	default:
+		prio = 1;
+		break;
+	}
+
+	sdmac->peripheral_type = data->peripheral_type;
+	sdmac->event_id0 = data->dma_request;
+
+	clk_enable(sdmac->sdma->clk);
+
+	ret = sdma_request_channel(sdmac);
+	if (ret)
+		return ret;
+
+	ret = sdma_set_channel_priority(sdmac, prio);
+	if (ret)
+		return ret;
+
+	dma_async_tx_descriptor_init(&sdmac->desc, chan);
+	sdmac->desc.tx_submit = sdma_tx_submit;
+	/* txd.flags will be overwritten in prep funcs */
+	sdmac->desc.flags = DMA_CTRL_ACK;
+
+	return 0;
+}
+
+static void sdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	sdma_disable_channel(sdmac);
+
+	if (sdmac->event_id0)
+		sdma_event_disable(sdmac, sdmac->event_id0);
+	if (sdmac->event_id1)
+		sdma_event_disable(sdmac, sdmac->event_id1);
+
+	sdmac->event_id0 = 0;
+	sdmac->event_id1 = 0;
+
+	sdma_set_channel_priority(sdmac, 0);
+
+	dma_free_coherent(NULL, PAGE_SIZE, sdmac->bd, sdmac->bd_phys);
+
+	clk_disable(sdma->clk);
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int ret, i, count;
+	int channel = sdmac->channel;
+	struct scatterlist *sg;
+
+	if (sdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+	sdmac->status = DMA_IN_PROGRESS;
+
+	sdmac->flags = 0;
+
+	dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n",
+			sg_len, channel);
+
+	sdmac->direction = direction;
+	ret = sdma_load_context(sdmac);
+	if (ret)
+		goto err_out;
+
+	if (sg_len > NUM_BD) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n",
+				channel, sg_len, NUM_BD);
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	sdmac->chn_count = 0;
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
+		int param;
+
+		bd->buffer_addr = sg->dma_address;
+
+		count = sg->length;
+
+		if (count > 0xffff) {
+			dev_err(sdma->dev, "SDMA channel %d: maximum bytes for sg entry exceeded: %d > %d\n",
+					channel, count, 0xffff);
+			ret = -EINVAL;
+			goto err_out;
+		}
+
+		bd->mode.count = count;
+		sdmac->chn_count += count;
+
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) {
+			ret =  -EINVAL;
+			goto err_out;
+		}
+
+		switch (sdmac->word_size) {
+		case DMA_SLAVE_BUSWIDTH_4_BYTES:
+			bd->mode.command = 0;
+			if (count & 3 || sg->dma_address & 3)
+				return NULL;
+			break;
+		case DMA_SLAVE_BUSWIDTH_2_BYTES:
+			bd->mode.command = 2;
+			if (count & 1 || sg->dma_address & 1)
+				return NULL;
+			break;
+		case DMA_SLAVE_BUSWIDTH_1_BYTE:
+			bd->mode.command = 1;
+			break;
+		default:
+			return NULL;
+		}
+
+		param = BD_DONE | BD_EXTD | BD_CONT;
+
+		if (i + 1 == sg_len) {
+			param |= BD_INTR;
+			param |= BD_LAST;
+			param &= ~BD_CONT;
+		}
+
+		dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
+				i, count, sg->dma_address,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+	}
+
+	sdmac->num_bd = sg_len;
+	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+
+	return &sdmac->desc;
+err_out:
+	sdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int num_periods = buf_len / period_len;
+	int channel = sdmac->channel;
+	int ret, i = 0, buf = 0;
+
+	dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel);
+
+	if (sdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+
+	sdmac->status = DMA_IN_PROGRESS;
+
+	sdmac->flags |= IMX_DMA_SG_LOOP;
+	sdmac->direction = direction;
+	ret = sdma_load_context(sdmac);
+	if (ret)
+		goto err_out;
+
+	if (num_periods > NUM_BD) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n",
+				channel, num_periods, NUM_BD);
+		goto err_out;
+	}
+
+	if (period_len > 0xffff) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %d > %d\n",
+				channel, period_len, 0xffff);
+		goto err_out;
+	}
+
+	while (buf < buf_len) {
+		struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
+		int param;
+
+		bd->buffer_addr = dma_addr;
+
+		bd->mode.count = period_len;
+
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
+			goto err_out;
+		if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES)
+			bd->mode.command = 0;
+		else
+			bd->mode.command = sdmac->word_size;
+
+		param = BD_DONE | BD_EXTD | BD_CONT | BD_INTR;
+		if (i + 1 == num_periods)
+			param |= BD_WRAP;
+
+		dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
+				i, period_len, dma_addr,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+
+		dma_addr += period_len;
+		buf += period_len;
+
+		i++;
+	}
+
+	sdmac->num_bd = num_periods;
+	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+
+	return &sdmac->desc;
+err_out:
+	sdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct dma_slave_config *dmaengine_cfg = (void *)arg;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		sdma_disable_channel(sdmac);
+		return 0;
+	case DMA_SLAVE_CONFIG:
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
+			sdmac->per_address = dmaengine_cfg->src_addr;
+			sdmac->watermark_level = dmaengine_cfg->src_maxburst *
+						dmaengine_cfg->src_addr_width;
+			sdmac->word_size = dmaengine_cfg->src_addr_width;
+		} else {
+			sdmac->per_address = dmaengine_cfg->dst_addr;
+			sdmac->watermark_level = dmaengine_cfg->dst_maxburst *
+						dmaengine_cfg->dst_addr_width;
+			sdmac->word_size = dmaengine_cfg->dst_addr_width;
+		}
+		sdmac->direction = dmaengine_cfg->direction;
+		return sdma_config_channel(sdmac);
+	default:
+		return -ENOSYS;
+	}
+
+	return -EINVAL;
+}
+
+static enum dma_status sdma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	dma_cookie_t last_used;
+
+	last_used = chan->cookie;
+
+	dma_set_tx_state(txstate, chan->completed_cookie, last_used,
+			sdmac->chn_count - sdmac->chn_real_count);
+
+	return sdmac->status;
+}
+
+static void sdma_issue_pending(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	if (sdmac->status == DMA_IN_PROGRESS)
+		sdma_enable_channel(sdma, sdmac->channel);
+}
+
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1	34
+
+static void sdma_add_scripts(struct sdma_engine *sdma,
+		const struct sdma_script_start_addrs *addr)
+{
+	s32 *addr_arr = (u32 *)addr;
+	s32 *saddr_arr = (u32 *)sdma->script_addrs;
+	int i;
+
+	for (i = 0; i < SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; i++)
+		if (addr_arr[i] > 0)
+			saddr_arr[i] = addr_arr[i];
+}
+
+static void sdma_load_firmware(const struct firmware *fw, void *context)
+{
+	struct sdma_engine *sdma = context;
+	const struct sdma_firmware_header *header;
+	const struct sdma_script_start_addrs *addr;
+	unsigned short *ram_code;
+
+	if (!fw) {
+		dev_err(sdma->dev, "firmware not found\n");
+		return;
+	}
+
+	if (fw->size < sizeof(*header))
+		goto err_firmware;
+
+	header = (struct sdma_firmware_header *)fw->data;
+
+	if (header->magic != SDMA_FIRMWARE_MAGIC)
+		goto err_firmware;
+	if (header->ram_code_start + header->ram_code_size > fw->size)
+		goto err_firmware;
+
+	addr = (void *)header + header->script_addrs_start;
+	ram_code = (void *)header + header->ram_code_start;
+
+	clk_enable(sdma->clk);
+	/* download the RAM image for SDMA */
+	sdma_load_script(sdma, ram_code,
+			header->ram_code_size,
+			addr->ram_code_start_addr);
+	clk_disable(sdma->clk);
+
+	sdma_add_scripts(sdma, addr);
+
+	dev_info(sdma->dev, "loaded firmware %d.%d\n",
+			header->version_major,
+			header->version_minor);
+
+err_firmware:
+	release_firmware(fw);
+}
+
+static int __init sdma_get_firmware(struct sdma_engine *sdma,
+		const char *fw_name)
+{
+	int ret;
+
+	ret = request_firmware_nowait(THIS_MODULE,
+			FW_ACTION_HOTPLUG, fw_name, sdma->dev,
+			GFP_KERNEL, sdma, sdma_load_firmware);
+
+	return ret;
+}
+
+static int __init sdma_init(struct sdma_engine *sdma)
+{
+	int i, ret;
+	dma_addr_t ccb_phys;
+
+	switch (sdma->devtype) {
+	case IMX31_SDMA:
+		sdma->num_events = 32;
+		break;
+	case IMX35_SDMA:
+		sdma->num_events = 48;
+		break;
+	default:
+		dev_err(sdma->dev, "Unknown sdma type %d. aborting\n",
+			sdma->devtype);
+		return -ENODEV;
+	}
+
+	clk_enable(sdma->clk);
+
+	/* Be sure SDMA has not started yet */
+	writel_relaxed(0, sdma->regs + SDMA_H_C0PTR);
+
+	sdma->channel_control = dma_alloc_coherent(NULL,
+			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) +
+			sizeof(struct sdma_context_data),
+			&ccb_phys, GFP_KERNEL);
+
+	if (!sdma->channel_control) {
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+
+	sdma->context = (void *)sdma->channel_control +
+		MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control);
+	sdma->context_phys = ccb_phys +
+		MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control);
+
+	/* Zero-out the CCB structures array just allocated */
+	memset(sdma->channel_control, 0,
+			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control));
+
+	/* disable all channels */
+	for (i = 0; i < sdma->num_events; i++)
+		writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i));
+
+	/* All channels have priority 0 */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++)
+		writel_relaxed(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
+
+	ret = sdma_request_channel(&sdma->channel[0]);
+	if (ret)
+		goto err_dma_alloc;
+
+	sdma_config_ownership(&sdma->channel[0], false, true, false);
+
+	/* Set Command Channel (Channel Zero) */
+	writel_relaxed(0x4050, sdma->regs + SDMA_CHN0ADDR);
+
+	/* Set bits of CONFIG register but with static context switching */
+	/* FIXME: Check whether to set ACR bit depending on clock ratios */
+	writel_relaxed(0, sdma->regs + SDMA_H_CONFIG);
+
+	writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR);
+
+	/* Set bits of CONFIG register with given context switching mode */
+	writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
+
+	/* Initializes channel's priorities */
+	sdma_set_channel_priority(&sdma->channel[0], 7);
+
+	clk_disable(sdma->clk);
+
+	return 0;
+
+err_dma_alloc:
+	clk_disable(sdma->clk);
+	dev_err(sdma->dev, "initialisation failed with %d\n", ret);
+	return ret;
+}
+
+static int __init sdma_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *of_id =
+			of_match_device(sdma_dt_ids, &pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
+	const char *fw_name;
+	int ret;
+	int irq;
+	struct resource *iores;
+	struct sdma_platform_data *pdata = pdev->dev.platform_data;
+	int i;
+	struct sdma_engine *sdma;
+	s32 *saddr_arr;
+
+	sdma = kzalloc(sizeof(*sdma), GFP_KERNEL);
+	if (!sdma)
+		return -ENOMEM;
+
+	mutex_init(&sdma->channel_0_lock);
+
+	sdma->dev = &pdev->dev;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_irq(pdev, 0);
+	if (!iores || irq < 0) {
+		ret = -EINVAL;
+		goto err_irq;
+	}
+
+	if (!request_mem_region(iores->start, resource_size(iores), pdev->name)) {
+		ret = -EBUSY;
+		goto err_request_region;
+	}
+
+	sdma->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sdma->clk)) {
+		ret = PTR_ERR(sdma->clk);
+		goto err_clk;
+	}
+
+	sdma->regs = ioremap(iores->start, resource_size(iores));
+	if (!sdma->regs) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	ret = request_irq(irq, sdma_int_handler, 0, "sdma", sdma);
+	if (ret)
+		goto err_request_irq;
+
+	sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL);
+	if (!sdma->script_addrs) {
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	/* initially no scripts available */
+	saddr_arr = (s32 *)sdma->script_addrs;
+	for (i = 0; i < SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; i++)
+		saddr_arr[i] = -EINVAL;
+
+	if (of_id)
+		pdev->id_entry = of_id->data;
+	sdma->devtype = pdev->id_entry->driver_data;
+
+	dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask);
+
+	INIT_LIST_HEAD(&sdma->dma_device.channels);
+	/* Initialize channel parameters */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+		struct sdma_channel *sdmac = &sdma->channel[i];
+
+		sdmac->sdma = sdma;
+		spin_lock_init(&sdmac->lock);
+
+		sdmac->chan.device = &sdma->dma_device;
+		dma_cookie_init(&sdmac->chan);
+		sdmac->channel = i;
+
+		/*
+		 * Add the channel to the DMAC list. Do not add channel 0 though
+		 * because we need it internally in the SDMA driver. This also means
+		 * that channel 0 in dmaengine counting matches sdma channel 1.
+		 */
+		if (i)
+			list_add_tail(&sdmac->chan.device_node,
+					&sdma->dma_device.channels);
+	}
+
+	ret = sdma_init(sdma);
+	if (ret)
+		goto err_init;
+
+	if (pdata && pdata->script_addrs)
+		sdma_add_scripts(sdma, pdata->script_addrs);
+
+	if (pdata) {
+		ret = sdma_get_firmware(sdma, pdata->fw_name);
+		if (ret)
+			dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
+	} else {
+		/*
+		 * Because that device tree does not encode ROM script address,
+		 * the RAM script in firmware is mandatory for device tree
+		 * probe, otherwise it fails.
+		 */
+		ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
+					      &fw_name);
+		if (ret)
+			dev_warn(&pdev->dev, "failed to get firmware name\n");
+		else {
+			ret = sdma_get_firmware(sdma, fw_name);
+			if (ret)
+				dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
+		}
+	}
+
+	sdma->dma_device.dev = &pdev->dev;
+
+	sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources;
+	sdma->dma_device.device_free_chan_resources = sdma_free_chan_resources;
+	sdma->dma_device.device_tx_status = sdma_tx_status;
+	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
+	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
+	sdma->dma_device.device_control = sdma_control;
+	sdma->dma_device.device_issue_pending = sdma_issue_pending;
+	sdma->dma_device.dev->dma_parms = &sdma->dma_parms;
+	dma_set_max_seg_size(sdma->dma_device.dev, 65535);
+
+	ret = dma_async_device_register(&sdma->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto err_init;
+	}
+
+	dev_info(sdma->dev, "initialized\n");
+
+	return 0;
+
+err_init:
+	kfree(sdma->script_addrs);
+err_alloc:
+	free_irq(irq, sdma);
+err_request_irq:
+	iounmap(sdma->regs);
+err_ioremap:
+	clk_put(sdma->clk);
+err_clk:
+	release_mem_region(iores->start, resource_size(iores));
+err_request_region:
+err_irq:
+	kfree(sdma);
+	return ret;
+}
+
+static int __exit sdma_remove(struct platform_device *pdev)
+{
+	return -EBUSY;
+}
+
+static struct platform_driver sdma_driver = {
+	.driver		= {
+		.name	= "imx-sdma",
+		.of_match_table = sdma_dt_ids,
+	},
+	.id_table	= sdma_devtypes,
+	.remove		= __exit_p(sdma_remove),
+};
+
+static int __init sdma_module_init(void)
+{
+	return platform_driver_probe(&sdma_driver, sdma_probe);
+}
+module_init(sdma_module_init);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX SDMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
new file mode 100644
index 00000000..c900ca7a
--- /dev/null
+++ b/drivers/dma/intel_mid_dma.c
@@ -0,0 +1,1460 @@
+/*
+ *  intel_mid_dma.c - Intel Langwell DMA Drivers
+ *
+ *  Copyright (C) 2008-10 Intel Corp
+ *  Author: Vinod Koul <vinod.koul@intel.com>
+ *  The driver design is based on dw_dmac driver
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *
+ */
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/intel_mid_dma.h>
+#include <linux/module.h>
+
+#include "dmaengine.h"
+
+#define MAX_CHAN	4 /*max ch across controllers*/
+#include "intel_mid_dma_regs.h"
+
+#define INTEL_MID_DMAC1_ID		0x0814
+#define INTEL_MID_DMAC2_ID		0x0813
+#define INTEL_MID_GP_DMAC2_ID		0x0827
+#define INTEL_MFLD_DMAC1_ID		0x0830
+#define LNW_PERIPHRAL_MASK_BASE		0xFFAE8008
+#define LNW_PERIPHRAL_MASK_SIZE		0x10
+#define LNW_PERIPHRAL_STATUS		0x0
+#define LNW_PERIPHRAL_MASK		0x8
+
+struct intel_mid_dma_probe_info {
+	u8 max_chan;
+	u8 ch_base;
+	u16 block_size;
+	u32 pimr_mask;
+};
+
+#define INFO(_max_chan, _ch_base, _block_size, _pimr_mask) \
+	((kernel_ulong_t)&(struct intel_mid_dma_probe_info) {	\
+		.max_chan = (_max_chan),			\
+		.ch_base = (_ch_base),				\
+		.block_size = (_block_size),			\
+		.pimr_mask = (_pimr_mask),			\
+	})
+
+/*****************************************************************************
+Utility Functions*/
+/**
+ * get_ch_index	-	convert status to channel
+ * @status: status mask
+ * @base: dma ch base value
+ *
+ * Modify the status mask and return the channel index needing
+ * attention (or -1 if neither)
+ */
+static int get_ch_index(int *status, unsigned int base)
+{
+	int i;
+	for (i = 0; i < MAX_CHAN; i++) {
+		if (*status & (1 << (i + base))) {
+			*status = *status & ~(1 << (i + base));
+			pr_debug("MDMA: index %d New status %x\n", i, *status);
+			return i;
+		}
+	}
+	return -1;
+}
+
+/**
+ * get_block_ts	-	calculates dma transaction length
+ * @len: dma transfer length
+ * @tx_width: dma transfer src width
+ * @block_size: dma controller max block size
+ *
+ * Based on src width calculate the DMA trsaction length in data items
+ * return data items or FFFF if exceeds max length for block
+ */
+static int get_block_ts(int len, int tx_width, int block_size)
+{
+	int byte_width = 0, block_ts = 0;
+
+	switch (tx_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		byte_width = 1;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		byte_width = 2;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	default:
+		byte_width = 4;
+		break;
+	}
+
+	block_ts = len/byte_width;
+	if (block_ts > block_size)
+		block_ts = 0xFFFF;
+	return block_ts;
+}
+
+/*****************************************************************************
+DMAC1 interrupt Functions*/
+
+/**
+ * dmac1_mask_periphral_intr -	mask the periphral interrupt
+ * @mid: dma device for which masking is required
+ *
+ * Masks the DMA periphral interrupt
+ * this is valid for DMAC1 family controllers only
+ * This controller should have periphral mask registers already mapped
+ */
+static void dmac1_mask_periphral_intr(struct middma_device *mid)
+{
+	u32 pimr;
+
+	if (mid->pimr_mask) {
+		pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK);
+		pimr |= mid->pimr_mask;
+		writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK);
+	}
+	return;
+}
+
+/**
+ * dmac1_unmask_periphral_intr -	unmask the periphral interrupt
+ * @midc: dma channel for which masking is required
+ *
+ * UnMasks the DMA periphral interrupt,
+ * this is valid for DMAC1 family controllers only
+ * This controller should have periphral mask registers already mapped
+ */
+static void dmac1_unmask_periphral_intr(struct intel_mid_dma_chan *midc)
+{
+	u32 pimr;
+	struct middma_device *mid = to_middma_device(midc->chan.device);
+
+	if (mid->pimr_mask) {
+		pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK);
+		pimr &= ~mid->pimr_mask;
+		writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK);
+	}
+	return;
+}
+
+/**
+ * enable_dma_interrupt -	enable the periphral interrupt
+ * @midc: dma channel for which enable interrupt is required
+ *
+ * Enable the DMA periphral interrupt,
+ * this is valid for DMAC1 family controllers only
+ * This controller should have periphral mask registers already mapped
+ */
+static void enable_dma_interrupt(struct intel_mid_dma_chan *midc)
+{
+	dmac1_unmask_periphral_intr(midc);
+
+	/*en ch interrupts*/
+	iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR);
+	iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR);
+	return;
+}
+
+/**
+ * disable_dma_interrupt -	disable the periphral interrupt
+ * @midc: dma channel for which disable interrupt is required
+ *
+ * Disable the DMA periphral interrupt,
+ * this is valid for DMAC1 family controllers only
+ * This controller should have periphral mask registers already mapped
+ */
+static void disable_dma_interrupt(struct intel_mid_dma_chan *midc)
+{
+	/*Check LPE PISR, make sure fwd is disabled*/
+	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_BLOCK);
+	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR);
+	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR);
+	return;
+}
+
+/*****************************************************************************
+DMA channel helper Functions*/
+/**
+ * mid_desc_get		-	get a descriptor
+ * @midc: dma channel for which descriptor is required
+ *
+ * Obtain a descriptor for the channel. Returns NULL if none are free.
+ * Once the descriptor is returned it is private until put on another
+ * list or freed
+ */
+static struct intel_mid_dma_desc *midc_desc_get(struct intel_mid_dma_chan *midc)
+{
+	struct intel_mid_dma_desc *desc, *_desc;
+	struct intel_mid_dma_desc *ret = NULL;
+
+	spin_lock_bh(&midc->lock);
+	list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) {
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+	}
+	spin_unlock_bh(&midc->lock);
+	return ret;
+}
+
+/**
+ * mid_desc_put		-	put a descriptor
+ * @midc: dma channel for which descriptor is required
+ * @desc: descriptor to put
+ *
+ * Return a descriptor from lwn_desc_get back to the free pool
+ */
+static void midc_desc_put(struct intel_mid_dma_chan *midc,
+			struct intel_mid_dma_desc *desc)
+{
+	if (desc) {
+		spin_lock_bh(&midc->lock);
+		list_add_tail(&desc->desc_node, &midc->free_list);
+		spin_unlock_bh(&midc->lock);
+	}
+}
+/**
+ * midc_dostart		-		begin a DMA transaction
+ * @midc: channel for which txn is to be started
+ * @first: first descriptor of series
+ *
+ * Load a transaction into the engine. This must be called with midc->lock
+ * held and bh disabled.
+ */
+static void midc_dostart(struct intel_mid_dma_chan *midc,
+			struct intel_mid_dma_desc *first)
+{
+	struct middma_device *mid = to_middma_device(midc->chan.device);
+
+	/*  channel is idle */
+	if (midc->busy && test_ch_en(midc->dma_base, midc->ch_id)) {
+		/*error*/
+		pr_err("ERR_MDMA: channel is busy in start\n");
+		/* The tasklet will hopefully advance the queue... */
+		return;
+	}
+	midc->busy = true;
+	/*write registers and en*/
+	iowrite32(first->sar, midc->ch_regs + SAR);
+	iowrite32(first->dar, midc->ch_regs + DAR);
+	iowrite32(first->lli_phys, midc->ch_regs + LLP);
+	iowrite32(first->cfg_hi, midc->ch_regs + CFG_HIGH);
+	iowrite32(first->cfg_lo, midc->ch_regs + CFG_LOW);
+	iowrite32(first->ctl_lo, midc->ch_regs + CTL_LOW);
+	iowrite32(first->ctl_hi, midc->ch_regs + CTL_HIGH);
+	pr_debug("MDMA:TX SAR %x,DAR %x,CFGL %x,CFGH %x,CTLH %x, CTLL %x\n",
+		(int)first->sar, (int)first->dar, first->cfg_hi,
+		first->cfg_lo, first->ctl_hi, first->ctl_lo);
+	first->status = DMA_IN_PROGRESS;
+
+	iowrite32(ENABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
+}
+
+/**
+ * midc_descriptor_complete	-	process completed descriptor
+ * @midc: channel owning the descriptor
+ * @desc: the descriptor itself
+ *
+ * Process a completed descriptor and perform any callbacks upon
+ * the completion. The completion handling drops the lock during the
+ * callbacks but must be called with the lock held.
+ */
+static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
+		struct intel_mid_dma_desc *desc)
+		__releases(&midc->lock) __acquires(&midc->lock)
+{
+	struct dma_async_tx_descriptor	*txd = &desc->txd;
+	dma_async_tx_callback callback_txd = NULL;
+	struct intel_mid_dma_lli	*llitem;
+	void *param_txd = NULL;
+
+	dma_cookie_complete(txd);
+	callback_txd = txd->callback;
+	param_txd = txd->callback_param;
+
+	if (desc->lli != NULL) {
+		/*clear the DONE bit of completed LLI in memory*/
+		llitem = desc->lli + desc->current_lli;
+		llitem->ctl_hi &= CLEAR_DONE;
+		if (desc->current_lli < desc->lli_length-1)
+			(desc->current_lli)++;
+		else
+			desc->current_lli = 0;
+	}
+	spin_unlock_bh(&midc->lock);
+	if (callback_txd) {
+		pr_debug("MDMA: TXD callback set ... calling\n");
+		callback_txd(param_txd);
+	}
+	if (midc->raw_tfr) {
+		desc->status = DMA_SUCCESS;
+		if (desc->lli != NULL) {
+			pci_pool_free(desc->lli_pool, desc->lli,
+						desc->lli_phys);
+			pci_pool_destroy(desc->lli_pool);
+			desc->lli = NULL;
+		}
+		list_move(&desc->desc_node, &midc->free_list);
+		midc->busy = false;
+	}
+	spin_lock_bh(&midc->lock);
+
+}
+/**
+ * midc_scan_descriptors -		check the descriptors in channel
+ *					mark completed when tx is completete
+ * @mid: device
+ * @midc: channel to scan
+ *
+ * Walk the descriptor chain for the device and process any entries
+ * that are complete.
+ */
+static void midc_scan_descriptors(struct middma_device *mid,
+				struct intel_mid_dma_chan *midc)
+{
+	struct intel_mid_dma_desc *desc = NULL, *_desc = NULL;
+
+	/*tx is complete*/
+	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
+		if (desc->status == DMA_IN_PROGRESS)
+			midc_descriptor_complete(midc, desc);
+	}
+	return;
+	}
+/**
+ * midc_lli_fill_sg -		Helper function to convert
+ *				SG list to Linked List Items.
+ *@midc: Channel
+ *@desc: DMA descriptor
+ *@sglist: Pointer to SG list
+ *@sglen: SG list length
+ *@flags: DMA transaction flags
+ *
+ * Walk through the SG list and convert the SG list into Linked
+ * List Items (LLI).
+ */
+static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
+				struct intel_mid_dma_desc *desc,
+				struct scatterlist *sglist,
+				unsigned int sglen,
+				unsigned int flags)
+{
+	struct intel_mid_dma_slave *mids;
+	struct scatterlist  *sg;
+	dma_addr_t lli_next, sg_phy_addr;
+	struct intel_mid_dma_lli *lli_bloc_desc;
+	union intel_mid_dma_ctl_lo ctl_lo;
+	union intel_mid_dma_ctl_hi ctl_hi;
+	int i;
+
+	pr_debug("MDMA: Entered midc_lli_fill_sg\n");
+	mids = midc->mid_slave;
+
+	lli_bloc_desc = desc->lli;
+	lli_next = desc->lli_phys;
+
+	ctl_lo.ctl_lo = desc->ctl_lo;
+	ctl_hi.ctl_hi = desc->ctl_hi;
+	for_each_sg(sglist, sg, sglen, i) {
+		/*Populate CTL_LOW and LLI values*/
+		if (i != sglen - 1) {
+			lli_next = lli_next +
+				sizeof(struct intel_mid_dma_lli);
+		} else {
+		/*Check for circular list, otherwise terminate LLI to ZERO*/
+			if (flags & DMA_PREP_CIRCULAR_LIST) {
+				pr_debug("MDMA: LLI is configured in circular mode\n");
+				lli_next = desc->lli_phys;
+			} else {
+				lli_next = 0;
+				ctl_lo.ctlx.llp_dst_en = 0;
+				ctl_lo.ctlx.llp_src_en = 0;
+			}
+		}
+		/*Populate CTL_HI values*/
+		ctl_hi.ctlx.block_ts = get_block_ts(sg->length,
+							desc->width,
+							midc->dma->block_size);
+		/*Populate SAR and DAR values*/
+		sg_phy_addr = sg_phys(sg);
+		if (desc->dirn ==  DMA_MEM_TO_DEV) {
+			lli_bloc_desc->sar  = sg_phy_addr;
+			lli_bloc_desc->dar  = mids->dma_slave.dst_addr;
+		} else if (desc->dirn ==  DMA_DEV_TO_MEM) {
+			lli_bloc_desc->sar  = mids->dma_slave.src_addr;
+			lli_bloc_desc->dar  = sg_phy_addr;
+		}
+		/*Copy values into block descriptor in system memroy*/
+		lli_bloc_desc->llp = lli_next;
+		lli_bloc_desc->ctl_lo = ctl_lo.ctl_lo;
+		lli_bloc_desc->ctl_hi = ctl_hi.ctl_hi;
+
+		lli_bloc_desc++;
+	}
+	/*Copy very first LLI values to descriptor*/
+	desc->ctl_lo = desc->lli->ctl_lo;
+	desc->ctl_hi = desc->lli->ctl_hi;
+	desc->sar = desc->lli->sar;
+	desc->dar = desc->lli->dar;
+
+	return 0;
+}
+/*****************************************************************************
+DMA engine callback Functions*/
+/**
+ * intel_mid_dma_tx_submit -	callback to submit DMA transaction
+ * @tx: dma engine descriptor
+ *
+ * Submit the DMA trasaction for this descriptor, start if ch idle
+ */
+static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct intel_mid_dma_desc	*desc = to_intel_mid_dma_desc(tx);
+	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(tx->chan);
+	dma_cookie_t		cookie;
+
+	spin_lock_bh(&midc->lock);
+	cookie = dma_cookie_assign(tx);
+
+	if (list_empty(&midc->active_list))
+		list_add_tail(&desc->desc_node, &midc->active_list);
+	else
+		list_add_tail(&desc->desc_node, &midc->queue);
+
+	midc_dostart(midc, desc);
+	spin_unlock_bh(&midc->lock);
+
+	return cookie;
+}
+
+/**
+ * intel_mid_dma_issue_pending -	callback to issue pending txn
+ * @chan: chan where pending trascation needs to be checked and submitted
+ *
+ * Call for scan to issue pending descriptors
+ */
+static void intel_mid_dma_issue_pending(struct dma_chan *chan)
+{
+	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
+
+	spin_lock_bh(&midc->lock);
+	if (!list_empty(&midc->queue))
+		midc_scan_descriptors(to_middma_device(chan->device), midc);
+	spin_unlock_bh(&midc->lock);
+}
+
+/**
+ * intel_mid_dma_tx_status -	Return status of txn
+ * @chan: chan for where status needs to be checked
+ * @cookie: cookie for txn
+ * @txstate: DMA txn state
+ *
+ * Return status of DMA txn
+ */
+static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
+						dma_cookie_t cookie,
+						struct dma_tx_state *txstate)
+{
+	struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_SUCCESS) {
+		spin_lock_bh(&midc->lock);
+		midc_scan_descriptors(to_middma_device(chan->device), midc);
+		spin_unlock_bh(&midc->lock);
+
+		ret = dma_cookie_status(chan, cookie, txstate);
+	}
+
+	return ret;
+}
+
+static int dma_slave_control(struct dma_chan *chan, unsigned long arg)
+{
+	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
+	struct dma_slave_config  *slave = (struct dma_slave_config *)arg;
+	struct intel_mid_dma_slave *mid_slave;
+
+	BUG_ON(!midc);
+	BUG_ON(!slave);
+	pr_debug("MDMA: slave control called\n");
+
+	mid_slave = to_intel_mid_dma_slave(slave);
+
+	BUG_ON(!mid_slave);
+
+	midc->mid_slave = mid_slave;
+	return 0;
+}
+/**
+ * intel_mid_dma_device_control -	DMA device control
+ * @chan: chan for DMA control
+ * @cmd: control cmd
+ * @arg: cmd arg value
+ *
+ * Perform DMA control command
+ */
+static int intel_mid_dma_device_control(struct dma_chan *chan,
+			enum dma_ctrl_cmd cmd, unsigned long arg)
+{
+	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
+	struct middma_device	*mid = to_middma_device(chan->device);
+	struct intel_mid_dma_desc	*desc, *_desc;
+	union intel_mid_dma_cfg_lo cfg_lo;
+
+	if (cmd == DMA_SLAVE_CONFIG)
+		return dma_slave_control(chan, arg);
+
+	if (cmd != DMA_TERMINATE_ALL)
+		return -ENXIO;
+
+	spin_lock_bh(&midc->lock);
+	if (midc->busy == false) {
+		spin_unlock_bh(&midc->lock);
+		return 0;
+	}
+	/*Suspend and disable the channel*/
+	cfg_lo.cfg_lo = ioread32(midc->ch_regs + CFG_LOW);
+	cfg_lo.cfgx.ch_susp = 1;
+	iowrite32(cfg_lo.cfg_lo, midc->ch_regs + CFG_LOW);
+	iowrite32(DISABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
+	midc->busy = false;
+	/* Disable interrupts */
+	disable_dma_interrupt(midc);
+	midc->descs_allocated = 0;
+
+	spin_unlock_bh(&midc->lock);
+	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
+		if (desc->lli != NULL) {
+			pci_pool_free(desc->lli_pool, desc->lli,
+						desc->lli_phys);
+			pci_pool_destroy(desc->lli_pool);
+			desc->lli = NULL;
+		}
+		list_move(&desc->desc_node, &midc->free_list);
+	}
+	return 0;
+}
+
+
+/**
+ * intel_mid_dma_prep_memcpy -	Prep memcpy txn
+ * @chan: chan for DMA transfer
+ * @dest: destn address
+ * @src: src address
+ * @len: DMA transfer len
+ * @flags: DMA flags
+ *
+ * Perform a DMA memcpy. Note we support slave periphral DMA transfers only
+ * The periphral txn details should be filled in slave structure properly
+ * Returns the descriptor for this txn
+ */
+static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
+			struct dma_chan *chan, dma_addr_t dest,
+			dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct intel_mid_dma_chan *midc;
+	struct intel_mid_dma_desc *desc = NULL;
+	struct intel_mid_dma_slave *mids;
+	union intel_mid_dma_ctl_lo ctl_lo;
+	union intel_mid_dma_ctl_hi ctl_hi;
+	union intel_mid_dma_cfg_lo cfg_lo;
+	union intel_mid_dma_cfg_hi cfg_hi;
+	enum dma_slave_buswidth width;
+
+	pr_debug("MDMA: Prep for memcpy\n");
+	BUG_ON(!chan);
+	if (!len)
+		return NULL;
+
+	midc = to_intel_mid_dma_chan(chan);
+	BUG_ON(!midc);
+
+	mids = midc->mid_slave;
+	BUG_ON(!mids);
+
+	pr_debug("MDMA:called for DMA %x CH %d Length %zu\n",
+				midc->dma->pci_id, midc->ch_id, len);
+	pr_debug("MDMA:Cfg passed Mode %x, Dirn %x, HS %x, Width %x\n",
+			mids->cfg_mode, mids->dma_slave.direction,
+			mids->hs_mode, mids->dma_slave.src_addr_width);
+
+	/*calculate CFG_LO*/
+	if (mids->hs_mode == LNW_DMA_SW_HS) {
+		cfg_lo.cfg_lo = 0;
+		cfg_lo.cfgx.hs_sel_dst = 1;
+		cfg_lo.cfgx.hs_sel_src = 1;
+	} else if (mids->hs_mode == LNW_DMA_HW_HS)
+		cfg_lo.cfg_lo = 0x00000;
+
+	/*calculate CFG_HI*/
+	if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
+		/*SW HS only*/
+		cfg_hi.cfg_hi = 0;
+	} else {
+		cfg_hi.cfg_hi = 0;
+		if (midc->dma->pimr_mask) {
+			cfg_hi.cfgx.protctl = 0x0; /*default value*/
+			cfg_hi.cfgx.fifo_mode = 1;
+			if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
+				cfg_hi.cfgx.src_per = 0;
+				if (mids->device_instance == 0)
+					cfg_hi.cfgx.dst_per = 3;
+				if (mids->device_instance == 1)
+					cfg_hi.cfgx.dst_per = 1;
+			} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
+				if (mids->device_instance == 0)
+					cfg_hi.cfgx.src_per = 2;
+				if (mids->device_instance == 1)
+					cfg_hi.cfgx.src_per = 0;
+				cfg_hi.cfgx.dst_per = 0;
+			}
+		} else {
+			cfg_hi.cfgx.protctl = 0x1; /*default value*/
+			cfg_hi.cfgx.src_per = cfg_hi.cfgx.dst_per =
+					midc->ch_id - midc->dma->chan_base;
+		}
+	}
+
+	/*calculate CTL_HI*/
+	ctl_hi.ctlx.reser = 0;
+	ctl_hi.ctlx.done  = 0;
+	width = mids->dma_slave.src_addr_width;
+
+	ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size);
+	pr_debug("MDMA:calc len %d for block size %d\n",
+				ctl_hi.ctlx.block_ts, midc->dma->block_size);
+	/*calculate CTL_LO*/
+	ctl_lo.ctl_lo = 0;
+	ctl_lo.ctlx.int_en = 1;
+	ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst;
+	ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst;
+
+	/*
+	 * Here we need some translation from "enum dma_slave_buswidth"
+	 * to the format for our dma controller
+	 *		standard	intel_mid_dmac's format
+	 *		 1 Byte			0b000
+	 *		 2 Bytes		0b001
+	 *		 4 Bytes		0b010
+	 */
+	ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width / 2;
+	ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width / 2;
+
+	if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
+		ctl_lo.ctlx.tt_fc = 0;
+		ctl_lo.ctlx.sinc = 0;
+		ctl_lo.ctlx.dinc = 0;
+	} else {
+		if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
+			ctl_lo.ctlx.sinc = 0;
+			ctl_lo.ctlx.dinc = 2;
+			ctl_lo.ctlx.tt_fc = 1;
+		} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
+			ctl_lo.ctlx.sinc = 2;
+			ctl_lo.ctlx.dinc = 0;
+			ctl_lo.ctlx.tt_fc = 2;
+		}
+	}
+
+	pr_debug("MDMA:Calc CTL LO %x, CTL HI %x, CFG LO %x, CFG HI %x\n",
+		ctl_lo.ctl_lo, ctl_hi.ctl_hi, cfg_lo.cfg_lo, cfg_hi.cfg_hi);
+
+	enable_dma_interrupt(midc);
+
+	desc = midc_desc_get(midc);
+	if (desc == NULL)
+		goto err_desc_get;
+	desc->sar = src;
+	desc->dar = dest ;
+	desc->len = len;
+	desc->cfg_hi = cfg_hi.cfg_hi;
+	desc->cfg_lo = cfg_lo.cfg_lo;
+	desc->ctl_lo = ctl_lo.ctl_lo;
+	desc->ctl_hi = ctl_hi.ctl_hi;
+	desc->width = width;
+	desc->dirn = mids->dma_slave.direction;
+	desc->lli_phys = 0;
+	desc->lli = NULL;
+	desc->lli_pool = NULL;
+	return &desc->txd;
+
+err_desc_get:
+	pr_err("ERR_MDMA: Failed to get desc\n");
+	midc_desc_put(midc, desc);
+	return NULL;
+}
+/**
+ * intel_mid_dma_prep_slave_sg -	Prep slave sg txn
+ * @chan: chan for DMA transfer
+ * @sgl: scatter gather list
+ * @sg_len: length of sg txn
+ * @direction: DMA transfer dirtn
+ * @flags: DMA flags
+ * @context: transfer context (ignored)
+ *
+ * Prepares LLI based periphral transfer
+ */
+static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
+			struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_transfer_direction direction,
+			unsigned long flags, void *context)
+{
+	struct intel_mid_dma_chan *midc = NULL;
+	struct intel_mid_dma_slave *mids = NULL;
+	struct intel_mid_dma_desc *desc = NULL;
+	struct dma_async_tx_descriptor *txd = NULL;
+	union intel_mid_dma_ctl_lo ctl_lo;
+
+	pr_debug("MDMA: Prep for slave SG\n");
+
+	if (!sg_len) {
+		pr_err("MDMA: Invalid SG length\n");
+		return NULL;
+	}
+	midc = to_intel_mid_dma_chan(chan);
+	BUG_ON(!midc);
+
+	mids = midc->mid_slave;
+	BUG_ON(!mids);
+
+	if (!midc->dma->pimr_mask) {
+		/* We can still handle sg list with only one item */
+		if (sg_len == 1) {
+			txd = intel_mid_dma_prep_memcpy(chan,
+						mids->dma_slave.dst_addr,
+						mids->dma_slave.src_addr,
+						sgl->length,
+						flags);
+			return txd;
+		} else {
+			pr_warn("MDMA: SG list is not supported by this controller\n");
+			return  NULL;
+		}
+	}
+
+	pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n",
+			sg_len, direction, flags);
+
+	txd = intel_mid_dma_prep_memcpy(chan, 0, 0, sgl->length, flags);
+	if (NULL == txd) {
+		pr_err("MDMA: Prep memcpy failed\n");
+		return NULL;
+	}
+
+	desc = to_intel_mid_dma_desc(txd);
+	desc->dirn = direction;
+	ctl_lo.ctl_lo = desc->ctl_lo;
+	ctl_lo.ctlx.llp_dst_en = 1;
+	ctl_lo.ctlx.llp_src_en = 1;
+	desc->ctl_lo = ctl_lo.ctl_lo;
+	desc->lli_length = sg_len;
+	desc->current_lli = 0;
+	/* DMA coherent memory pool for LLI descriptors*/
+	desc->lli_pool = pci_pool_create("intel_mid_dma_lli_pool",
+				midc->dma->pdev,
+				(sizeof(struct intel_mid_dma_lli)*sg_len),
+				32, 0);
+	if (NULL == desc->lli_pool) {
+		pr_err("MID_DMA:LLI pool create failed\n");
+		return NULL;
+	}
+
+	desc->lli = pci_pool_alloc(desc->lli_pool, GFP_KERNEL, &desc->lli_phys);
+	if (!desc->lli) {
+		pr_err("MID_DMA: LLI alloc failed\n");
+		pci_pool_destroy(desc->lli_pool);
+		return NULL;
+	}
+
+	midc_lli_fill_sg(midc, desc, sgl, sg_len, flags);
+	if (flags & DMA_PREP_INTERRUPT) {
+		iowrite32(UNMASK_INTR_REG(midc->ch_id),
+				midc->dma_base + MASK_BLOCK);
+		pr_debug("MDMA:Enabled Block interrupt\n");
+	}
+	return &desc->txd;
+}
+
+/**
+ * intel_mid_dma_free_chan_resources -	Frees dma resources
+ * @chan: chan requiring attention
+ *
+ * Frees the allocated resources on this DMA chan
+ */
+static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
+	struct middma_device	*mid = to_middma_device(chan->device);
+	struct intel_mid_dma_desc	*desc, *_desc;
+
+	if (true == midc->busy) {
+		/*trying to free ch in use!!!!!*/
+		pr_err("ERR_MDMA: trying to free ch in use\n");
+	}
+	spin_lock_bh(&midc->lock);
+	midc->descs_allocated = 0;
+	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
+		list_del(&desc->desc_node);
+		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
+	}
+	list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) {
+		list_del(&desc->desc_node);
+		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
+	}
+	list_for_each_entry_safe(desc, _desc, &midc->queue, desc_node) {
+		list_del(&desc->desc_node);
+		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
+	}
+	spin_unlock_bh(&midc->lock);
+	midc->in_use = false;
+	midc->busy = false;
+	/* Disable CH interrupts */
+	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK);
+	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR);
+	pm_runtime_put(&mid->pdev->dev);
+}
+
+/**
+ * intel_mid_dma_alloc_chan_resources -	Allocate dma resources
+ * @chan: chan requiring attention
+ *
+ * Allocates DMA resources on this chan
+ * Return the descriptors allocated
+ */
+static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
+	struct middma_device	*mid = to_middma_device(chan->device);
+	struct intel_mid_dma_desc	*desc;
+	dma_addr_t		phys;
+	int	i = 0;
+
+	pm_runtime_get_sync(&mid->pdev->dev);
+
+	if (mid->state == SUSPENDED) {
+		if (dma_resume(&mid->pdev->dev)) {
+			pr_err("ERR_MDMA: resume failed");
+			return -EFAULT;
+		}
+	}
+
+	/* ASSERT:  channel is idle */
+	if (test_ch_en(mid->dma_base, midc->ch_id)) {
+		/*ch is not idle*/
+		pr_err("ERR_MDMA: ch not idle\n");
+		pm_runtime_put(&mid->pdev->dev);
+		return -EIO;
+	}
+	dma_cookie_init(chan);
+
+	spin_lock_bh(&midc->lock);
+	while (midc->descs_allocated < DESCS_PER_CHANNEL) {
+		spin_unlock_bh(&midc->lock);
+		desc = pci_pool_alloc(mid->dma_pool, GFP_KERNEL, &phys);
+		if (!desc) {
+			pr_err("ERR_MDMA: desc failed\n");
+			pm_runtime_put(&mid->pdev->dev);
+			return -ENOMEM;
+			/*check*/
+		}
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		desc->txd.tx_submit = intel_mid_dma_tx_submit;
+		desc->txd.flags = DMA_CTRL_ACK;
+		desc->txd.phys = phys;
+		spin_lock_bh(&midc->lock);
+		i = ++midc->descs_allocated;
+		list_add_tail(&desc->desc_node, &midc->free_list);
+	}
+	spin_unlock_bh(&midc->lock);
+	midc->in_use = true;
+	midc->busy = false;
+	pr_debug("MID_DMA: Desc alloc done ret: %d desc\n", i);
+	return i;
+}
+
+/**
+ * midc_handle_error -	Handle DMA txn error
+ * @mid: controller where error occurred
+ * @midc: chan where error occurred
+ *
+ * Scan the descriptor for error
+ */
+static void midc_handle_error(struct middma_device *mid,
+		struct intel_mid_dma_chan *midc)
+{
+	midc_scan_descriptors(mid, midc);
+}
+
+/**
+ * dma_tasklet -	DMA interrupt tasklet
+ * @data: tasklet arg (the controller structure)
+ *
+ * Scan the controller for interrupts for completion/error
+ * Clear the interrupt and call for handling completion/error
+ */
+static void dma_tasklet(unsigned long data)
+{
+	struct middma_device *mid = NULL;
+	struct intel_mid_dma_chan *midc = NULL;
+	u32 status, raw_tfr, raw_block;
+	int i;
+
+	mid = (struct middma_device *)data;
+	if (mid == NULL) {
+		pr_err("ERR_MDMA: tasklet Null param\n");
+		return;
+	}
+	pr_debug("MDMA: in tasklet for device %x\n", mid->pci_id);
+	raw_tfr = ioread32(mid->dma_base + RAW_TFR);
+	raw_block = ioread32(mid->dma_base + RAW_BLOCK);
+	status = raw_tfr | raw_block;
+	status &= mid->intr_mask;
+	while (status) {
+		/*txn interrupt*/
+		i = get_ch_index(&status, mid->chan_base);
+		if (i < 0) {
+			pr_err("ERR_MDMA:Invalid ch index %x\n", i);
+			return;
+		}
+		midc = &mid->ch[i];
+		if (midc == NULL) {
+			pr_err("ERR_MDMA:Null param midc\n");
+			return;
+		}
+		pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n",
+				status, midc->ch_id, i);
+		midc->raw_tfr = raw_tfr;
+		midc->raw_block = raw_block;
+		spin_lock_bh(&midc->lock);
+		/*clearing this interrupts first*/
+		iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_TFR);
+		if (raw_block) {
+			iowrite32((1 << midc->ch_id),
+				mid->dma_base + CLEAR_BLOCK);
+		}
+		midc_scan_descriptors(mid, midc);
+		pr_debug("MDMA:Scan of desc... complete, unmasking\n");
+		iowrite32(UNMASK_INTR_REG(midc->ch_id),
+				mid->dma_base + MASK_TFR);
+		if (raw_block) {
+			iowrite32(UNMASK_INTR_REG(midc->ch_id),
+				mid->dma_base + MASK_BLOCK);
+		}
+		spin_unlock_bh(&midc->lock);
+	}
+
+	status = ioread32(mid->dma_base + RAW_ERR);
+	status &= mid->intr_mask;
+	while (status) {
+		/*err interrupt*/
+		i = get_ch_index(&status, mid->chan_base);
+		if (i < 0) {
+			pr_err("ERR_MDMA:Invalid ch index %x\n", i);
+			return;
+		}
+		midc = &mid->ch[i];
+		if (midc == NULL) {
+			pr_err("ERR_MDMA:Null param midc\n");
+			return;
+		}
+		pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n",
+				status, midc->ch_id, i);
+
+		iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_ERR);
+		spin_lock_bh(&midc->lock);
+		midc_handle_error(mid, midc);
+		iowrite32(UNMASK_INTR_REG(midc->ch_id),
+				mid->dma_base + MASK_ERR);
+		spin_unlock_bh(&midc->lock);
+	}
+	pr_debug("MDMA:Exiting takslet...\n");
+	return;
+}
+
+static void dma_tasklet1(unsigned long data)
+{
+	pr_debug("MDMA:in takslet1...\n");
+	return dma_tasklet(data);
+}
+
+static void dma_tasklet2(unsigned long data)
+{
+	pr_debug("MDMA:in takslet2...\n");
+	return dma_tasklet(data);
+}
+
+/**
+ * intel_mid_dma_interrupt -	DMA ISR
+ * @irq: IRQ where interrupt occurred
+ * @data: ISR cllback data (the controller structure)
+ *
+ * See if this is our interrupt if so then schedule the tasklet
+ * otherwise ignore
+ */
+static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
+{
+	struct middma_device *mid = data;
+	u32 tfr_status, err_status;
+	int call_tasklet = 0;
+
+	tfr_status = ioread32(mid->dma_base + RAW_TFR);
+	err_status = ioread32(mid->dma_base + RAW_ERR);
+	if (!tfr_status && !err_status)
+		return IRQ_NONE;
+
+	/*DMA Interrupt*/
+	pr_debug("MDMA:Got an interrupt on irq %d\n", irq);
+	pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask);
+	tfr_status &= mid->intr_mask;
+	if (tfr_status) {
+		/*need to disable intr*/
+		iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_TFR);
+		iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_BLOCK);
+		pr_debug("MDMA: Calling tasklet %x\n", tfr_status);
+		call_tasklet = 1;
+	}
+	err_status &= mid->intr_mask;
+	if (err_status) {
+		iowrite32((err_status << INT_MASK_WE),
+			  mid->dma_base + MASK_ERR);
+		call_tasklet = 1;
+	}
+	if (call_tasklet)
+		tasklet_schedule(&mid->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t intel_mid_dma_interrupt1(int irq, void *data)
+{
+	return intel_mid_dma_interrupt(irq, data);
+}
+
+static irqreturn_t intel_mid_dma_interrupt2(int irq, void *data)
+{
+	return intel_mid_dma_interrupt(irq, data);
+}
+
+/**
+ * mid_setup_dma -	Setup the DMA controller
+ * @pdev: Controller PCI device structure
+ *
+ * Initialize the DMA controller, channels, registers with DMA engine,
+ * ISR. Initialize DMA controller channels.
+ */
+static int mid_setup_dma(struct pci_dev *pdev)
+{
+	struct middma_device *dma = pci_get_drvdata(pdev);
+	int err, i;
+
+	/* DMA coherent memory pool for DMA descriptor allocations */
+	dma->dma_pool = pci_pool_create("intel_mid_dma_desc_pool", pdev,
+					sizeof(struct intel_mid_dma_desc),
+					32, 0);
+	if (NULL == dma->dma_pool) {
+		pr_err("ERR_MDMA:pci_pool_create failed\n");
+		err = -ENOMEM;
+		goto err_dma_pool;
+	}
+
+	INIT_LIST_HEAD(&dma->common.channels);
+	dma->pci_id = pdev->device;
+	if (dma->pimr_mask) {
+		dma->mask_reg = ioremap(LNW_PERIPHRAL_MASK_BASE,
+					LNW_PERIPHRAL_MASK_SIZE);
+		if (dma->mask_reg == NULL) {
+			pr_err("ERR_MDMA:Can't map periphral intr space !!\n");
+			err = -ENOMEM;
+			goto err_ioremap;
+		}
+	} else
+		dma->mask_reg = NULL;
+
+	pr_debug("MDMA:Adding %d channel for this controller\n", dma->max_chan);
+	/*init CH structures*/
+	dma->intr_mask = 0;
+	dma->state = RUNNING;
+	for (i = 0; i < dma->max_chan; i++) {
+		struct intel_mid_dma_chan *midch = &dma->ch[i];
+
+		midch->chan.device = &dma->common;
+		dma_cookie_init(&midch->chan);
+		midch->ch_id = dma->chan_base + i;
+		pr_debug("MDMA:Init CH %d, ID %d\n", i, midch->ch_id);
+
+		midch->dma_base = dma->dma_base;
+		midch->ch_regs = dma->dma_base + DMA_CH_SIZE * midch->ch_id;
+		midch->dma = dma;
+		dma->intr_mask |= 1 << (dma->chan_base + i);
+		spin_lock_init(&midch->lock);
+
+		INIT_LIST_HEAD(&midch->active_list);
+		INIT_LIST_HEAD(&midch->queue);
+		INIT_LIST_HEAD(&midch->free_list);
+		/*mask interrupts*/
+		iowrite32(MASK_INTR_REG(midch->ch_id),
+			dma->dma_base + MASK_BLOCK);
+		iowrite32(MASK_INTR_REG(midch->ch_id),
+			dma->dma_base + MASK_SRC_TRAN);
+		iowrite32(MASK_INTR_REG(midch->ch_id),
+			dma->dma_base + MASK_DST_TRAN);
+		iowrite32(MASK_INTR_REG(midch->ch_id),
+			dma->dma_base + MASK_ERR);
+		iowrite32(MASK_INTR_REG(midch->ch_id),
+			dma->dma_base + MASK_TFR);
+
+		disable_dma_interrupt(midch);
+		list_add_tail(&midch->chan.device_node, &dma->common.channels);
+	}
+	pr_debug("MDMA: Calc Mask as %x for this controller\n", dma->intr_mask);
+
+	/*init dma structure*/
+	dma_cap_zero(dma->common.cap_mask);
+	dma_cap_set(DMA_MEMCPY, dma->common.cap_mask);
+	dma_cap_set(DMA_SLAVE, dma->common.cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma->common.cap_mask);
+	dma->common.dev = &pdev->dev;
+
+	dma->common.device_alloc_chan_resources =
+					intel_mid_dma_alloc_chan_resources;
+	dma->common.device_free_chan_resources =
+					intel_mid_dma_free_chan_resources;
+
+	dma->common.device_tx_status = intel_mid_dma_tx_status;
+	dma->common.device_prep_dma_memcpy = intel_mid_dma_prep_memcpy;
+	dma->common.device_issue_pending = intel_mid_dma_issue_pending;
+	dma->common.device_prep_slave_sg = intel_mid_dma_prep_slave_sg;
+	dma->common.device_control = intel_mid_dma_device_control;
+
+	/*enable dma cntrl*/
+	iowrite32(REG_BIT0, dma->dma_base + DMA_CFG);
+
+	/*register irq */
+	if (dma->pimr_mask) {
+		pr_debug("MDMA:Requesting irq shared for DMAC1\n");
+		err = request_irq(pdev->irq, intel_mid_dma_interrupt1,
+			IRQF_SHARED, "INTEL_MID_DMAC1", dma);
+		if (0 != err)
+			goto err_irq;
+	} else {
+		dma->intr_mask = 0x03;
+		pr_debug("MDMA:Requesting irq for DMAC2\n");
+		err = request_irq(pdev->irq, intel_mid_dma_interrupt2,
+			IRQF_SHARED, "INTEL_MID_DMAC2", dma);
+		if (0 != err)
+			goto err_irq;
+	}
+	/*register device w/ engine*/
+	err = dma_async_device_register(&dma->common);
+	if (0 != err) {
+		pr_err("ERR_MDMA:device_register failed: %d\n", err);
+		goto err_engine;
+	}
+	if (dma->pimr_mask) {
+		pr_debug("setting up tasklet1 for DMAC1\n");
+		tasklet_init(&dma->tasklet, dma_tasklet1, (unsigned long)dma);
+	} else {
+		pr_debug("setting up tasklet2 for DMAC2\n");
+		tasklet_init(&dma->tasklet, dma_tasklet2, (unsigned long)dma);
+	}
+	return 0;
+
+err_engine:
+	free_irq(pdev->irq, dma);
+err_irq:
+	if (dma->mask_reg)
+		iounmap(dma->mask_reg);
+err_ioremap:
+	pci_pool_destroy(dma->dma_pool);
+err_dma_pool:
+	pr_err("ERR_MDMA:setup_dma failed: %d\n", err);
+	return err;
+
+}
+
+/**
+ * middma_shutdown -	Shutdown the DMA controller
+ * @pdev: Controller PCI device structure
+ *
+ * Called by remove
+ * Unregister DMa controller, clear all structures and free interrupt
+ */
+static void middma_shutdown(struct pci_dev *pdev)
+{
+	struct middma_device *device = pci_get_drvdata(pdev);
+
+	dma_async_device_unregister(&device->common);
+	pci_pool_destroy(device->dma_pool);
+	if (device->mask_reg)
+		iounmap(device->mask_reg);
+	if (device->dma_base)
+		iounmap(device->dma_base);
+	free_irq(pdev->irq, device);
+	return;
+}
+
+/**
+ * intel_mid_dma_probe -	PCI Probe
+ * @pdev: Controller PCI device structure
+ * @id: pci device id structure
+ *
+ * Initialize the PCI device, map BARs, query driver data.
+ * Call setup_dma to complete contoller and chan initilzation
+ */
+static int __devinit intel_mid_dma_probe(struct pci_dev *pdev,
+					const struct pci_device_id *id)
+{
+	struct middma_device *device;
+	u32 base_addr, bar_size;
+	struct intel_mid_dma_probe_info *info;
+	int err;
+
+	pr_debug("MDMA: probe for %x\n", pdev->device);
+	info = (void *)id->driver_data;
+	pr_debug("MDMA: CH %d, base %d, block len %d, Periphral mask %x\n",
+				info->max_chan, info->ch_base,
+				info->block_size, info->pimr_mask);
+
+	err = pci_enable_device(pdev);
+	if (err)
+		goto err_enable_device;
+
+	err = pci_request_regions(pdev, "intel_mid_dmac");
+	if (err)
+		goto err_request_regions;
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err)
+		goto err_set_dma_mask;
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err)
+		goto err_set_dma_mask;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device) {
+		pr_err("ERR_MDMA:kzalloc failed probe\n");
+		err = -ENOMEM;
+		goto err_kzalloc;
+	}
+	device->pdev = pci_dev_get(pdev);
+
+	base_addr = pci_resource_start(pdev, 0);
+	bar_size  = pci_resource_len(pdev, 0);
+	device->dma_base = ioremap_nocache(base_addr, DMA_REG_SIZE);
+	if (!device->dma_base) {
+		pr_err("ERR_MDMA:ioremap failed\n");
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
+	pci_set_drvdata(pdev, device);
+	pci_set_master(pdev);
+	device->max_chan = info->max_chan;
+	device->chan_base = info->ch_base;
+	device->block_size = info->block_size;
+	device->pimr_mask = info->pimr_mask;
+
+	err = mid_setup_dma(pdev);
+	if (err)
+		goto err_dma;
+
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_allow(&pdev->dev);
+	return 0;
+
+err_dma:
+	iounmap(device->dma_base);
+err_ioremap:
+	pci_dev_put(pdev);
+	kfree(device);
+err_kzalloc:
+err_set_dma_mask:
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+err_request_regions:
+err_enable_device:
+	pr_err("ERR_MDMA:Probe failed %d\n", err);
+	return err;
+}
+
+/**
+ * intel_mid_dma_remove -	PCI remove
+ * @pdev: Controller PCI device structure
+ *
+ * Free up all resources and data
+ * Call shutdown_dma to complete contoller and chan cleanup
+ */
+static void __devexit intel_mid_dma_remove(struct pci_dev *pdev)
+{
+	struct middma_device *device = pci_get_drvdata(pdev);
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_forbid(&pdev->dev);
+	middma_shutdown(pdev);
+	pci_dev_put(pdev);
+	kfree(device);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+/* Power Management */
+/*
+* dma_suspend - PCI suspend function
+*
+* @pci: PCI device structure
+* @state: PM message
+*
+* This function is called by OS when a power event occurs
+*/
+static int dma_suspend(struct device *dev)
+{
+	struct pci_dev *pci = to_pci_dev(dev);
+	int i;
+	struct middma_device *device = pci_get_drvdata(pci);
+	pr_debug("MDMA: dma_suspend called\n");
+
+	for (i = 0; i < device->max_chan; i++) {
+		if (device->ch[i].in_use)
+			return -EAGAIN;
+	}
+	dmac1_mask_periphral_intr(device);
+	device->state = SUSPENDED;
+	pci_save_state(pci);
+	pci_disable_device(pci);
+	pci_set_power_state(pci, PCI_D3hot);
+	return 0;
+}
+
+/**
+* dma_resume - PCI resume function
+*
+* @pci:	PCI device structure
+*
+* This function is called by OS when a power event occurs
+*/
+int dma_resume(struct device *dev)
+{
+	struct pci_dev *pci = to_pci_dev(dev);
+	int ret;
+	struct middma_device *device = pci_get_drvdata(pci);
+
+	pr_debug("MDMA: dma_resume called\n");
+	pci_set_power_state(pci, PCI_D0);
+	pci_restore_state(pci);
+	ret = pci_enable_device(pci);
+	if (ret) {
+		pr_err("MDMA: device can't be enabled for %x\n", pci->device);
+		return ret;
+	}
+	device->state = RUNNING;
+	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
+	return 0;
+}
+
+static int dma_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct middma_device *device = pci_get_drvdata(pci_dev);
+
+	device->state = SUSPENDED;
+	return 0;
+}
+
+static int dma_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct middma_device *device = pci_get_drvdata(pci_dev);
+
+	device->state = RUNNING;
+	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
+	return 0;
+}
+
+static int dma_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct middma_device *device = pci_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < device->max_chan; i++) {
+		if (device->ch[i].in_use)
+			return -EAGAIN;
+	}
+
+	return pm_schedule_suspend(dev, 0);
+}
+
+/******************************************************************************
+* PCI stuff
+*/
+static struct pci_device_id intel_mid_dma_ids[] = {
+	{ PCI_VDEVICE(INTEL, INTEL_MID_DMAC1_ID),	INFO(2, 6, 4095, 0x200020)},
+	{ PCI_VDEVICE(INTEL, INTEL_MID_DMAC2_ID),	INFO(2, 0, 2047, 0)},
+	{ PCI_VDEVICE(INTEL, INTEL_MID_GP_DMAC2_ID),	INFO(2, 0, 2047, 0)},
+	{ PCI_VDEVICE(INTEL, INTEL_MFLD_DMAC1_ID),	INFO(4, 0, 4095, 0x400040)},
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, intel_mid_dma_ids);
+
+static const struct dev_pm_ops intel_mid_dma_pm = {
+	.runtime_suspend = dma_runtime_suspend,
+	.runtime_resume = dma_runtime_resume,
+	.runtime_idle = dma_runtime_idle,
+	.suspend = dma_suspend,
+	.resume = dma_resume,
+};
+
+static struct pci_driver intel_mid_dma_pci_driver = {
+	.name		=	"Intel MID DMA",
+	.id_table	=	intel_mid_dma_ids,
+	.probe		=	intel_mid_dma_probe,
+	.remove		=	__devexit_p(intel_mid_dma_remove),
+#ifdef CONFIG_PM
+	.driver = {
+		.pm = &intel_mid_dma_pm,
+	},
+#endif
+};
+
+static int __init intel_mid_dma_init(void)
+{
+	pr_debug("INFO_MDMA: LNW DMA Driver Version %s\n",
+			INTEL_MID_DMA_DRIVER_VERSION);
+	return pci_register_driver(&intel_mid_dma_pci_driver);
+}
+fs_initcall(intel_mid_dma_init);
+
+static void __exit intel_mid_dma_exit(void)
+{
+	pci_unregister_driver(&intel_mid_dma_pci_driver);
+}
+module_exit(intel_mid_dma_exit);
+
+MODULE_AUTHOR("Vinod Koul <vinod.koul@intel.com>");
+MODULE_DESCRIPTION("Intel (R) MID DMAC Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(INTEL_MID_DMA_DRIVER_VERSION);
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
new file mode 100644
index 00000000..1bfa9268
--- /dev/null
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -0,0 +1,299 @@
+/*
+ *  intel_mid_dma_regs.h - Intel MID DMA Drivers
+ *
+ *  Copyright (C) 2008-10 Intel Corp
+ *  Author: Vinod Koul <vinod.koul@intel.com>
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *
+ */
+#ifndef __INTEL_MID_DMAC_REGS_H__
+#define __INTEL_MID_DMAC_REGS_H__
+
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/pci_ids.h>
+
+#define INTEL_MID_DMA_DRIVER_VERSION "1.1.0"
+
+#define	REG_BIT0		0x00000001
+#define	REG_BIT8		0x00000100
+#define INT_MASK_WE		0x8
+#define CLEAR_DONE		0xFFFFEFFF
+#define UNMASK_INTR_REG(chan_num) \
+	((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
+#define MASK_INTR_REG(chan_num) (REG_BIT8 << chan_num)
+
+#define ENABLE_CHANNEL(chan_num) \
+	((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
+
+#define DISABLE_CHANNEL(chan_num) \
+	(REG_BIT8 << chan_num)
+
+#define DESCS_PER_CHANNEL	16
+/*DMA Registers*/
+/*registers associated with channel programming*/
+#define DMA_REG_SIZE		0x400
+#define DMA_CH_SIZE		0x58
+
+/*CH X REG = (DMA_CH_SIZE)*CH_NO + REG*/
+#define SAR			0x00 /* Source Address Register*/
+#define DAR			0x08 /* Destination Address Register*/
+#define LLP			0x10 /* Linked List Pointer Register*/
+#define CTL_LOW			0x18 /* Control Register*/
+#define CTL_HIGH		0x1C /* Control Register*/
+#define CFG_LOW			0x40 /* Configuration Register Low*/
+#define CFG_HIGH		0x44 /* Configuration Register high*/
+
+#define STATUS_TFR		0x2E8
+#define STATUS_BLOCK		0x2F0
+#define STATUS_ERR		0x308
+
+#define RAW_TFR			0x2C0
+#define RAW_BLOCK		0x2C8
+#define RAW_ERR			0x2E0
+
+#define MASK_TFR		0x310
+#define MASK_BLOCK		0x318
+#define MASK_SRC_TRAN		0x320
+#define MASK_DST_TRAN		0x328
+#define MASK_ERR		0x330
+
+#define CLEAR_TFR		0x338
+#define CLEAR_BLOCK		0x340
+#define CLEAR_SRC_TRAN		0x348
+#define CLEAR_DST_TRAN		0x350
+#define CLEAR_ERR		0x358
+
+#define INTR_STATUS		0x360
+#define DMA_CFG			0x398
+#define DMA_CHAN_EN		0x3A0
+
+/*DMA channel control registers*/
+union intel_mid_dma_ctl_lo {
+	struct {
+		u32	int_en:1;	/*enable or disable interrupts*/
+					/*should be 0*/
+		u32	dst_tr_width:3;	/*destination transfer width*/
+					/*usually 32 bits = 010*/
+		u32	src_tr_width:3; /*source transfer width*/
+					/*usually 32 bits = 010*/
+		u32	dinc:2;		/*destination address inc/dec*/
+					/*For mem:INC=00, Periphral NoINC=11*/
+		u32	sinc:2;		/*source address inc or dec, as above*/
+		u32	dst_msize:3;	/*destination burst transaction length*/
+					/*always = 16 ie 011*/
+		u32	src_msize:3;	/*source burst transaction length*/
+					/*always = 16 ie 011*/
+		u32	reser1:3;
+		u32	tt_fc:3;	/*transfer type and flow controller*/
+					/*M-M = 000
+					  P-M = 010
+					  M-P = 001*/
+		u32	dms:2;		/*destination master select = 0*/
+		u32	sms:2;		/*source master select = 0*/
+		u32	llp_dst_en:1;	/*enable/disable destination LLP = 0*/
+		u32	llp_src_en:1;	/*enable/disable source LLP = 0*/
+		u32	reser2:3;
+	} ctlx;
+	u32	ctl_lo;
+};
+
+union intel_mid_dma_ctl_hi {
+	struct {
+		u32	block_ts:12;	/*block transfer size*/
+		u32	done:1;		/*Done - updated by DMAC*/
+		u32	reser:19;	/*configured by DMAC*/
+	} ctlx;
+	u32	ctl_hi;
+
+};
+
+/*DMA channel configuration registers*/
+union intel_mid_dma_cfg_lo {
+	struct {
+		u32	reser1:5;
+		u32	ch_prior:3;	/*channel priority = 0*/
+		u32	ch_susp:1;	/*channel suspend = 0*/
+		u32	fifo_empty:1;	/*FIFO empty or not R bit = 0*/
+		u32	hs_sel_dst:1;	/*select HW/SW destn handshaking*/
+					/*HW = 0, SW = 1*/
+		u32	hs_sel_src:1;	/*select HW/SW src handshaking*/
+		u32	reser2:6;
+		u32	dst_hs_pol:1;	/*dest HS interface polarity*/
+		u32	src_hs_pol:1;	/*src HS interface polarity*/
+		u32	max_abrst:10;	/*max AMBA burst len = 0 (no sw limit*/
+		u32	reload_src:1;	/*auto reload src addr =1 if src is P*/
+		u32	reload_dst:1;	/*AR destn addr =1 if dstn is P*/
+	} cfgx;
+	u32	cfg_lo;
+};
+
+union intel_mid_dma_cfg_hi {
+	struct {
+		u32	fcmode:1;	/*flow control mode = 1*/
+		u32	fifo_mode:1;	/*FIFO mode select = 1*/
+		u32	protctl:3;	/*protection control = 0*/
+		u32	rsvd:2;
+		u32	src_per:4;	/*src hw HS interface*/
+		u32	dst_per:4;	/*dstn hw HS interface*/
+		u32	reser2:17;
+	} cfgx;
+	u32	cfg_hi;
+};
+
+
+/**
+ * struct intel_mid_dma_chan - internal mid representation of a DMA channel
+ * @chan: dma_chan strcture represetation for mid chan
+ * @ch_regs: MMIO register space pointer to channel register
+ * @dma_base: MMIO register space DMA engine base pointer
+ * @ch_id: DMA channel id
+ * @lock: channel spinlock
+ * @active_list: current active descriptors
+ * @queue: current queued up descriptors
+ * @free_list: current free descriptors
+ * @slave: dma slave struture
+ * @descs_allocated: total number of decsiptors allocated
+ * @dma: dma device struture pointer
+ * @busy: bool representing if ch is busy (active txn) or not
+ * @in_use: bool representing if ch is in use or not
+ * @raw_tfr: raw trf interrupt received
+ * @raw_block: raw block interrupt received
+ */
+struct intel_mid_dma_chan {
+	struct dma_chan		chan;
+	void __iomem		*ch_regs;
+	void __iomem		*dma_base;
+	int			ch_id;
+	spinlock_t		lock;
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	unsigned int		descs_allocated;
+	struct middma_device	*dma;
+	bool			busy;
+	bool			in_use;
+	u32			raw_tfr;
+	u32			raw_block;
+	struct intel_mid_dma_slave *mid_slave;
+};
+
+static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan(
+						struct dma_chan *chan)
+{
+	return container_of(chan, struct intel_mid_dma_chan, chan);
+}
+
+enum intel_mid_dma_state {
+	RUNNING = 0,
+	SUSPENDED,
+};
+/**
+ * struct middma_device - internal representation of a DMA device
+ * @pdev: PCI device
+ * @dma_base: MMIO register space pointer of DMA
+ * @dma_pool: for allocating DMA descriptors
+ * @common: embedded struct dma_device
+ * @tasklet: dma tasklet for processing interrupts
+ * @ch: per channel data
+ * @pci_id: DMA device PCI ID
+ * @intr_mask: Interrupt mask to be used
+ * @mask_reg: MMIO register for periphral mask
+ * @chan_base: Base ch index (read from driver data)
+ * @max_chan: max number of chs supported (from drv_data)
+ * @block_size: Block size of DMA transfer supported (from drv_data)
+ * @pimr_mask: MMIO register addr for periphral interrupt (from drv_data)
+ * @state: dma PM device state
+ */
+struct middma_device {
+	struct pci_dev		*pdev;
+	void __iomem		*dma_base;
+	struct pci_pool		*dma_pool;
+	struct dma_device	common;
+	struct tasklet_struct   tasklet;
+	struct intel_mid_dma_chan ch[MAX_CHAN];
+	unsigned int		pci_id;
+	unsigned int		intr_mask;
+	void __iomem		*mask_reg;
+	int			chan_base;
+	int			max_chan;
+	int			block_size;
+	unsigned int		pimr_mask;
+	enum intel_mid_dma_state state;
+};
+
+static inline struct middma_device *to_middma_device(struct dma_device *common)
+{
+	return container_of(common, struct middma_device, common);
+}
+
+struct intel_mid_dma_desc {
+	void __iomem			*block; /*ch ptr*/
+	struct list_head		desc_node;
+	struct dma_async_tx_descriptor	txd;
+	size_t				len;
+	dma_addr_t			sar;
+	dma_addr_t			dar;
+	u32				cfg_hi;
+	u32				cfg_lo;
+	u32				ctl_lo;
+	u32				ctl_hi;
+	struct pci_pool			*lli_pool;
+	struct intel_mid_dma_lli	*lli;
+	dma_addr_t			lli_phys;
+	unsigned int			lli_length;
+	unsigned int			current_lli;
+	dma_addr_t			next;
+	enum dma_transfer_direction		dirn;
+	enum dma_status			status;
+	enum dma_slave_buswidth		width; /*width of DMA txn*/
+	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
+
+};
+
+struct intel_mid_dma_lli {
+	dma_addr_t			sar;
+	dma_addr_t			dar;
+	dma_addr_t			llp;
+	u32				ctl_lo;
+	u32				ctl_hi;
+} __attribute__ ((packed));
+
+static inline int test_ch_en(void __iomem *dma, u32 ch_no)
+{
+	u32 en_reg = ioread32(dma + DMA_CHAN_EN);
+	return (en_reg >> ch_no) & 0x1;
+}
+
+static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc
+		(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct intel_mid_dma_desc, txd);
+}
+
+static inline struct intel_mid_dma_slave *to_intel_mid_dma_slave
+		(struct dma_slave_config *slave)
+{
+	return container_of(slave, struct intel_mid_dma_slave, dma_slave);
+}
+
+
+int dma_resume(struct device *dev);
+
+#endif /*__INTEL_MID_DMAC_REGS_H__*/
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644
index 00000000..0ff7270a
--- /dev/null
+++ b/drivers/dma/ioat/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-y := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
new file mode 100644
index 00000000..abd9038e
--- /dev/null
+++ b/drivers/dma/ioat/dca.c
@@ -0,0 +1,684 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "dma.h"
+#include "registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/* expected tag map bytes for I/OAT ver.2 */
+#define DCA2_TAG_MAP_BYTE0 0x80
+#define DCA2_TAG_MAP_BYTE1 0x0
+#define DCA2_TAG_MAP_BYTE2 0x81
+#define DCA2_TAG_MAP_BYTE3 0x82
+#define DCA2_TAG_MAP_BYTE4 0x82
+
+/* verify if tag map matches expected values */
+static inline int dca2_tag_map_valid(u8 *tag_map)
+{
+	return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
+		(tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
+		(tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
+		(tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
+		(tag_map[4] == DCA2_TAG_MAP_BYTE4));
+}
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device.  Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x)		(DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN	8
+
+static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
+static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+	return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+	/* CPUID level 9 returns DCA configuration */
+	/* Bit 0 indicates DCA enabled by the BIOS */
+	unsigned long cpuid_level_9;
+	int res;
+
+	cpuid_level_9 = cpuid_eax(9);
+	res = test_bit(0, &cpuid_level_9);
+	if (!res)
+		dev_dbg(&pdev->dev, "DCA is disabled in BIOS\n");
+
+	return res;
+}
+
+int system_has_dca_enabled(struct pci_dev *pdev)
+{
+	if (boot_cpu_has(X86_FEATURE_DCA))
+		return dca_enabled_in_bios(pdev);
+
+	dev_dbg(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+	return 0;
+}
+
+struct ioat_dca_slot {
+	struct pci_dev *pdev;	/* requester device */
+	u16 rid;		/* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+	void __iomem		*iobase;
+	void __iomem		*dca_base;
+	int			 max_requesters;
+	int			 requester_count;
+	u8			 tag_map[IOAT_TAG_MAP_LEN];
+	struct ioat_dca_slot 	 req_slots[0];
+};
+
+/* 5000 series chipset DCA Port Requester ID Table Entry Format
+ * [15:8]	PCI-Express Bus Number
+ * [7:3]	PCI-Express Device Number
+ * [2:0]	PCI-Express Function Number
+ *
+ * 5000 series chipset DCA control register format
+ * [7:1]	Reserved (0)
+ * [0]		Ignore Function Number
+ */
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			writew(id, ioatdca->dca_base + (i * 4));
+			/* make sure the ignore function bit is off */
+			writeb(0, ioatdca->dca_base + (i * 4) + 2);
+			return i;
+		}
+	}
+	/* Error, ioatdma->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+				     struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			writew(0, ioatdca->dca_base + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+			   struct device *dev,
+			   int cpu)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	int i, apic_id, bit, value;
+	u8 entry, tag;
+
+	tag = 0;
+	apic_id = cpu_physical_id(cpu);
+
+	for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+		entry = ioatdca->tag_map[i];
+		if (entry & DCA_TAG_MAP_VALID) {
+			bit = entry & ~DCA_TAG_MAP_VALID;
+			value = (apic_id & (1 << bit)) ? 1 : 0;
+		} else {
+			value = entry ? 1 : 0;
+		}
+		tag |= (value << i);
+	}
+	return tag;
+}
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+				struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+
+	pdev = to_pci_dev(dev);
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev)
+			return 1;
+	}
+	return 0;
+}
+
+static struct dca_ops ioat_dca_ops = {
+	.add_requester		= ioat_dca_add_requester,
+	.remove_requester	= ioat_dca_remove_requester,
+	.get_tag		= ioat_dca_get_tag,
+	.dev_managed		= ioat_dca_dev_managed,
+};
+
+
+struct dca_provider * __devinit
+ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	u8 *tag_map = NULL;
+	int i;
+	int err;
+	u8 version;
+	u8 max_requesters;
+
+	if (!system_has_dca_enabled(pdev))
+		return NULL;
+
+	/* I/OAT v1 systems must have a known tag_map to support DCA */
+	switch (pdev->vendor) {
+	case PCI_VENDOR_ID_INTEL:
+		switch (pdev->device) {
+		case PCI_DEVICE_ID_INTEL_IOAT:
+			tag_map = ioat_tag_map_BNB;
+			break;
+		case PCI_DEVICE_ID_INTEL_IOAT_CNB:
+			tag_map = ioat_tag_map_CNB;
+			break;
+		case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
+			tag_map = ioat_tag_map_SCNB;
+			break;
+		}
+		break;
+	case PCI_VENDOR_ID_UNISYS:
+		switch (pdev->device) {
+		case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
+			tag_map = ioat_tag_map_UNISYS;
+			break;
+		}
+		break;
+	}
+	if (tag_map == NULL)
+		return NULL;
+
+	version = readb(iobase + IOAT_VER_OFFSET);
+	if (version == IOAT_VER_3_0)
+		max_requesters = IOAT3_DCA_MAX_REQ;
+	else
+		max_requesters = IOAT_DCA_MAX_REQ;
+
+	dca = alloc_dca_provider(&ioat_dca_ops,
+			sizeof(*ioatdca) +
+			(sizeof(struct ioat_dca_slot) * max_requesters));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->max_requesters = max_requesters;
+	ioatdca->dca_base = iobase + 0x54;
+
+	/* copy over the APIC ID to DCA tag mapping */
+	for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
+		ioatdca->tag_map[i] = tag_map[i];
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}
+
+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+			writel(id | IOAT_DCA_GREQID_VALID,
+			       ioatdca->iobase + global_req_table + (i * 4));
+			return i;
+		}
+	}
+	/* Error, ioatdma->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+				      struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+			writel(0, ioatdca->iobase + global_req_table + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca,
+			    struct device *dev,
+			    int cpu)
+{
+	u8 tag;
+
+	tag = ioat_dca_get_tag(dca, dev, cpu);
+	tag = (~tag) & 0x1F;
+	return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+	.add_requester		= ioat2_dca_add_requester,
+	.remove_requester	= ioat2_dca_remove_requester,
+	.get_tag		= ioat2_dca_get_tag,
+	.dev_managed		= ioat_dca_dev_managed,
+};
+
+static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+	int slots = 0;
+	u32 req;
+	u16 global_req_table;
+
+	global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+	if (global_req_table == 0)
+		return 0;
+	do {
+		req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+		slots++;
+	} while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+	return slots;
+}
+
+struct dca_provider * __devinit
+ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	int slots;
+	int i;
+	int err;
+	u32 tag_map;
+	u16 dca_offset;
+	u16 csi_fsb_control;
+	u16 pcie_control;
+	u8 bit;
+
+	if (!system_has_dca_enabled(pdev))
+		return NULL;
+
+	dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+	if (dca_offset == 0)
+		return NULL;
+
+	slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+	if (slots == 0)
+		return NULL;
+
+	dca = alloc_dca_provider(&ioat2_dca_ops,
+				 sizeof(*ioatdca)
+				      + (sizeof(struct ioat_dca_slot) * slots));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->iobase = iobase;
+	ioatdca->dca_base = iobase + dca_offset;
+	ioatdca->max_requesters = slots;
+
+	/* some bios might not know to turn these on */
+	csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+	if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+		csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+		writew(csi_fsb_control,
+		       ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+	}
+	pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+	if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+		pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+		writew(pcie_control,
+		       ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+	}
+
+
+	/* TODO version, compatibility and configuration checks */
+
+	/* copy out the APIC to DCA tag map */
+	tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+	for (i = 0; i < 5; i++) {
+		bit = (tag_map >> (4 * i)) & 0x0f;
+		if (bit < 8)
+			ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+		else
+			ioatdca->tag_map[i] = 0;
+	}
+
+	if (!dca2_tag_map_valid(ioatdca->tag_map)) {
+		dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
+			"disabling DCA\n");
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}
+
+static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+			writel(id | IOAT_DCA_GREQID_VALID,
+			       ioatdca->iobase + global_req_table + (i * 4));
+			return i;
+		}
+	}
+	/* Error, ioatdma->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat3_dca_remove_requester(struct dca_provider *dca,
+				      struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+			writel(0, ioatdca->iobase + global_req_table + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat3_dca_get_tag(struct dca_provider *dca,
+			    struct device *dev,
+			    int cpu)
+{
+	u8 tag;
+
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	int i, apic_id, bit, value;
+	u8 entry;
+
+	tag = 0;
+	apic_id = cpu_physical_id(cpu);
+
+	for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+		entry = ioatdca->tag_map[i];
+		if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+			bit = entry &
+				~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+			value = (apic_id & (1 << bit)) ? 1 : 0;
+		} else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+			bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+			value = (apic_id & (1 << bit)) ? 0 : 1;
+		} else {
+			value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+		}
+		tag |= (value << i);
+	}
+
+	return tag;
+}
+
+static struct dca_ops ioat3_dca_ops = {
+	.add_requester		= ioat3_dca_add_requester,
+	.remove_requester	= ioat3_dca_remove_requester,
+	.get_tag		= ioat3_dca_get_tag,
+	.dev_managed		= ioat_dca_dev_managed,
+};
+
+static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+	int slots = 0;
+	u32 req;
+	u16 global_req_table;
+
+	global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+	if (global_req_table == 0)
+		return 0;
+
+	do {
+		req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+		slots++;
+	} while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+	return slots;
+}
+
+struct dca_provider * __devinit
+ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	int slots;
+	int i;
+	int err;
+	u16 dca_offset;
+	u16 csi_fsb_control;
+	u16 pcie_control;
+	u8 bit;
+
+	union {
+		u64 full;
+		struct {
+			u32 low;
+			u32 high;
+		};
+	} tag_map;
+
+	if (!system_has_dca_enabled(pdev))
+		return NULL;
+
+	dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+	if (dca_offset == 0)
+		return NULL;
+
+	slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
+	if (slots == 0)
+		return NULL;
+
+	dca = alloc_dca_provider(&ioat3_dca_ops,
+				 sizeof(*ioatdca)
+				      + (sizeof(struct ioat_dca_slot) * slots));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->iobase = iobase;
+	ioatdca->dca_base = iobase + dca_offset;
+	ioatdca->max_requesters = slots;
+
+	/* some bios might not know to turn these on */
+	csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+	if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+		csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+		writew(csi_fsb_control,
+		       ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+	}
+	pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+	if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+		pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+		writew(pcie_control,
+		       ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+	}
+
+
+	/* TODO version, compatibility and configuration checks */
+
+	/* copy out the APIC to DCA tag map */
+	tag_map.low =
+		readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+	tag_map.high =
+		readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+	for (i = 0; i < 8; i++) {
+		bit = tag_map.full >> (8 * i);
+		ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+	}
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644
index 00000000..73b2b65c
--- /dev/null
+++ b/drivers/dma/ioat/dma.c
@@ -0,0 +1,1233 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/prefetch.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+		 "high-water mark for pushing ioat descriptors (default: 4)");
+
+/* internal functions */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat);
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+	struct ioatdma_device *instance = data;
+	struct ioat_chan_common *chan;
+	unsigned long attnstatus;
+	int bit;
+	u8 intrctrl;
+
+	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+		return IRQ_NONE;
+
+	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+		return IRQ_NONE;
+	}
+
+	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+	for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) {
+		chan = ioat_chan_by_index(instance, bit);
+		tasklet_schedule(&chan->cleanup_task);
+	}
+
+	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+	return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+	struct ioat_chan_common *chan = data;
+
+	tasklet_schedule(&chan->cleanup_task);
+
+	return IRQ_HANDLED;
+}
+
+/* common channel initialization */
+void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx)
+{
+	struct dma_device *dma = &device->common;
+	struct dma_chan *c = &chan->common;
+	unsigned long data = (unsigned long) c;
+
+	chan->device = device;
+	chan->reg_base = device->reg_base + (0x80 * (idx + 1));
+	spin_lock_init(&chan->cleanup_lock);
+	chan->common.device = dma;
+	dma_cookie_init(&chan->common);
+	list_add_tail(&chan->common.device_node, &dma->channels);
+	device->idx[idx] = chan;
+	init_timer(&chan->timer);
+	chan->timer.function = device->timer_fn;
+	chan->timer.data = data;
+	tasklet_init(&chan->cleanup_task, device->cleanup_fn, data);
+	tasklet_disable(&chan->cleanup_task);
+}
+
+/**
+ * ioat1_dma_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat1_enumerate_channels(struct ioatdma_device *device)
+{
+	u8 xfercap_scale;
+	u32 xfercap;
+	int i;
+	struct ioat_dma_chan *ioat;
+	struct device *dev = &device->pdev->dev;
+	struct dma_device *dma = &device->common;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+	dma->chancnt &= 0x1f; /* bits [4:0] valid */
+	if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+		dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+			 dma->chancnt, ARRAY_SIZE(device->idx));
+		dma->chancnt = ARRAY_SIZE(device->idx);
+	}
+	xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+	xfercap_scale &= 0x1f; /* bits [4:0] valid */
+	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+	dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
+
+#ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
+	if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+		dma->chancnt--;
+#endif
+	for (i = 0; i < dma->chancnt; i++) {
+		ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+		if (!ioat)
+			break;
+
+		ioat_init_channel(device, &ioat->base, i);
+		ioat->xfercap = xfercap;
+		spin_lock_init(&ioat->desc_lock);
+		INIT_LIST_HEAD(&ioat->free_desc);
+		INIT_LIST_HEAD(&ioat->used_desc);
+	}
+	dma->chancnt = i;
+	return i;
+}
+
+/**
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
+ *                                 descriptors to hw
+ * @chan: DMA channel handle
+ */
+static inline void
+__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
+{
+	void __iomem *reg_base = ioat->base.reg_base;
+
+	dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
+		__func__, ioat->pending);
+	ioat->pending = 0;
+	writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+	struct ioat_dma_chan *ioat = to_ioat_chan(chan);
+
+	if (ioat->pending > 0) {
+		spin_lock_bh(&ioat->desc_lock);
+		__ioat1_dma_memcpy_issue_pending(ioat);
+		spin_unlock_bh(&ioat->desc_lock);
+	}
+}
+
+/**
+ * ioat1_reset_channel - restart a channel
+ * @ioat: IOAT DMA channel handle
+ */
+static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	void __iomem *reg_base = chan->reg_base;
+	u32 chansts, chanerr;
+
+	dev_warn(to_dev(chan), "reset\n");
+	chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
+	chansts = *chan->completion & IOAT_CHANSTS_STATUS;
+	if (chanerr) {
+		dev_err(to_dev(chan),
+			"chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+			chan_num(chan), chansts, chanerr);
+		writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
+	}
+
+	/*
+	 * whack it upside the head with a reset
+	 * and wait for things to settle out.
+	 * force the pending count to a really big negative
+	 * to make sure no one forces an issue_pending
+	 * while we're waiting.
+	 */
+
+	ioat->pending = INT_MIN;
+	writeb(IOAT_CHANCMD_RESET,
+	       reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+	set_bit(IOAT_RESET_PENDING, &chan->state);
+	mod_timer(&chan->timer, jiffies + RESET_DELAY);
+}
+
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_chan *c = tx->chan;
+	struct ioat_dma_chan *ioat = to_ioat_chan(c);
+	struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_desc_sw *first;
+	struct ioat_desc_sw *chain_tail;
+	dma_cookie_t cookie;
+
+	spin_lock_bh(&ioat->desc_lock);
+	/* cookie incr and addition to used_list must be atomic */
+	cookie = dma_cookie_assign(tx);
+	dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+	/* write address into NextDescriptor field of last desc in chain */
+	first = to_ioat_desc(desc->tx_list.next);
+	chain_tail = to_ioat_desc(ioat->used_desc.prev);
+	/* make descriptor updates globally visible before chaining */
+	wmb();
+	chain_tail->hw->next = first->txd.phys;
+	list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
+	dump_desc_dbg(ioat, chain_tail);
+	dump_desc_dbg(ioat, first);
+
+	if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	ioat->active += desc->hw->tx_cnt;
+	ioat->pending += desc->hw->tx_cnt;
+	if (ioat->pending >= ioat_pending_level)
+		__ioat1_dma_memcpy_issue_pending(ioat);
+	spin_unlock_bh(&ioat->desc_lock);
+
+	return cookie;
+}
+
+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
+static struct ioat_desc_sw *
+ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
+{
+	struct ioat_dma_descriptor *desc;
+	struct ioat_desc_sw *desc_sw;
+	struct ioatdma_device *ioatdma_device;
+	dma_addr_t phys;
+
+	ioatdma_device = ioat->base.device;
+	desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
+	if (unlikely(!desc))
+		return NULL;
+
+	desc_sw = kzalloc(sizeof(*desc_sw), flags);
+	if (unlikely(!desc_sw)) {
+		pci_pool_free(ioatdma_device->dma_pool, desc, phys);
+		return NULL;
+	}
+
+	memset(desc, 0, sizeof(*desc));
+
+	INIT_LIST_HEAD(&desc_sw->tx_list);
+	dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
+	desc_sw->txd.tx_submit = ioat1_tx_submit;
+	desc_sw->hw = desc;
+	desc_sw->txd.phys = phys;
+	set_desc_id(desc_sw, -1);
+
+	return desc_sw;
+}
+
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+		 "ioat1: initial descriptors per channel (default: 256)");
+/**
+ * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan: the channel to be filled out
+ */
+static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
+{
+	struct ioat_dma_chan *ioat = to_ioat_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_desc_sw *desc;
+	u32 chanerr;
+	int i;
+	LIST_HEAD(tmp_list);
+
+	/* have we already been set up? */
+	if (!list_empty(&ioat->free_desc))
+		return ioat->desccount;
+
+	/* Setup register to interrupt and write completion status on error */
+	writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+	if (chanerr) {
+		dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+		writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+	}
+
+	/* Allocate descriptors */
+	for (i = 0; i < ioat_initial_desc_count; i++) {
+		desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
+		if (!desc) {
+			dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
+			break;
+		}
+		set_desc_id(desc, i);
+		list_add_tail(&desc->node, &tmp_list);
+	}
+	spin_lock_bh(&ioat->desc_lock);
+	ioat->desccount = i;
+	list_splice(&tmp_list, &ioat->free_desc);
+	spin_unlock_bh(&ioat->desc_lock);
+
+	/* allocate a completion writeback area */
+	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+	chan->completion = pci_pool_alloc(chan->device->completion_pool,
+					  GFP_KERNEL, &chan->completion_dma);
+	memset(chan->completion, 0, sizeof(*chan->completion));
+	writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+	       chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+	writel(((u64) chan->completion_dma) >> 32,
+	       chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+	tasklet_enable(&chan->cleanup_task);
+	ioat1_dma_start_null_desc(ioat);  /* give chain to dma device */
+	dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+		__func__, ioat->desccount);
+	return ioat->desccount;
+}
+
+/**
+ * ioat1_dma_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+static void ioat1_dma_free_chan_resources(struct dma_chan *c)
+{
+	struct ioat_dma_chan *ioat = to_ioat_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioatdma_device *ioatdma_device = chan->device;
+	struct ioat_desc_sw *desc, *_desc;
+	int in_use_descs = 0;
+
+	/* Before freeing channel resources first check
+	 * if they have been previously allocated for this channel.
+	 */
+	if (ioat->desccount == 0)
+		return;
+
+	tasklet_disable(&chan->cleanup_task);
+	del_timer_sync(&chan->timer);
+	ioat1_cleanup(ioat);
+
+	/* Delay 100ms after reset to allow internal DMA logic to quiesce
+	 * before removing DMA descriptor resources.
+	 */
+	writeb(IOAT_CHANCMD_RESET,
+	       chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+	mdelay(100);
+
+	spin_lock_bh(&ioat->desc_lock);
+	list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
+		dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
+			__func__, desc_id(desc));
+		dump_desc_dbg(ioat, desc);
+		in_use_descs++;
+		list_del(&desc->node);
+		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+			      desc->txd.phys);
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc,
+				 &ioat->free_desc, node) {
+		list_del(&desc->node);
+		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+			      desc->txd.phys);
+		kfree(desc);
+	}
+	spin_unlock_bh(&ioat->desc_lock);
+
+	pci_pool_free(ioatdma_device->completion_pool,
+		      chan->completion,
+		      chan->completion_dma);
+
+	/* one is ok since we left it on there on purpose */
+	if (in_use_descs > 1)
+		dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+			in_use_descs - 1);
+
+	chan->last_completion = 0;
+	chan->completion_dma = 0;
+	ioat->pending = 0;
+	ioat->desccount = 0;
+}
+
+/**
+ * ioat1_dma_get_next_descriptor - return the next available descriptor
+ * @ioat: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the chain, and must be called with the
+ * channel's desc_lock held.  Allocates more descriptors if the channel
+ * has run out.
+ */
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
+{
+	struct ioat_desc_sw *new;
+
+	if (!list_empty(&ioat->free_desc)) {
+		new = to_ioat_desc(ioat->free_desc.next);
+		list_del(&new->node);
+	} else {
+		/* try to get another desc */
+		new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
+		if (!new) {
+			dev_err(to_dev(&ioat->base), "alloc failed\n");
+			return NULL;
+		}
+	}
+	dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
+		__func__, desc_id(new));
+	prefetch(new->hw);
+	return new;
+}
+
+static struct dma_async_tx_descriptor *
+ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+		      dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct ioat_dma_chan *ioat = to_ioat_chan(c);
+	struct ioat_desc_sw *desc;
+	size_t copy;
+	LIST_HEAD(chain);
+	dma_addr_t src = dma_src;
+	dma_addr_t dest = dma_dest;
+	size_t total_len = len;
+	struct ioat_dma_descriptor *hw = NULL;
+	int tx_cnt = 0;
+
+	spin_lock_bh(&ioat->desc_lock);
+	desc = ioat1_dma_get_next_descriptor(ioat);
+	do {
+		if (!desc)
+			break;
+
+		tx_cnt++;
+		copy = min_t(size_t, len, ioat->xfercap);
+
+		hw = desc->hw;
+		hw->size = copy;
+		hw->ctl = 0;
+		hw->src_addr = src;
+		hw->dst_addr = dest;
+
+		list_add_tail(&desc->node, &chain);
+
+		len -= copy;
+		dest += copy;
+		src += copy;
+		if (len) {
+			struct ioat_desc_sw *next;
+
+			async_tx_ack(&desc->txd);
+			next = ioat1_dma_get_next_descriptor(ioat);
+			hw->next = next ? next->txd.phys : 0;
+			dump_desc_dbg(ioat, desc);
+			desc = next;
+		} else
+			hw->next = 0;
+	} while (len);
+
+	if (!desc) {
+		struct ioat_chan_common *chan = &ioat->base;
+
+		dev_err(to_dev(chan),
+			"chan%d - get_next_desc failed\n", chan_num(chan));
+		list_splice(&chain, &ioat->free_desc);
+		spin_unlock_bh(&ioat->desc_lock);
+		return NULL;
+	}
+	spin_unlock_bh(&ioat->desc_lock);
+
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	list_splice(&chain, &desc->tx_list);
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.compl_write = 1;
+	hw->tx_cnt = tx_cnt;
+	dump_desc_dbg(ioat, desc);
+
+	return &desc->txd;
+}
+
+static void ioat1_cleanup_event(unsigned long data)
+{
+	struct ioat_dma_chan *ioat = to_ioat_chan((void *) data);
+
+	ioat1_cleanup(ioat);
+	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+		    size_t len, struct ioat_dma_descriptor *hw)
+{
+	struct pci_dev *pdev = chan->device->pdev;
+	size_t offset = len - hw->size;
+
+	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+		ioat_unmap(pdev, hw->dst_addr - offset, len,
+			   PCI_DMA_FROMDEVICE, flags, 1);
+
+	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
+		ioat_unmap(pdev, hw->src_addr - offset, len,
+			   PCI_DMA_TODEVICE, flags, 0);
+}
+
+dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan)
+{
+	dma_addr_t phys_complete;
+	u64 completion;
+
+	completion = *chan->completion;
+	phys_complete = ioat_chansts_to_addr(completion);
+
+	dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
+		(unsigned long long) phys_complete);
+
+	if (is_ioat_halted(completion)) {
+		u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+		dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
+			chanerr);
+
+		/* TODO do something to salvage the situation */
+	}
+
+	return phys_complete;
+}
+
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+			   dma_addr_t *phys_complete)
+{
+	*phys_complete = ioat_get_current_completion(chan);
+	if (*phys_complete == chan->last_completion)
+		return false;
+	clear_bit(IOAT_COMPLETION_ACK, &chan->state);
+	mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	return true;
+}
+
+static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	struct list_head *_desc, *n;
+	struct dma_async_tx_descriptor *tx;
+
+	dev_dbg(to_dev(chan), "%s: phys_complete: %llx\n",
+		 __func__, (unsigned long long) phys_complete);
+	list_for_each_safe(_desc, n, &ioat->used_desc) {
+		struct ioat_desc_sw *desc;
+
+		prefetch(n);
+		desc = list_entry(_desc, typeof(*desc), node);
+		tx = &desc->txd;
+		/*
+		 * Incoming DMA requests may use multiple descriptors,
+		 * due to exceeding xfercap, perhaps. If so, only the
+		 * last one will have a cookie, and require unmapping.
+		 */
+		dump_desc_dbg(ioat, desc);
+		if (tx->cookie) {
+			dma_cookie_complete(tx);
+			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+			ioat->active -= desc->hw->tx_cnt;
+			if (tx->callback) {
+				tx->callback(tx->callback_param);
+				tx->callback = NULL;
+			}
+		}
+
+		if (tx->phys != phys_complete) {
+			/*
+			 * a completed entry, but not the last, so clean
+			 * up if the client is done with the descriptor
+			 */
+			if (async_tx_test_ack(tx))
+				list_move_tail(&desc->node, &ioat->free_desc);
+		} else {
+			/*
+			 * last used desc. Do not remove, so we can
+			 * append from it.
+			 */
+
+			/* if nothing else is pending, cancel the
+			 * completion timeout
+			 */
+			if (n == &ioat->used_desc) {
+				dev_dbg(to_dev(chan),
+					"%s cancel completion timeout\n",
+					__func__);
+				clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+			}
+
+			/* TODO check status bits? */
+			break;
+		}
+	}
+
+	chan->last_completion = phys_complete;
+}
+
+/**
+ * ioat1_cleanup - cleanup up finished descriptors
+ * @chan: ioat channel to be cleaned up
+ *
+ * To prevent lock contention we defer cleanup when the locks are
+ * contended with a terminal timeout that forces cleanup and catches
+ * completion notification errors.
+ */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	dma_addr_t phys_complete;
+
+	prefetch(chan->completion);
+
+	if (!spin_trylock_bh(&chan->cleanup_lock))
+		return;
+
+	if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+		spin_unlock_bh(&chan->cleanup_lock);
+		return;
+	}
+
+	if (!spin_trylock_bh(&ioat->desc_lock)) {
+		spin_unlock_bh(&chan->cleanup_lock);
+		return;
+	}
+
+	__cleanup(ioat, phys_complete);
+
+	spin_unlock_bh(&ioat->desc_lock);
+	spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat1_timer_event(unsigned long data)
+{
+	struct ioat_dma_chan *ioat = to_ioat_chan((void *) data);
+	struct ioat_chan_common *chan = &ioat->base;
+
+	dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
+
+	spin_lock_bh(&chan->cleanup_lock);
+	if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
+		struct ioat_desc_sw *desc;
+
+		spin_lock_bh(&ioat->desc_lock);
+
+		/* restart active descriptors */
+		desc = to_ioat_desc(ioat->used_desc.prev);
+		ioat_set_chainaddr(ioat, desc->txd.phys);
+		ioat_start(chan);
+
+		ioat->pending = 0;
+		set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+		spin_unlock_bh(&ioat->desc_lock);
+	} else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+		dma_addr_t phys_complete;
+
+		spin_lock_bh(&ioat->desc_lock);
+		/* if we haven't made progress and we have already
+		 * acknowledged a pending completion once, then be more
+		 * forceful with a restart
+		 */
+		if (ioat_cleanup_preamble(chan, &phys_complete))
+			__cleanup(ioat, phys_complete);
+		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+			ioat1_reset_channel(ioat);
+		else {
+			u64 status = ioat_chansts(chan);
+
+			/* manually update the last completion address */
+			if (ioat_chansts_to_addr(status) != 0)
+				*chan->completion = status;
+
+			set_bit(IOAT_COMPLETION_ACK, &chan->state);
+			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+		}
+		spin_unlock_bh(&ioat->desc_lock);
+	}
+	spin_unlock_bh(&chan->cleanup_lock);
+}
+
+enum dma_status
+ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+		   struct dma_tx_state *txstate)
+{
+	struct ioat_chan_common *chan = to_chan_common(c);
+	struct ioatdma_device *device = chan->device;
+	enum dma_status ret;
+
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	device->cleanup_fn((unsigned long) c);
+
+	return dma_cookie_status(c, cookie, txstate);
+}
+
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_desc_sw *desc;
+	struct ioat_dma_descriptor *hw;
+
+	spin_lock_bh(&ioat->desc_lock);
+
+	desc = ioat1_dma_get_next_descriptor(ioat);
+
+	if (!desc) {
+		dev_err(to_dev(chan),
+			"Unable to start null desc - get next desc failed\n");
+		spin_unlock_bh(&ioat->desc_lock);
+		return;
+	}
+
+	hw = desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = 1;
+	hw->ctl_f.compl_write = 1;
+	/* set size to non-zero value (channel returns error when size is 0) */
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	hw->src_addr = 0;
+	hw->dst_addr = 0;
+	async_tx_ack(&desc->txd);
+	hw->next = 0;
+	list_add_tail(&desc->node, &ioat->used_desc);
+	dump_desc_dbg(ioat, desc);
+
+	ioat_set_chainaddr(ioat, desc->txd.phys);
+	ioat_start(chan);
+	spin_unlock_bh(&ioat->desc_lock);
+}
+
+/*
+ * Perform a IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void __devinit ioat_dma_test_callback(void *dma_async_param)
+{
+	struct completion *cmp = dma_async_param;
+
+	complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+int __devinit ioat_dma_self_test(struct ioatdma_device *device)
+{
+	int i;
+	u8 *src;
+	u8 *dest;
+	struct dma_device *dma = &device->common;
+	struct device *dev = &device->pdev->dev;
+	struct dma_chan *dma_chan;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	int err = 0;
+	struct completion cmp;
+	unsigned long tmo;
+	unsigned long flags;
+
+	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < IOAT_TEST_SIZE; i++)
+		src[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(dma->channels.next, struct dma_chan,
+				device_node);
+	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+		dev_err(dev, "selftest cannot allocate chan resource\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+	dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+	flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
+		DMA_PREP_INTERRUPT;
+	tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+						   IOAT_TEST_SIZE, flags);
+	if (!tx) {
+		dev_err(dev, "Self-test prep failed, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test setup failed, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (tmo == 0 ||
+	    dma->device_tx_status(dma_chan, cookie, NULL)
+					!= DMA_SUCCESS) {
+		dev_err(dev, "Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+		dev_err(dev, "Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	dma->device_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+		    sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+		 "set ioat interrupt style: msix (default), "
+		 "msix-single-vector, msi, intx)");
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+{
+	struct ioat_chan_common *chan;
+	struct pci_dev *pdev = device->pdev;
+	struct device *dev = &pdev->dev;
+	struct msix_entry *msix;
+	int i, j, msixcnt;
+	int err = -EINVAL;
+	u8 intrctrl = 0;
+
+	if (!strcmp(ioat_interrupt_style, "msix"))
+		goto msix;
+	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+		goto msix_single_vector;
+	if (!strcmp(ioat_interrupt_style, "msi"))
+		goto msi;
+	if (!strcmp(ioat_interrupt_style, "intx"))
+		goto intx;
+	dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+	goto err_no_irq;
+
+msix:
+	/* The number of MSI-X vectors should equal the number of channels */
+	msixcnt = device->common.chancnt;
+	for (i = 0; i < msixcnt; i++)
+		device->msix_entries[i].entry = i;
+
+	err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
+	if (err < 0)
+		goto msi;
+	if (err > 0)
+		goto msix_single_vector;
+
+	for (i = 0; i < msixcnt; i++) {
+		msix = &device->msix_entries[i];
+		chan = ioat_chan_by_index(device, i);
+		err = devm_request_irq(dev, msix->vector,
+				       ioat_dma_do_interrupt_msix, 0,
+				       "ioat-msix", chan);
+		if (err) {
+			for (j = 0; j < i; j++) {
+				msix = &device->msix_entries[j];
+				chan = ioat_chan_by_index(device, j);
+				devm_free_irq(dev, msix->vector, chan);
+			}
+			goto msix_single_vector;
+		}
+	}
+	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+	goto done;
+
+msix_single_vector:
+	msix = &device->msix_entries[0];
+	msix->entry = 0;
+	err = pci_enable_msix(pdev, device->msix_entries, 1);
+	if (err)
+		goto msi;
+
+	err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
+			       "ioat-msix", device);
+	if (err) {
+		pci_disable_msix(pdev);
+		goto msi;
+	}
+	goto done;
+
+msi:
+	err = pci_enable_msi(pdev);
+	if (err)
+		goto intx;
+
+	err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+			       "ioat-msi", device);
+	if (err) {
+		pci_disable_msi(pdev);
+		goto intx;
+	}
+	goto done;
+
+intx:
+	err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+			       IRQF_SHARED, "ioat-intx", device);
+	if (err)
+		goto err_no_irq;
+
+done:
+	if (device->intr_quirk)
+		device->intr_quirk(device);
+	intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+	writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	return 0;
+
+err_no_irq:
+	/* Disable all interrupt generation */
+	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	dev_err(dev, "no usable interrupts\n");
+	return err;
+}
+
+static void ioat_disable_interrupts(struct ioatdma_device *device)
+{
+	/* Disable all interrupt generation */
+	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device)
+{
+	int err = -ENODEV;
+	struct dma_device *dma = &device->common;
+	struct pci_dev *pdev = device->pdev;
+	struct device *dev = &pdev->dev;
+
+	/* DMA coherent memory pool for DMA descriptor allocations */
+	device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+					   sizeof(struct ioat_dma_descriptor),
+					   64, 0);
+	if (!device->dma_pool) {
+		err = -ENOMEM;
+		goto err_dma_pool;
+	}
+
+	device->completion_pool = pci_pool_create("completion_pool", pdev,
+						  sizeof(u64), SMP_CACHE_BYTES,
+						  SMP_CACHE_BYTES);
+
+	if (!device->completion_pool) {
+		err = -ENOMEM;
+		goto err_completion_pool;
+	}
+
+	device->enumerate_channels(device);
+
+	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+	dma->dev = &pdev->dev;
+
+	if (!dma->chancnt) {
+		dev_err(dev, "channel enumeration error\n");
+		goto err_setup_interrupts;
+	}
+
+	err = ioat_dma_setup_interrupts(device);
+	if (err)
+		goto err_setup_interrupts;
+
+	err = device->self_test(device);
+	if (err)
+		goto err_self_test;
+
+	return 0;
+
+err_self_test:
+	ioat_disable_interrupts(device);
+err_setup_interrupts:
+	pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+	pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+	return err;
+}
+
+int __devinit ioat_register(struct ioatdma_device *device)
+{
+	int err = dma_async_device_register(&device->common);
+
+	if (err) {
+		ioat_disable_interrupts(device);
+		pci_pool_destroy(device->completion_pool);
+		pci_pool_destroy(device->dma_pool);
+	}
+
+	return err;
+}
+
+/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
+static void ioat1_intr_quirk(struct ioatdma_device *device)
+{
+	struct pci_dev *pdev = device->pdev;
+	u32 dmactrl;
+
+	pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
+	if (pdev->msi_enabled)
+		dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
+	else
+		dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
+	pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+	struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+	return sprintf(page, "%d\n", ioat->desccount);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+	struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+	return sprintf(page, "%d\n", ioat->active);
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static ssize_t cap_show(struct dma_chan *c, char *page)
+{
+	struct dma_device *dma = c->device;
+
+	return sprintf(page, "copy%s%s%s%s%s%s\n",
+		       dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+		       dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+		       dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+		       dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+		       dma_has_cap(DMA_MEMSET, dma->cap_mask)  ? " fill" : "",
+		       dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
+
+}
+struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+
+static ssize_t version_show(struct dma_chan *c, char *page)
+{
+	struct dma_device *dma = c->device;
+	struct ioatdma_device *device = to_ioatdma_device(dma);
+
+	return sprintf(page, "%d.%d\n",
+		       device->version >> 4, device->version & 0xf);
+}
+struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+
+static struct attribute *ioat1_attrs[] = {
+	&ring_size_attr.attr,
+	&ring_active_attr.attr,
+	&ioat_cap_attr.attr,
+	&ioat_version_attr.attr,
+	NULL,
+};
+
+static ssize_t
+ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+	struct ioat_sysfs_entry *entry;
+	struct ioat_chan_common *chan;
+
+	entry = container_of(attr, struct ioat_sysfs_entry, attr);
+	chan = container_of(kobj, struct ioat_chan_common, kobj);
+
+	if (!entry->show)
+		return -EIO;
+	return entry->show(&chan->common, page);
+}
+
+const struct sysfs_ops ioat_sysfs_ops = {
+	.show	= ioat_attr_show,
+};
+
+static struct kobj_type ioat1_ktype = {
+	.sysfs_ops = &ioat_sysfs_ops,
+	.default_attrs = ioat1_attrs,
+};
+
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
+{
+	struct dma_device *dma = &device->common;
+	struct dma_chan *c;
+
+	list_for_each_entry(c, &dma->channels, device_node) {
+		struct ioat_chan_common *chan = to_chan_common(c);
+		struct kobject *parent = &c->dev->device.kobj;
+		int err;
+
+		err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
+		if (err) {
+			dev_warn(to_dev(chan),
+				 "sysfs init error (%d), continuing...\n", err);
+			kobject_put(&chan->kobj);
+			set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
+		}
+	}
+}
+
+void ioat_kobject_del(struct ioatdma_device *device)
+{
+	struct dma_device *dma = &device->common;
+	struct dma_chan *c;
+
+	list_for_each_entry(c, &dma->channels, device_node) {
+		struct ioat_chan_common *chan = to_chan_common(c);
+
+		if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
+			kobject_del(&chan->kobj);
+			kobject_put(&chan->kobj);
+		}
+	}
+}
+
+int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
+{
+	struct pci_dev *pdev = device->pdev;
+	struct dma_device *dma;
+	int err;
+
+	device->intr_quirk = ioat1_intr_quirk;
+	device->enumerate_channels = ioat1_enumerate_channels;
+	device->self_test = ioat_dma_self_test;
+	device->timer_fn = ioat1_timer_event;
+	device->cleanup_fn = ioat1_cleanup_event;
+	dma = &device->common;
+	dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+	dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
+	dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
+	dma->device_tx_status = ioat_dma_tx_status;
+
+	err = ioat_probe(device);
+	if (err)
+		return err;
+	ioat_set_tcp_copy_break(4096);
+	err = ioat_register(device);
+	if (err)
+		return err;
+	ioat_kobject_add(device, &ioat1_ktype);
+
+	if (dca)
+		device->dca = ioat_dca_init(pdev, device->reg_base);
+
+	return err;
+}
+
+void __devexit ioat_dma_remove(struct ioatdma_device *device)
+{
+	struct dma_device *dma = &device->common;
+
+	ioat_disable_interrupts(device);
+
+	ioat_kobject_del(device);
+
+	dma_async_device_unregister(dma);
+
+	pci_pool_destroy(device->dma_pool);
+	pci_pool_destroy(device->completion_pool);
+
+	INIT_LIST_HEAD(&dma->channels);
+}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644
index 00000000..5e8fe01b
--- /dev/null
+++ b/drivers/dma/ioat/dma.h
@@ -0,0 +1,327 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "hw.h"
+#include "registers.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <net/tcp.h>
+
+#define IOAT_DMA_VERSION  "4.00"
+
+#define IOAT_LOW_COMPLETION_MASK	0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU		~0
+
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
+#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
+
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+/**
+ * struct ioatdma_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @common: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ * @dca: direct cache access context
+ * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
+ * @enumerate_channels: hw version specific channel enumeration
+ * @reset_hw: hw version specific channel (re)initialization
+ * @cleanup_fn: select between the v2 and v3 cleanup routines
+ * @timer_fn: select between the v2 and v3 timer watchdog routines
+ * @self_test: hardware version specific self test for each supported op type
+ *
+ * Note: the v3 cleanup routine supports raid operations
+ */
+struct ioatdma_device {
+	struct pci_dev *pdev;
+	void __iomem *reg_base;
+	struct pci_pool *dma_pool;
+	struct pci_pool *completion_pool;
+	struct dma_device common;
+	u8 version;
+	struct msix_entry msix_entries[4];
+	struct ioat_chan_common *idx[4];
+	struct dca_provider *dca;
+	void (*intr_quirk)(struct ioatdma_device *device);
+	int (*enumerate_channels)(struct ioatdma_device *device);
+	int (*reset_hw)(struct ioat_chan_common *chan);
+	void (*cleanup_fn)(unsigned long data);
+	void (*timer_fn)(unsigned long data);
+	int (*self_test)(struct ioatdma_device *device);
+};
+
+struct ioat_chan_common {
+	struct dma_chan common;
+	void __iomem *reg_base;
+	dma_addr_t last_completion;
+	spinlock_t cleanup_lock;
+	unsigned long state;
+	#define IOAT_COMPLETION_PENDING 0
+	#define IOAT_COMPLETION_ACK 1
+	#define IOAT_RESET_PENDING 2
+	#define IOAT_KOBJ_INIT_FAIL 3
+	#define IOAT_RESHAPE_PENDING 4
+	#define IOAT_RUN 5
+	struct timer_list timer;
+	#define COMPLETION_TIMEOUT msecs_to_jiffies(100)
+	#define IDLE_TIMEOUT msecs_to_jiffies(2000)
+	#define RESET_DELAY msecs_to_jiffies(100)
+	struct ioatdma_device *device;
+	dma_addr_t completion_dma;
+	u64 *completion;
+	struct tasklet_struct cleanup_task;
+	struct kobject kobj;
+};
+
+struct ioat_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct dma_chan *, char *);
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ */
+struct ioat_dma_chan {
+	struct ioat_chan_common base;
+
+	size_t xfercap;	/* XFERCAP register value expanded out */
+
+	spinlock_t desc_lock;
+	struct list_head free_desc;
+	struct list_head used_desc;
+
+	int pending;
+	u16 desccount;
+	u16 active;
+};
+
+static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
+{
+	return container_of(c, struct ioat_chan_common, common);
+}
+
+static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
+{
+	struct ioat_chan_common *chan = to_chan_common(c);
+
+	return container_of(chan, struct ioat_dma_chan, base);
+}
+
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @node: this descriptor will either be on the free list,
+ *     or attached to a transaction list (tx_list)
+ * @txd: the generic software descriptor for all engines
+ * @id: identifier for debug
+ */
+struct ioat_desc_sw {
+	struct ioat_dma_descriptor *hw;
+	struct list_head node;
+	size_t len;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor txd;
+	#ifdef DEBUG
+	int id;
+	#endif
+};
+
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+static inline void
+__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
+		struct dma_async_tx_descriptor *tx, int id)
+{
+	struct device *dev = to_dev(chan);
+
+	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+		" ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
+		(unsigned long long) tx->phys,
+		(unsigned long long) hw->next, tx->cookie, tx->flags,
+		hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
+#define dump_desc_dbg(c, d) \
+	({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; })
+
+static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
+{
+	#ifdef CONFIG_NET_DMA
+	sysctl_tcp_dma_copybreak = copybreak;
+	#endif
+}
+
+static inline struct ioat_chan_common *
+ioat_chan_by_index(struct ioatdma_device *device, int index)
+{
+	return device->idx[index];
+}
+
+static inline u64 ioat_chansts(struct ioat_chan_common *chan)
+{
+	u8 ver = chan->device->version;
+	u64 status;
+	u32 status_lo;
+
+	/* We need to read the low address first as this causes the
+	 * chipset to latch the upper bits for the subsequent read
+	 */
+	status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
+	status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
+	status <<= 32;
+	status |= status_lo;
+
+	return status;
+}
+
+static inline void ioat_start(struct ioat_chan_common *chan)
+{
+	u8 ver = chan->device->version;
+
+	writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+	return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
+{
+	return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+static inline void ioat_suspend(struct ioat_chan_common *chan)
+{
+	u8 ver = chan->device->version;
+
+	writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline void ioat_reset(struct ioat_chan_common *chan)
+{
+	u8 ver = chan->device->version;
+
+	writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline bool ioat_reset_pending(struct ioat_chan_common *chan)
+{
+	u8 ver = chan->device->version;
+	u8 cmd;
+
+	cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+	return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+
+	writel(addr & 0x00000000FFFFFFFF,
+	       chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+	writel(addr >> 32,
+	       chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+}
+
+static inline bool is_ioat_active(unsigned long status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline bool is_ioat_idle(unsigned long status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
+}
+
+static inline bool is_ioat_halted(unsigned long status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline bool is_ioat_suspended(unsigned long status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+/* channel was fatally programmed */
+static inline bool is_ioat_bug(unsigned long err)
+{
+	return !!err;
+}
+
+static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
+			      int direction, enum dma_ctrl_flags flags, bool dst)
+{
+	if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
+	    (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
+		pci_unmap_single(pdev, addr, len, direction);
+	else
+		pci_unmap_page(pdev, addr, len, direction);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device);
+int __devinit ioat_register(struct ioatdma_device *device);
+int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat_dma_self_test(struct ioatdma_device *device);
+void __devexit ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
+					      void __iomem *iobase);
+dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan);
+void ioat_init_channel(struct ioatdma_device *device,
+		       struct ioat_chan_common *chan, int idx);
+enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+				   struct dma_tx_state *txstate);
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+		    size_t len, struct ioat_dma_descriptor *hw);
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+			   dma_addr_t *phys_complete);
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *device);
+extern const struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
new file mode 100644
index 00000000..86895760
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.c
@@ -0,0 +1,907 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
+ * does asynchronous data movement and checksumming operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/prefetch.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+int ioat_ring_alloc_order = 8;
+module_param(ioat_ring_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_alloc_order,
+		 "ioat2+: allocate 2^n descriptors per channel"
+		 " (default: 8 max: 16)");
+static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
+module_param(ioat_ring_max_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_max_alloc_order,
+		 "ioat2+: upper limit for ring size (default: 16)");
+
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+
+	ioat->dmacount += ioat2_ring_pending(ioat);
+	ioat->issued = ioat->head;
+	writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+	dev_dbg(to_dev(chan),
+		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+		__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+}
+
+void ioat2_issue_pending(struct dma_chan *c)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+	if (ioat2_ring_pending(ioat)) {
+		spin_lock_bh(&ioat->prep_lock);
+		__ioat2_issue_pending(ioat);
+		spin_unlock_bh(&ioat->prep_lock);
+	}
+}
+
+/**
+ * ioat2_update_pending - log pending descriptors
+ * @ioat: ioat2+ channel
+ *
+ * Check if the number of unsubmitted descriptors has exceeded the
+ * watermark.  Called with prep_lock held
+ */
+static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
+{
+	if (ioat2_ring_pending(ioat) > ioat_pending_level)
+		__ioat2_issue_pending(ioat);
+}
+
+static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_ring_ent *desc;
+	struct ioat_dma_descriptor *hw;
+
+	if (ioat2_ring_space(ioat) < 1) {
+		dev_err(to_dev(&ioat->base),
+			"Unable to start null desc - ring full\n");
+		return;
+	}
+
+	dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
+		__func__, ioat->head, ioat->tail, ioat->issued);
+	desc = ioat2_get_ring_ent(ioat, ioat->head);
+
+	hw = desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = 1;
+	hw->ctl_f.compl_write = 1;
+	/* set size to non-zero value (channel returns error when size is 0) */
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	hw->src_addr = 0;
+	hw->dst_addr = 0;
+	async_tx_ack(&desc->txd);
+	ioat2_set_chainaddr(ioat, desc->txd.phys);
+	dump_desc_dbg(ioat, desc);
+	wmb();
+	ioat->head += 1;
+	__ioat2_issue_pending(ioat);
+}
+
+static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+	spin_lock_bh(&ioat->prep_lock);
+	__ioat2_start_null_desc(ioat);
+	spin_unlock_bh(&ioat->prep_lock);
+}
+
+static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	struct dma_async_tx_descriptor *tx;
+	struct ioat_ring_ent *desc;
+	bool seen_current = false;
+	u16 active;
+	int idx = ioat->tail, i;
+
+	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+		__func__, ioat->head, ioat->tail, ioat->issued);
+
+	active = ioat2_ring_active(ioat);
+	for (i = 0; i < active && !seen_current; i++) {
+		smp_read_barrier_depends();
+		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		tx = &desc->txd;
+		dump_desc_dbg(ioat, desc);
+		if (tx->cookie) {
+			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+			dma_cookie_complete(tx);
+			if (tx->callback) {
+				tx->callback(tx->callback_param);
+				tx->callback = NULL;
+			}
+		}
+
+		if (tx->phys == phys_complete)
+			seen_current = true;
+	}
+	smp_mb(); /* finish all descriptor reads before incrementing tail */
+	ioat->tail = idx + i;
+	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
+
+	chan->last_completion = phys_complete;
+	if (active - i == 0) {
+		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+			__func__);
+		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+	}
+}
+
+/**
+ * ioat2_cleanup - clean finished descriptors (advance tail pointer)
+ * @chan: ioat channel to be cleaned up
+ */
+static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	dma_addr_t phys_complete;
+
+	spin_lock_bh(&chan->cleanup_lock);
+	if (ioat_cleanup_preamble(chan, &phys_complete))
+		__cleanup(ioat, phys_complete);
+	spin_unlock_bh(&chan->cleanup_lock);
+}
+
+void ioat2_cleanup_event(unsigned long data)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
+
+	ioat2_cleanup(ioat);
+	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+
+	/* set the tail to be re-issued */
+	ioat->issued = ioat->tail;
+	ioat->dmacount = 0;
+	set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+	mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	dev_dbg(to_dev(chan),
+		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+		__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+
+	if (ioat2_ring_pending(ioat)) {
+		struct ioat_ring_ent *desc;
+
+		desc = ioat2_get_ring_ent(ioat, ioat->tail);
+		ioat2_set_chainaddr(ioat, desc->txd.phys);
+		__ioat2_issue_pending(ioat);
+	} else
+		__ioat2_start_null_desc(ioat);
+}
+
+int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo)
+{
+	unsigned long end = jiffies + tmo;
+	int err = 0;
+	u32 status;
+
+	status = ioat_chansts(chan);
+	if (is_ioat_active(status) || is_ioat_idle(status))
+		ioat_suspend(chan);
+	while (is_ioat_active(status) || is_ioat_idle(status)) {
+		if (tmo && time_after(jiffies, end)) {
+			err = -ETIMEDOUT;
+			break;
+		}
+		status = ioat_chansts(chan);
+		cpu_relax();
+	}
+
+	return err;
+}
+
+int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo)
+{
+	unsigned long end = jiffies + tmo;
+	int err = 0;
+
+	ioat_reset(chan);
+	while (ioat_reset_pending(chan)) {
+		if (end && time_after(jiffies, end)) {
+			err = -ETIMEDOUT;
+			break;
+		}
+		cpu_relax();
+	}
+
+	return err;
+}
+
+static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	dma_addr_t phys_complete;
+
+	ioat2_quiesce(chan, 0);
+	if (ioat_cleanup_preamble(chan, &phys_complete))
+		__cleanup(ioat, phys_complete);
+
+	__ioat2_restart_chan(ioat);
+}
+
+void ioat2_timer_event(unsigned long data)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
+	struct ioat_chan_common *chan = &ioat->base;
+
+	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+		dma_addr_t phys_complete;
+		u64 status;
+
+		status = ioat_chansts(chan);
+
+		/* when halted due to errors check for channel
+		 * programming errors before advancing the completion state
+		 */
+		if (is_ioat_halted(status)) {
+			u32 chanerr;
+
+			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+			dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
+				__func__, chanerr);
+			if (test_bit(IOAT_RUN, &chan->state))
+				BUG_ON(is_ioat_bug(chanerr));
+			else /* we never got off the ground */
+				return;
+		}
+
+		/* if we haven't made progress and we have already
+		 * acknowledged a pending completion once, then be more
+		 * forceful with a restart
+		 */
+		spin_lock_bh(&chan->cleanup_lock);
+		if (ioat_cleanup_preamble(chan, &phys_complete)) {
+			__cleanup(ioat, phys_complete);
+		} else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
+			spin_lock_bh(&ioat->prep_lock);
+			ioat2_restart_channel(ioat);
+			spin_unlock_bh(&ioat->prep_lock);
+		} else {
+			set_bit(IOAT_COMPLETION_ACK, &chan->state);
+			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+		}
+		spin_unlock_bh(&chan->cleanup_lock);
+	} else {
+		u16 active;
+
+		/* if the ring is idle, empty, and oversized try to step
+		 * down the size
+		 */
+		spin_lock_bh(&chan->cleanup_lock);
+		spin_lock_bh(&ioat->prep_lock);
+		active = ioat2_ring_active(ioat);
+		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+			reshape_ring(ioat, ioat->alloc_order-1);
+		spin_unlock_bh(&ioat->prep_lock);
+		spin_unlock_bh(&chan->cleanup_lock);
+
+		/* keep shrinking until we get back to our minimum
+		 * default size
+		 */
+		if (ioat->alloc_order > ioat_get_alloc_order())
+			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+	}
+}
+
+static int ioat2_reset_hw(struct ioat_chan_common *chan)
+{
+	/* throw away whatever the channel was doing and get it initialized */
+	u32 chanerr;
+
+	ioat2_quiesce(chan, msecs_to_jiffies(100));
+
+	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+
+	return ioat2_reset_sync(chan, msecs_to_jiffies(200));
+}
+
+/**
+ * ioat2_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+int ioat2_enumerate_channels(struct ioatdma_device *device)
+{
+	struct ioat2_dma_chan *ioat;
+	struct device *dev = &device->pdev->dev;
+	struct dma_device *dma = &device->common;
+	u8 xfercap_log;
+	int i;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+	dma->chancnt &= 0x1f; /* bits [4:0] valid */
+	if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+		dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+			 dma->chancnt, ARRAY_SIZE(device->idx));
+		dma->chancnt = ARRAY_SIZE(device->idx);
+	}
+	xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+	xfercap_log &= 0x1f; /* bits [4:0] valid */
+	if (xfercap_log == 0)
+		return 0;
+	dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+	/* FIXME which i/oat version is i7300? */
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+	if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+		dma->chancnt--;
+#endif
+	for (i = 0; i < dma->chancnt; i++) {
+		ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+		if (!ioat)
+			break;
+
+		ioat_init_channel(device, &ioat->base, i);
+		ioat->xfercap_log = xfercap_log;
+		spin_lock_init(&ioat->prep_lock);
+		if (device->reset_hw(&ioat->base)) {
+			i = 0;
+			break;
+		}
+	}
+	dma->chancnt = i;
+	return i;
+}
+
+static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_chan *c = tx->chan;
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	dma_cookie_t cookie;
+
+	cookie = dma_cookie_assign(tx);
+	dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+	if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	/* make descriptor updates visible before advancing ioat->head,
+	 * this is purposefully not smp_wmb() since we are also
+	 * publishing the descriptor updates to a dma device
+	 */
+	wmb();
+
+	ioat->head += ioat->produce;
+
+	ioat2_update_pending(ioat);
+	spin_unlock_bh(&ioat->prep_lock);
+
+	return cookie;
+}
+
+static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+{
+	struct ioat_dma_descriptor *hw;
+	struct ioat_ring_ent *desc;
+	struct ioatdma_device *dma;
+	dma_addr_t phys;
+
+	dma = to_ioatdma_device(chan->device);
+	hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
+	if (!hw)
+		return NULL;
+	memset(hw, 0, sizeof(*hw));
+
+	desc = kmem_cache_alloc(ioat2_cache, flags);
+	if (!desc) {
+		pci_pool_free(dma->dma_pool, hw, phys);
+		return NULL;
+	}
+	memset(desc, 0, sizeof(*desc));
+
+	dma_async_tx_descriptor_init(&desc->txd, chan);
+	desc->txd.tx_submit = ioat2_tx_submit_unlock;
+	desc->hw = hw;
+	desc->txd.phys = phys;
+	return desc;
+}
+
+static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+	struct ioatdma_device *dma;
+
+	dma = to_ioatdma_device(chan->device);
+	pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
+	kmem_cache_free(ioat2_cache, desc);
+}
+
+static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+	struct ioat_ring_ent **ring;
+	int descs = 1 << order;
+	int i;
+
+	if (order > ioat_get_max_alloc_order())
+		return NULL;
+
+	/* allocate the array to hold the software ring */
+	ring = kcalloc(descs, sizeof(*ring), flags);
+	if (!ring)
+		return NULL;
+	for (i = 0; i < descs; i++) {
+		ring[i] = ioat2_alloc_ring_ent(c, flags);
+		if (!ring[i]) {
+			while (i--)
+				ioat2_free_ring_ent(ring[i], c);
+			kfree(ring);
+			return NULL;
+		}
+		set_desc_id(ring[i], i);
+	}
+
+	/* link descs */
+	for (i = 0; i < descs-1; i++) {
+		struct ioat_ring_ent *next = ring[i+1];
+		struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+		hw->next = next->txd.phys;
+	}
+	ring[i]->hw->next = ring[0]->txd.phys;
+
+	return ring;
+}
+
+void ioat2_free_chan_resources(struct dma_chan *c);
+
+/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
+ * @chan: channel to be initialized
+ */
+int ioat2_alloc_chan_resources(struct dma_chan *c)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_ring_ent **ring;
+	u64 status;
+	int order;
+	int i = 0;
+
+	/* have we already been set up? */
+	if (ioat->ring)
+		return 1 << ioat->alloc_order;
+
+	/* Setup register to interrupt and write completion status on error */
+	writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+	/* allocate a completion writeback area */
+	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+	chan->completion = pci_pool_alloc(chan->device->completion_pool,
+					  GFP_KERNEL, &chan->completion_dma);
+	if (!chan->completion)
+		return -ENOMEM;
+
+	memset(chan->completion, 0, sizeof(*chan->completion));
+	writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+	       chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+	writel(((u64) chan->completion_dma) >> 32,
+	       chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+	order = ioat_get_alloc_order();
+	ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
+	if (!ring)
+		return -ENOMEM;
+
+	spin_lock_bh(&chan->cleanup_lock);
+	spin_lock_bh(&ioat->prep_lock);
+	ioat->ring = ring;
+	ioat->head = 0;
+	ioat->issued = 0;
+	ioat->tail = 0;
+	ioat->alloc_order = order;
+	spin_unlock_bh(&ioat->prep_lock);
+	spin_unlock_bh(&chan->cleanup_lock);
+
+	tasklet_enable(&chan->cleanup_task);
+	ioat2_start_null_desc(ioat);
+
+	/* check that we got off the ground */
+	do {
+		udelay(1);
+		status = ioat_chansts(chan);
+	} while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status));
+
+	if (is_ioat_active(status) || is_ioat_idle(status)) {
+		set_bit(IOAT_RUN, &chan->state);
+		return 1 << ioat->alloc_order;
+	} else {
+		u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+
+		dev_WARN(to_dev(chan),
+			"failed to start channel chanerr: %#x\n", chanerr);
+		ioat2_free_chan_resources(c);
+		return -EFAULT;
+	}
+}
+
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
+{
+	/* reshape differs from normal ring allocation in that we want
+	 * to allocate a new software ring while only
+	 * extending/truncating the hardware ring
+	 */
+	struct ioat_chan_common *chan = &ioat->base;
+	struct dma_chan *c = &chan->common;
+	const u32 curr_size = ioat2_ring_size(ioat);
+	const u16 active = ioat2_ring_active(ioat);
+	const u32 new_size = 1 << order;
+	struct ioat_ring_ent **ring;
+	u16 i;
+
+	if (order > ioat_get_max_alloc_order())
+		return false;
+
+	/* double check that we have at least 1 free descriptor */
+	if (active == curr_size)
+		return false;
+
+	/* when shrinking, verify that we can hold the current active
+	 * set in the new ring
+	 */
+	if (active >= new_size)
+		return false;
+
+	/* allocate the array to hold the software ring */
+	ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
+	if (!ring)
+		return false;
+
+	/* allocate/trim descriptors as needed */
+	if (new_size > curr_size) {
+		/* copy current descriptors to the new ring */
+		for (i = 0; i < curr_size; i++) {
+			u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+			u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+			ring[new_idx] = ioat->ring[curr_idx];
+			set_desc_id(ring[new_idx], new_idx);
+		}
+
+		/* add new descriptors to the ring */
+		for (i = curr_size; i < new_size; i++) {
+			u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+			ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
+			if (!ring[new_idx]) {
+				while (i--) {
+					u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+					ioat2_free_ring_ent(ring[new_idx], c);
+				}
+				kfree(ring);
+				return false;
+			}
+			set_desc_id(ring[new_idx], new_idx);
+		}
+
+		/* hw link new descriptors */
+		for (i = curr_size-1; i < new_size; i++) {
+			u16 new_idx = (ioat->tail+i) & (new_size-1);
+			struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
+			struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
+
+			hw->next = next->txd.phys;
+		}
+	} else {
+		struct ioat_dma_descriptor *hw;
+		struct ioat_ring_ent *next;
+
+		/* copy current descriptors to the new ring, dropping the
+		 * removed descriptors
+		 */
+		for (i = 0; i < new_size; i++) {
+			u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+			u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+			ring[new_idx] = ioat->ring[curr_idx];
+			set_desc_id(ring[new_idx], new_idx);
+		}
+
+		/* free deleted descriptors */
+		for (i = new_size; i < curr_size; i++) {
+			struct ioat_ring_ent *ent;
+
+			ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
+			ioat2_free_ring_ent(ent, c);
+		}
+
+		/* fix up hardware ring */
+		hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
+		next = ring[(ioat->tail+new_size) & (new_size-1)];
+		hw->next = next->txd.phys;
+	}
+
+	dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+		__func__, new_size);
+
+	kfree(ioat->ring);
+	ioat->ring = ring;
+	ioat->alloc_order = order;
+
+	return true;
+}
+
+/**
+ * ioat2_check_space_lock - verify space and grab ring producer lock
+ * @ioat: ioat2,3 channel (ring) to operate on
+ * @num_descs: allocation length
+ */
+int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	bool retry;
+
+ retry:
+	spin_lock_bh(&ioat->prep_lock);
+	/* never allow the last descriptor to be consumed, we need at
+	 * least one free at all times to allow for on-the-fly ring
+	 * resizing.
+	 */
+	if (likely(ioat2_ring_space(ioat) > num_descs)) {
+		dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
+			__func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+		ioat->produce = num_descs;
+		return 0;  /* with ioat->prep_lock held */
+	}
+	retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state);
+	spin_unlock_bh(&ioat->prep_lock);
+
+	/* is another cpu already trying to expand the ring? */
+	if (retry)
+		goto retry;
+
+	spin_lock_bh(&chan->cleanup_lock);
+	spin_lock_bh(&ioat->prep_lock);
+	retry = reshape_ring(ioat, ioat->alloc_order + 1);
+	clear_bit(IOAT_RESHAPE_PENDING, &chan->state);
+	spin_unlock_bh(&ioat->prep_lock);
+	spin_unlock_bh(&chan->cleanup_lock);
+
+	/* if we were able to expand the ring retry the allocation */
+	if (retry)
+		goto retry;
+
+	if (printk_ratelimit())
+		dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+			__func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+
+	/* progress reclaim in the allocation failure case we may be
+	 * called under bh_disabled so we need to trigger the timer
+	 * event directly
+	 */
+	if (jiffies > chan->timer.expires && timer_pending(&chan->timer)) {
+		struct ioatdma_device *device = chan->device;
+
+		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+		device->timer_fn((unsigned long) &chan->common);
+	}
+
+	return -ENOMEM;
+}
+
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+			   dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_dma_descriptor *hw;
+	struct ioat_ring_ent *desc;
+	dma_addr_t dst = dma_dest;
+	dma_addr_t src = dma_src;
+	size_t total_len = len;
+	int num_descs, idx, i;
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
+		idx = ioat->head;
+	else
+		return NULL;
+	i = 0;
+	do {
+		size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		hw = desc->hw;
+
+		hw->size = copy;
+		hw->ctl = 0;
+		hw->src_addr = src;
+		hw->dst_addr = dst;
+
+		len -= copy;
+		dst += copy;
+		src += copy;
+		dump_desc_dbg(ioat, desc);
+	} while (++i < num_descs);
+
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	hw->ctl_f.compl_write = 1;
+	dump_desc_dbg(ioat, desc);
+	/* we leave the channel locked to ensure in order submission */
+
+	return &desc->txd;
+}
+
+/**
+ * ioat2_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+void ioat2_free_chan_resources(struct dma_chan *c)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioatdma_device *device = chan->device;
+	struct ioat_ring_ent *desc;
+	const u16 total_descs = 1 << ioat->alloc_order;
+	int descs;
+	int i;
+
+	/* Before freeing channel resources first check
+	 * if they have been previously allocated for this channel.
+	 */
+	if (!ioat->ring)
+		return;
+
+	tasklet_disable(&chan->cleanup_task);
+	del_timer_sync(&chan->timer);
+	device->cleanup_fn((unsigned long) c);
+	device->reset_hw(chan);
+	clear_bit(IOAT_RUN, &chan->state);
+
+	spin_lock_bh(&chan->cleanup_lock);
+	spin_lock_bh(&ioat->prep_lock);
+	descs = ioat2_ring_space(ioat);
+	dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
+	for (i = 0; i < descs; i++) {
+		desc = ioat2_get_ring_ent(ioat, ioat->head + i);
+		ioat2_free_ring_ent(desc, c);
+	}
+
+	if (descs < total_descs)
+		dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+			total_descs - descs);
+
+	for (i = 0; i < total_descs - descs; i++) {
+		desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+		dump_desc_dbg(ioat, desc);
+		ioat2_free_ring_ent(desc, c);
+	}
+
+	kfree(ioat->ring);
+	ioat->ring = NULL;
+	ioat->alloc_order = 0;
+	pci_pool_free(device->completion_pool, chan->completion,
+		      chan->completion_dma);
+	spin_unlock_bh(&ioat->prep_lock);
+	spin_unlock_bh(&chan->cleanup_lock);
+
+	chan->last_completion = 0;
+	chan->completion_dma = 0;
+	ioat->dmacount = 0;
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+	return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+	/* ...taken outside the lock, no need to be precise */
+	return sprintf(page, "%d\n", ioat2_ring_active(ioat));
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static struct attribute *ioat2_attrs[] = {
+	&ring_size_attr.attr,
+	&ring_active_attr.attr,
+	&ioat_cap_attr.attr,
+	&ioat_version_attr.attr,
+	NULL,
+};
+
+struct kobj_type ioat2_ktype = {
+	.sysfs_ops = &ioat_sysfs_ops,
+	.default_attrs = ioat2_attrs,
+};
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
+{
+	struct pci_dev *pdev = device->pdev;
+	struct dma_device *dma;
+	struct dma_chan *c;
+	struct ioat_chan_common *chan;
+	int err;
+
+	device->enumerate_channels = ioat2_enumerate_channels;
+	device->reset_hw = ioat2_reset_hw;
+	device->cleanup_fn = ioat2_cleanup_event;
+	device->timer_fn = ioat2_timer_event;
+	device->self_test = ioat_dma_self_test;
+	dma = &device->common;
+	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+	dma->device_issue_pending = ioat2_issue_pending;
+	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+	dma->device_free_chan_resources = ioat2_free_chan_resources;
+	dma->device_tx_status = ioat_dma_tx_status;
+
+	err = ioat_probe(device);
+	if (err)
+		return err;
+	ioat_set_tcp_copy_break(2048);
+
+	list_for_each_entry(c, &dma->channels, device_node) {
+		chan = to_chan_common(c);
+		writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
+		       chan->reg_base + IOAT_DCACTRL_OFFSET);
+	}
+
+	err = ioat_register(device);
+	if (err)
+		return err;
+
+	ioat_kobject_add(device, &ioat2_ktype);
+
+	if (dca)
+		device->dca = ioat2_dca_init(pdev, device->reg_base);
+
+	return err;
+}
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
new file mode 100644
index 00000000..be2a55b9
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_V2_H
+#define IOATDMA_V2_H
+
+#include <linux/dmaengine.h>
+#include <linux/circ_buf.h>
+#include "dma.h"
+#include "hw.h"
+
+
+extern int ioat_pending_level;
+extern int ioat_ring_alloc_order;
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+#define IOAT_MAX_ORDER 16
+#define ioat_get_alloc_order() \
+	(min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
+#define ioat_get_max_alloc_order() \
+	(min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+
+/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
+ * @base: common ioat channel parameters
+ * @xfercap_log; log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @produce: number of descriptors to produce at submit time
+ * @ring: software ring buffer implementation of hardware ring
+ * @prep_lock: serializes descriptor preparation (producers)
+ */
+struct ioat2_dma_chan {
+	struct ioat_chan_common base;
+	size_t xfercap_log;
+	u16 head;
+	u16 issued;
+	u16 tail;
+	u16 dmacount;
+	u16 alloc_order;
+	u16 produce;
+	struct ioat_ring_ent **ring;
+	spinlock_t prep_lock;
+};
+
+static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
+{
+	struct ioat_chan_common *chan = to_chan_common(c);
+
+	return container_of(chan, struct ioat2_dma_chan, base);
+}
+
+static inline u32 ioat2_ring_size(struct ioat2_dma_chan *ioat)
+{
+	return 1 << ioat->alloc_order;
+}
+
+/* count of descriptors in flight with the engine */
+static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
+{
+	return CIRC_CNT(ioat->head, ioat->tail, ioat2_ring_size(ioat));
+}
+
+/* count of descriptors pending submission to hardware */
+static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
+{
+	return CIRC_CNT(ioat->head, ioat->issued, ioat2_ring_size(ioat));
+}
+
+static inline u32 ioat2_ring_space(struct ioat2_dma_chan *ioat)
+{
+	return ioat2_ring_size(ioat) - ioat2_ring_active(ioat);
+}
+
+static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
+{
+	u16 num_descs = len >> ioat->xfercap_log;
+
+	num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
+	return num_descs;
+}
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @fill: hardware fill descriptor
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ */
+
+struct ioat_ring_ent {
+	union {
+		struct ioat_dma_descriptor *hw;
+		struct ioat_fill_descriptor *fill;
+		struct ioat_xor_descriptor *xor;
+		struct ioat_xor_ext_descriptor *xor_ex;
+		struct ioat_pq_descriptor *pq;
+		struct ioat_pq_ext_descriptor *pq_ex;
+		struct ioat_pq_update_descriptor *pqu;
+		struct ioat_raw_descriptor *raw;
+	};
+	size_t len;
+	struct dma_async_tx_descriptor txd;
+	enum sum_check_flags *result;
+	#ifdef DEBUG
+	int id;
+	#endif
+};
+
+static inline struct ioat_ring_ent *
+ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
+{
+	return ioat->ring[idx & (ioat2_ring_size(ioat) - 1)];
+}
+
+static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+
+	writel(addr & 0x00000000FFFFFFFF,
+	       chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+	writel(addr >> 32,
+	       chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
+struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
+int ioat2_enumerate_channels(struct ioatdma_device *device);
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+			   dma_addr_t dma_src, size_t len, unsigned long flags);
+void ioat2_issue_pending(struct dma_chan *chan);
+int ioat2_alloc_chan_resources(struct dma_chan *c);
+void ioat2_free_chan_resources(struct dma_chan *c);
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
+void ioat2_cleanup_event(unsigned long data);
+void ioat2_timer_event(unsigned long data);
+int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo);
+int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo);
+extern struct kobj_type ioat2_ktype;
+extern struct kmem_cache *ioat2_cache;
+#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644
index 00000000..f7f1dc62
--- /dev/null
+++ b/drivers/dma/ioat/dma_v3.c
@@ -0,0 +1,1302 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Support routines for v3+ hardware
+ */
+
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+#include "dma_v2.h"
+
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc = 0xe0;
+static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+
+static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+	return raw->field[xor_idx_to_field[idx]];
+}
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+			dma_addr_t addr, u32 offset, int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+	raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	return raw->field[pq_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+		       dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	raw->field[pq_idx_to_field[idx]] = addr + offset;
+	pq->coef[idx] = coef;
+}
+
+static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
+			    struct ioat_ring_ent *desc, int idx)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	struct pci_dev *pdev = chan->device->pdev;
+	size_t len = desc->len;
+	size_t offset = len - desc->hw->size;
+	struct dma_async_tx_descriptor *tx = &desc->txd;
+	enum dma_ctrl_flags flags = tx->flags;
+
+	switch (desc->hw->ctl_f.op) {
+	case IOAT_OP_COPY:
+		if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
+			ioat_dma_unmap(chan, flags, len, desc->hw);
+		break;
+	case IOAT_OP_FILL: {
+		struct ioat_fill_descriptor *hw = desc->fill;
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+			ioat_unmap(pdev, hw->dst_addr - offset, len,
+				   PCI_DMA_FROMDEVICE, flags, 1);
+		break;
+	}
+	case IOAT_OP_XOR_VAL:
+	case IOAT_OP_XOR: {
+		struct ioat_xor_descriptor *xor = desc->xor;
+		struct ioat_ring_ent *ext;
+		struct ioat_xor_ext_descriptor *xor_ex = NULL;
+		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
+		struct ioat_raw_descriptor *descs[2];
+		int i;
+
+		if (src_cnt > 5) {
+			ext = ioat2_get_ring_ent(ioat, idx + 1);
+			xor_ex = ext->xor_ex;
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			descs[0] = (struct ioat_raw_descriptor *) xor;
+			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+			for (i = 0; i < src_cnt; i++) {
+				dma_addr_t src = xor_get_src(descs, i);
+
+				ioat_unmap(pdev, src - offset, len,
+					   PCI_DMA_TODEVICE, flags, 0);
+			}
+
+			/* dest is a source in xor validate operations */
+			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
+				ioat_unmap(pdev, xor->dst_addr - offset, len,
+					   PCI_DMA_TODEVICE, flags, 1);
+				break;
+			}
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+			ioat_unmap(pdev, xor->dst_addr - offset, len,
+				   PCI_DMA_FROMDEVICE, flags, 1);
+		break;
+	}
+	case IOAT_OP_PQ_VAL:
+	case IOAT_OP_PQ: {
+		struct ioat_pq_descriptor *pq = desc->pq;
+		struct ioat_ring_ent *ext;
+		struct ioat_pq_ext_descriptor *pq_ex = NULL;
+		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+		struct ioat_raw_descriptor *descs[2];
+		int i;
+
+		if (src_cnt > 3) {
+			ext = ioat2_get_ring_ent(ioat, idx + 1);
+			pq_ex = ext->pq_ex;
+		}
+
+		/* in the 'continue' case don't unmap the dests as sources */
+		if (dmaf_p_disabled_continue(flags))
+			src_cnt--;
+		else if (dmaf_continue(flags))
+			src_cnt -= 3;
+
+		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			descs[0] = (struct ioat_raw_descriptor *) pq;
+			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+			for (i = 0; i < src_cnt; i++) {
+				dma_addr_t src = pq_get_src(descs, i);
+
+				ioat_unmap(pdev, src - offset, len,
+					   PCI_DMA_TODEVICE, flags, 0);
+			}
+
+			/* the dests are sources in pq validate operations */
+			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
+				if (!(flags & DMA_PREP_PQ_DISABLE_P))
+					ioat_unmap(pdev, pq->p_addr - offset,
+						   len, PCI_DMA_TODEVICE, flags, 0);
+				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+					ioat_unmap(pdev, pq->q_addr - offset,
+						   len, PCI_DMA_TODEVICE, flags, 0);
+				break;
+			}
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (!(flags & DMA_PREP_PQ_DISABLE_P))
+				ioat_unmap(pdev, pq->p_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				ioat_unmap(pdev, pq->q_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+		}
+		break;
+	}
+	default:
+		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
+			__func__, desc->hw->ctl_f.op);
+	}
+}
+
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+	struct ioat_dma_descriptor *hw = desc->hw;
+
+	if (hw->ctl_f.op == IOAT_OP_XOR ||
+	    hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+		struct ioat_xor_descriptor *xor = desc->xor;
+
+		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+			return true;
+	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
+		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+		struct ioat_pq_descriptor *pq = desc->pq;
+
+		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat: channel (ring) to clean
+ *
+ * The difference from the dma_v2.c __cleanup() is that this routine
+ * handles extended descriptors and dma-unmapping raid operations.
+ */
+static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_ring_ent *desc;
+	bool seen_current = false;
+	int idx = ioat->tail, i;
+	u16 active;
+
+	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+		__func__, ioat->head, ioat->tail, ioat->issued);
+
+	active = ioat2_ring_active(ioat);
+	for (i = 0; i < active && !seen_current; i++) {
+		struct dma_async_tx_descriptor *tx;
+
+		smp_read_barrier_depends();
+		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		dump_desc_dbg(ioat, desc);
+		tx = &desc->txd;
+		if (tx->cookie) {
+			dma_cookie_complete(tx);
+			ioat3_dma_unmap(ioat, desc, idx + i);
+			if (tx->callback) {
+				tx->callback(tx->callback_param);
+				tx->callback = NULL;
+			}
+		}
+
+		if (tx->phys == phys_complete)
+			seen_current = true;
+
+		/* skip extended descriptors */
+		if (desc_has_ext(desc)) {
+			BUG_ON(i + 1 >= active);
+			i++;
+		}
+	}
+	smp_mb(); /* finish all descriptor reads before incrementing tail */
+	ioat->tail = idx + i;
+	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
+	chan->last_completion = phys_complete;
+
+	if (active - i == 0) {
+		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+			__func__);
+		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+	}
+	/* 5 microsecond delay per pending descriptor */
+	writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
+	       chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
+}
+
+static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	dma_addr_t phys_complete;
+
+	spin_lock_bh(&chan->cleanup_lock);
+	if (ioat_cleanup_preamble(chan, &phys_complete))
+		__cleanup(ioat, phys_complete);
+	spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat3_cleanup_event(unsigned long data)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
+
+	ioat3_cleanup(ioat);
+	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	dma_addr_t phys_complete;
+
+	ioat2_quiesce(chan, 0);
+	if (ioat_cleanup_preamble(chan, &phys_complete))
+		__cleanup(ioat, phys_complete);
+
+	__ioat2_restart_chan(ioat);
+}
+
+static void ioat3_timer_event(unsigned long data)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
+	struct ioat_chan_common *chan = &ioat->base;
+
+	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+		dma_addr_t phys_complete;
+		u64 status;
+
+		status = ioat_chansts(chan);
+
+		/* when halted due to errors check for channel
+		 * programming errors before advancing the completion state
+		 */
+		if (is_ioat_halted(status)) {
+			u32 chanerr;
+
+			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+			dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
+				__func__, chanerr);
+			if (test_bit(IOAT_RUN, &chan->state))
+				BUG_ON(is_ioat_bug(chanerr));
+			else /* we never got off the ground */
+				return;
+		}
+
+		/* if we haven't made progress and we have already
+		 * acknowledged a pending completion once, then be more
+		 * forceful with a restart
+		 */
+		spin_lock_bh(&chan->cleanup_lock);
+		if (ioat_cleanup_preamble(chan, &phys_complete))
+			__cleanup(ioat, phys_complete);
+		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
+			spin_lock_bh(&ioat->prep_lock);
+			ioat3_restart_channel(ioat);
+			spin_unlock_bh(&ioat->prep_lock);
+		} else {
+			set_bit(IOAT_COMPLETION_ACK, &chan->state);
+			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+		}
+		spin_unlock_bh(&chan->cleanup_lock);
+	} else {
+		u16 active;
+
+		/* if the ring is idle, empty, and oversized try to step
+		 * down the size
+		 */
+		spin_lock_bh(&chan->cleanup_lock);
+		spin_lock_bh(&ioat->prep_lock);
+		active = ioat2_ring_active(ioat);
+		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+			reshape_ring(ioat, ioat->alloc_order-1);
+		spin_unlock_bh(&ioat->prep_lock);
+		spin_unlock_bh(&chan->cleanup_lock);
+
+		/* keep shrinking until we get back to our minimum
+		 * default size
+		 */
+		if (ioat->alloc_order > ioat_get_alloc_order())
+			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+	}
+}
+
+static enum dma_status
+ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	ioat3_cleanup(ioat);
+
+	return dma_cookie_status(c, cookie, txstate);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
+		       size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_ring_ent *desc;
+	size_t total_len = len;
+	struct ioat_fill_descriptor *fill;
+	u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
+	int num_descs, idx, i;
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
+		idx = ioat->head;
+	else
+		return NULL;
+	i = 0;
+	do {
+		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		fill = desc->fill;
+
+		fill->size = xfer_size;
+		fill->src_data = src_data;
+		fill->dst_addr = dest;
+		fill->ctl = 0;
+		fill->ctl_f.op = IOAT_OP_FILL;
+
+		len -= xfer_size;
+		dest += xfer_size;
+		dump_desc_dbg(ioat, desc);
+	} while (++i < num_descs);
+
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	fill->ctl_f.compl_write = 1;
+	dump_desc_dbg(ioat, desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+		      size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_ring_ent *compl_desc;
+	struct ioat_ring_ent *desc;
+	struct ioat_ring_ent *ext;
+	size_t total_len = len;
+	struct ioat_xor_descriptor *xor;
+	struct ioat_xor_ext_descriptor *xor_ex = NULL;
+	struct ioat_dma_descriptor *hw;
+	int num_descs, with_ext, idx, i;
+	u32 offset = 0;
+	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
+	BUG_ON(src_cnt < 2);
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+	/* we need 2x the number of descriptors to cover greater than 5
+	 * sources
+	 */
+	if (src_cnt > 5) {
+		with_ext = 1;
+		num_descs *= 2;
+	} else
+		with_ext = 0;
+
+	/* completion writes from the raid engine may pass completion
+	 * writes from the legacy engine, so we need one extra null
+	 * (legacy) descriptor to ensure all completion writes arrive in
+	 * order.
+	 */
+	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
+		idx = ioat->head;
+	else
+		return NULL;
+	i = 0;
+	do {
+		struct ioat_raw_descriptor *descs[2];
+		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+		int s;
+
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		xor = desc->xor;
+
+		/* save a branch by unconditionally retrieving the
+		 * extended descriptor xor_set_src() knows to not write
+		 * to it in the single descriptor case
+		 */
+		ext = ioat2_get_ring_ent(ioat, idx + i + 1);
+		xor_ex = ext->xor_ex;
+
+		descs[0] = (struct ioat_raw_descriptor *) xor;
+		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+		for (s = 0; s < src_cnt; s++)
+			xor_set_src(descs, src[s], offset, s);
+		xor->size = xfer_size;
+		xor->dst_addr = dest + offset;
+		xor->ctl = 0;
+		xor->ctl_f.op = op;
+		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+		len -= xfer_size;
+		offset += xfer_size;
+		dump_desc_dbg(ioat, desc);
+	} while ((i += 1 + with_ext) < num_descs);
+
+	/* last xor descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+	/* completion descriptor carries interrupt bit */
+	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+	hw = compl_desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.compl_write = 1;
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	dump_desc_dbg(ioat, compl_desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &compl_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+	       unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+		    unsigned int src_cnt, size_t len,
+		    enum sum_check_flags *result, unsigned long flags)
+{
+	/* the cleanup routine only sets bits on validate failure, it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*result = 0;
+
+	return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
+				     src_cnt - 1, len, flags);
+}
+
+static void
+dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
+{
+	struct device *dev = to_dev(&ioat->base);
+	struct ioat_pq_descriptor *pq = desc->pq;
+	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+	int i;
+
+	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+		" sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+		desc_id(desc), (unsigned long long) desc->txd.phys,
+		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
+		pq->ctl_f.compl_write,
+		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+		pq->ctl_f.src_cnt);
+	for (i = 0; i < src_cnt; i++)
+		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+		     const dma_addr_t *dst, const dma_addr_t *src,
+		     unsigned int src_cnt, const unsigned char *scf,
+		     size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_ring_ent *compl_desc;
+	struct ioat_ring_ent *desc;
+	struct ioat_ring_ent *ext;
+	size_t total_len = len;
+	struct ioat_pq_descriptor *pq;
+	struct ioat_pq_ext_descriptor *pq_ex = NULL;
+	struct ioat_dma_descriptor *hw;
+	u32 offset = 0;
+	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+	int i, s, idx, with_ext, num_descs;
+
+	dev_dbg(to_dev(chan), "%s\n", __func__);
+	/* the engine requires at least two sources (we provide
+	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
+	 */
+	BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+	/* we need 2x the number of descriptors to cover greater than 3
+	 * sources (we need 1 extra source in the q-only continuation
+	 * case and 3 extra sources in the p+q continuation case.
+	 */
+	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
+	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
+		with_ext = 1;
+		num_descs *= 2;
+	} else
+		with_ext = 0;
+
+	/* completion writes from the raid engine may pass completion
+	 * writes from the legacy engine, so we need one extra null
+	 * (legacy) descriptor to ensure all completion writes arrive in
+	 * order.
+	 */
+	if (likely(num_descs) &&
+	    ioat2_check_space_lock(ioat, num_descs+1) == 0)
+		idx = ioat->head;
+	else
+		return NULL;
+	i = 0;
+	do {
+		struct ioat_raw_descriptor *descs[2];
+		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		pq = desc->pq;
+
+		/* save a branch by unconditionally retrieving the
+		 * extended descriptor pq_set_src() knows to not write
+		 * to it in the single descriptor case
+		 */
+		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
+		pq_ex = ext->pq_ex;
+
+		descs[0] = (struct ioat_raw_descriptor *) pq;
+		descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+		for (s = 0; s < src_cnt; s++)
+			pq_set_src(descs, src[s], offset, scf[s], s);
+
+		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
+		if (dmaf_p_disabled_continue(flags))
+			pq_set_src(descs, dst[1], offset, 1, s++);
+		else if (dmaf_continue(flags)) {
+			pq_set_src(descs, dst[0], offset, 0, s++);
+			pq_set_src(descs, dst[1], offset, 1, s++);
+			pq_set_src(descs, dst[1], offset, 0, s++);
+		}
+		pq->size = xfer_size;
+		pq->p_addr = dst[0] + offset;
+		pq->q_addr = dst[1] + offset;
+		pq->ctl = 0;
+		pq->ctl_f.op = op;
+		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+		len -= xfer_size;
+		offset += xfer_size;
+	} while ((i += 1 + with_ext) < num_descs);
+
+	/* last pq descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	dump_pq_desc_dbg(ioat, desc, ext);
+
+	/* completion descriptor carries interrupt bit */
+	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+	hw = compl_desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.compl_write = 1;
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	dump_desc_dbg(ioat, compl_desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &compl_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+	      unsigned int src_cnt, const unsigned char *scf, size_t len,
+	      unsigned long flags)
+{
+	/* specify valid address for disabled result */
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		dst[0] = dst[1];
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		dst[1] = dst[0];
+
+	/* handle the single source multiply case from the raid6
+	 * recovery path
+	 */
+	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
+		dma_addr_t single_source[2];
+		unsigned char single_source_coef[2];
+
+		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+		single_source[0] = src[0];
+		single_source[1] = src[0];
+		single_source_coef[0] = scf[0];
+		single_source_coef[1] = 0;
+
+		return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+					    single_source_coef, len, flags);
+	} else
+		return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
+					    len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		  unsigned int src_cnt, const unsigned char *scf, size_t len,
+		  enum sum_check_flags *pqres, unsigned long flags)
+{
+	/* specify valid address for disabled result */
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		pq[0] = pq[1];
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		pq[1] = pq[0];
+
+	/* the cleanup routine only sets bits on validate failure, it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*pqres = 0;
+
+	return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+				    flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+		 unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	unsigned char scf[src_cnt];
+	dma_addr_t pq[2];
+
+	memset(scf, 0, src_cnt);
+	pq[0] = dst;
+	flags |= DMA_PREP_PQ_DISABLE_Q;
+	pq[1] = dst; /* specify valid address for disabled result */
+
+	return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+				    flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+		     unsigned int src_cnt, size_t len,
+		     enum sum_check_flags *result, unsigned long flags)
+{
+	unsigned char scf[src_cnt];
+	dma_addr_t pq[2];
+
+	/* the cleanup routine only sets bits on validate failure, it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*result = 0;
+
+	memset(scf, 0, src_cnt);
+	pq[0] = src[0];
+	flags |= DMA_PREP_PQ_DISABLE_Q;
+	pq[1] = pq[0]; /* specify valid address for disabled result */
+
+	return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
+				    len, flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_ring_ent *desc;
+	struct ioat_dma_descriptor *hw;
+
+	if (ioat2_check_space_lock(ioat, 1) == 0)
+		desc = ioat2_get_ring_ent(ioat, ioat->head);
+	else
+		return NULL;
+
+	hw = desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = 1;
+	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	hw->ctl_f.compl_write = 1;
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	hw->src_addr = 0;
+	hw->dst_addr = 0;
+
+	desc->txd.flags = flags;
+	desc->len = 1;
+
+	dump_desc_dbg(ioat, desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &desc->txd;
+}
+
+static void __devinit ioat3_dma_test_callback(void *dma_async_param)
+{
+	struct completion *cmp = dma_async_param;
+
+	complete(cmp);
+}
+
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
+{
+	int i, src_idx;
+	struct page *dest;
+	struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+	struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+	dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+	dma_addr_t dma_addr, dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	u32 xor_val_result;
+	int err = 0;
+	struct completion cmp;
+	unsigned long tmo;
+	struct device *dev = &device->pdev->dev;
+	struct dma_device *dma = &device->common;
+
+	dev_dbg(dev, "%s\n", __func__);
+
+	if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+		return 0;
+
+	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx]) {
+			while (src_idx--)
+				__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest) {
+		while (src_idx--)
+			__free_page(xor_srcs[src_idx]);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+			(cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = container_of(dma->channels.next, struct dma_chan,
+				device_node);
+	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
+				      DMA_PREP_INTERRUPT);
+
+	if (!tx) {
+		dev_err(dev, "Self-test xor prep failed\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat3_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test xor setup failed\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+		dev_err(dev, "Self-test xor timed out\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i] != cmp_word) {
+			dev_err(dev, "Self-test xor failed compare\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
+
+	/* skip validate if the capability is not present */
+	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+		goto free_resources;
+
+	/* validate the sources with the destintation page */
+	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+		xor_val_srcs[i] = xor_srcs[i];
+	xor_val_srcs[i] = dest;
+
+	xor_val_result = 1;
+
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+					  &xor_val_result, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		dev_err(dev, "Self-test zero prep failed\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat3_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test zero setup failed\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+		dev_err(dev, "Self-test validate timed out\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (xor_val_result != 0) {
+		dev_err(dev, "Self-test validate failed compare\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* skip memset if the capability is not present */
+	if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
+		goto free_resources;
+
+	/* test memset */
+	dma_addr = dma_map_page(dev, dest, 0,
+			PAGE_SIZE, DMA_FROM_DEVICE);
+	tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+					 DMA_PREP_INTERRUPT);
+	if (!tx) {
+		dev_err(dev, "Self-test memset prep failed\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat3_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test memset setup failed\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+		dev_err(dev, "Self-test memset timed out\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i]) {
+			dev_err(dev, "Self-test memset failed compare\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+	/* test for non-zero parity sum */
+	xor_val_result = 0;
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+					  &xor_val_result, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		dev_err(dev, "Self-test 2nd zero prep failed\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat3_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test  2nd zero setup failed\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+		dev_err(dev, "Self-test 2nd validate timed out\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (xor_val_result != SUM_CHECK_P_RESULT) {
+		dev_err(dev, "Self-test validate failed compare\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	dma->device_free_chan_resources(dma_chan);
+out:
+	src_idx = IOAT_NUM_SRC_TEST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
+{
+	int rc = ioat_dma_self_test(device);
+
+	if (rc)
+		return rc;
+
+	rc = ioat_xor_val_self_test(device);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+static int ioat3_reset_hw(struct ioat_chan_common *chan)
+{
+	/* throw away whatever the channel was doing and get it
+	 * initialized, with ioat3 specific workarounds
+	 */
+	struct ioatdma_device *device = chan->device;
+	struct pci_dev *pdev = device->pdev;
+	u32 chanerr;
+	u16 dev_id;
+	int err;
+
+	ioat2_quiesce(chan, msecs_to_jiffies(100));
+
+	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+
+	/* -= IOAT ver.3 workarounds =- */
+	/* Write CHANERRMSK_INT with 3E07h to mask out the errors
+	 * that can cause stability issues for IOAT ver.3, and clear any
+	 * pending errors
+	 */
+	pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
+	err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
+	if (err) {
+		dev_err(&pdev->dev, "channel error register unreachable\n");
+		return err;
+	}
+	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
+
+	/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+	 * (workaround for spurious config parity error after restart)
+	 */
+	pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+	if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
+		pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
+
+	return ioat2_reset_sync(chan, msecs_to_jiffies(200));
+}
+
+static bool is_jf_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_snb_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
+{
+	struct pci_dev *pdev = device->pdev;
+	int dca_en = system_has_dca_enabled(pdev);
+	struct dma_device *dma;
+	struct dma_chan *c;
+	struct ioat_chan_common *chan;
+	bool is_raid_device = false;
+	int err;
+	u32 cap;
+
+	device->enumerate_channels = ioat2_enumerate_channels;
+	device->reset_hw = ioat3_reset_hw;
+	device->self_test = ioat3_dma_self_test;
+	dma = &device->common;
+	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+	dma->device_issue_pending = ioat2_issue_pending;
+	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+	dma->device_free_chan_resources = ioat2_free_chan_resources;
+
+	if (is_jf_ioat(pdev) || is_snb_ioat(pdev))
+		dma->copy_align = 6;
+
+	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
+
+	cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
+
+	/* dca is incompatible with raid operations */
+	if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
+		cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
+
+	if (cap & IOAT_CAP_XOR) {
+		is_raid_device = true;
+		dma->max_xor = 8;
+		dma->xor_align = 6;
+
+		dma_cap_set(DMA_XOR, dma->cap_mask);
+		dma->device_prep_dma_xor = ioat3_prep_xor;
+
+		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
+	}
+	if (cap & IOAT_CAP_PQ) {
+		is_raid_device = true;
+		dma_set_maxpq(dma, 8, 0);
+		dma->pq_align = 6;
+
+		dma_cap_set(DMA_PQ, dma->cap_mask);
+		dma->device_prep_dma_pq = ioat3_prep_pq;
+
+		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+
+		if (!(cap & IOAT_CAP_XOR)) {
+			dma->max_xor = 8;
+			dma->xor_align = 6;
+
+			dma_cap_set(DMA_XOR, dma->cap_mask);
+			dma->device_prep_dma_xor = ioat3_prep_pqxor;
+
+			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+			dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
+		}
+	}
+	if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
+		dma_cap_set(DMA_MEMSET, dma->cap_mask);
+		dma->device_prep_dma_memset = ioat3_prep_memset_lock;
+	}
+
+
+	if (is_raid_device) {
+		dma->device_tx_status = ioat3_tx_status;
+		device->cleanup_fn = ioat3_cleanup_event;
+		device->timer_fn = ioat3_timer_event;
+	} else {
+		dma->device_tx_status = ioat_dma_tx_status;
+		device->cleanup_fn = ioat2_cleanup_event;
+		device->timer_fn = ioat2_timer_event;
+	}
+
+	#ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
+	dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
+	dma->device_prep_dma_pq_val = NULL;
+	#endif
+
+	#ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
+	dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
+	dma->device_prep_dma_xor_val = NULL;
+	#endif
+
+	err = ioat_probe(device);
+	if (err)
+		return err;
+	ioat_set_tcp_copy_break(262144);
+
+	list_for_each_entry(c, &dma->channels, device_node) {
+		chan = to_chan_common(c);
+		writel(IOAT_DMA_DCA_ANY_CPU,
+		       chan->reg_base + IOAT_DCACTRL_OFFSET);
+	}
+
+	err = ioat_register(device);
+	if (err)
+		return err;
+
+	ioat_kobject_add(device, &ioat2_ktype);
+
+	if (dca)
+		device->dca = ioat3_dca_init(pdev, device->reg_base);
+
+	return 0;
+}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644
index 00000000..60e67545
--- /dev/null
+++ b/drivers/dma/ioat/hw.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID            0x8086
+#define IOAT_MMIO_BAR		0
+
+/* CB device ID's */
+#define IOAT_PCI_DID_5000       0x1A38
+#define IOAT_PCI_DID_CNB        0x360B
+#define IOAT_PCI_DID_SCNB       0x65FF
+#define IOAT_PCI_DID_SNB        0x402F
+
+#define IOAT_PCI_RID            0x00
+#define IOAT_PCI_SVID           0x8086
+#define IOAT_PCI_SID            0x8086
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+#define IOAT_VER_3_2            0x32    /* Version 3.2 */
+
+int system_has_dca_enabled(struct pci_dev *pdev);
+
+struct ioat_dma_descriptor {
+	uint32_t	size;
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int src_snoop_dis:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int null:1;
+			unsigned int src_brk:1;
+			unsigned int dest_brk:1;
+			unsigned int bundle:1;
+			unsigned int dest_dca:1;
+			unsigned int hint:1;
+			unsigned int rsvd2:13;
+			#define IOAT_OP_COPY 0x00
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_addr;
+	uint64_t	dst_addr;
+	uint64_t	next;
+	uint64_t	rsv1;
+	uint64_t	rsv2;
+	/* store some driver data in an unused portion of the descriptor */
+	union {
+		uint64_t	user1;
+		uint64_t	tx_cnt;
+	};
+	uint64_t	user2;
+};
+
+struct ioat_fill_descriptor {
+	uint32_t	size;
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int rsvd:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int rsvd2:2;
+			unsigned int dest_brk:1;
+			unsigned int bundle:1;
+			unsigned int rsvd4:15;
+			#define IOAT_OP_FILL 0x01
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_data;
+	uint64_t	dst_addr;
+	uint64_t	next;
+	uint64_t	rsv1;
+	uint64_t	next_dst_addr;
+	uint64_t	user1;
+	uint64_t	user2;
+};
+
+struct ioat_xor_descriptor {
+	uint32_t	size;
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int src_snoop_dis:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int src_cnt:3;
+			unsigned int bundle:1;
+			unsigned int dest_dca:1;
+			unsigned int hint:1;
+			unsigned int rsvd:13;
+			#define IOAT_OP_XOR 0x87
+			#define IOAT_OP_XOR_VAL 0x88
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_addr;
+	uint64_t	dst_addr;
+	uint64_t	next;
+	uint64_t	src_addr2;
+	uint64_t	src_addr3;
+	uint64_t	src_addr4;
+	uint64_t	src_addr5;
+};
+
+struct ioat_xor_ext_descriptor {
+	uint64_t	src_addr6;
+	uint64_t	src_addr7;
+	uint64_t	src_addr8;
+	uint64_t	next;
+	uint64_t	rsvd[4];
+};
+
+struct ioat_pq_descriptor {
+	uint32_t	size;
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int src_snoop_dis:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int src_cnt:3;
+			unsigned int bundle:1;
+			unsigned int dest_dca:1;
+			unsigned int hint:1;
+			unsigned int p_disable:1;
+			unsigned int q_disable:1;
+			unsigned int rsvd:11;
+			#define IOAT_OP_PQ 0x89
+			#define IOAT_OP_PQ_VAL 0x8a
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_addr;
+	uint64_t	p_addr;
+	uint64_t	next;
+	uint64_t	src_addr2;
+	uint64_t	src_addr3;
+	uint8_t		coef[8];
+	uint64_t	q_addr;
+};
+
+struct ioat_pq_ext_descriptor {
+	uint64_t	src_addr4;
+	uint64_t	src_addr5;
+	uint64_t	src_addr6;
+	uint64_t	next;
+	uint64_t	src_addr7;
+	uint64_t	src_addr8;
+	uint64_t	rsvd[2];
+};
+
+struct ioat_pq_update_descriptor {
+	uint32_t	size;
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int src_snoop_dis:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int src_cnt:3;
+			unsigned int bundle:1;
+			unsigned int dest_dca:1;
+			unsigned int hint:1;
+			unsigned int p_disable:1;
+			unsigned int q_disable:1;
+			unsigned int rsvd:3;
+			unsigned int coef:8;
+			#define IOAT_OP_PQ_UP 0x8b
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_addr;
+	uint64_t	p_addr;
+	uint64_t	next;
+	uint64_t	src_addr2;
+	uint64_t	p_src;
+	uint64_t	q_src;
+	uint64_t	q_addr;
+};
+
+struct ioat_raw_descriptor {
+	uint64_t	field[8];
+};
+#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
new file mode 100644
index 00000000..5e3a40f7
--- /dev/null
+++ b/drivers/dma/ioat/pci.c
@@ -0,0 +1,217 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include <linux/slab.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static struct pci_device_id ioat_pci_tbl[] = {
+	/* I/OAT v1 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+	{ PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+	/* I/OAT v2 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+	/* I/OAT v3 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+	/* I/OAT v3.2 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) },
+
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev,
+				    const struct pci_device_id *id);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+
+struct kmem_cache *ioat2_cache;
+
+#define DRV_NAME "ioatdma"
+
+static struct pci_driver ioat_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= ioat_pci_tbl,
+	.probe		= ioat_pci_probe,
+	.remove		= __devexit_p(ioat_remove),
+};
+
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct device *dev = &pdev->dev;
+	struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+
+	if (!d)
+		return NULL;
+	d->pdev = pdev;
+	d->reg_base = iobase;
+	return d;
+}
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	void __iomem * const *iomap;
+	struct device *dev = &pdev->dev;
+	struct ioatdma_device *device;
+	int err;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+	if (err)
+		return err;
+	iomap = pcim_iomap_table(pdev);
+	if (!iomap)
+		return -ENOMEM;
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err)
+		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err)
+		return err;
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err)
+		return err;
+
+	device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+	if (!device)
+		return -ENOMEM;
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, device);
+
+	device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+	if (device->version == IOAT_VER_1_2)
+		err = ioat1_dma_probe(device, ioat_dca_enabled);
+	else if (device->version == IOAT_VER_2_0)
+		err = ioat2_dma_probe(device, ioat_dca_enabled);
+	else if (device->version >= IOAT_VER_3_0)
+		err = ioat3_dma_probe(device, ioat_dca_enabled);
+	else
+		return -ENODEV;
+
+	if (err) {
+		dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+	struct ioatdma_device *device = pci_get_drvdata(pdev);
+
+	if (!device)
+		return;
+
+	dev_err(&pdev->dev, "Removing dma and dca services\n");
+	if (device->dca) {
+		unregister_dca_provider(device->dca, &pdev->dev);
+		free_dca_provider(device->dca);
+		device->dca = NULL;
+	}
+	ioat_dma_remove(device);
+}
+
+static int __init ioat_init_module(void)
+{
+	int err;
+
+	pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+		DRV_NAME, IOAT_DMA_VERSION);
+
+	ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+					0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ioat2_cache)
+		return -ENOMEM;
+
+	err = pci_register_driver(&ioat_pci_driver);
+	if (err)
+		kmem_cache_destroy(ioat2_cache);
+
+	return err;
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+	pci_unregister_driver(&ioat_pci_driver);
+	kmem_cache_destroy(ioat2_cache);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
new file mode 100644
index 00000000..13917985
--- /dev/null
+++ b/drivers/dma/ioat/registers.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET			0x48
+#define IOAT_PCI_DMACTRL_DMA_EN			0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN			0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET		0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET		0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET		0x180
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET		0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET			0x00	/*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET			0x01	/*  8-bit */
+#define IOAT_XFERCAP_4KB			12
+#define IOAT_XFERCAP_8KB			13
+#define IOAT_XFERCAP_16KB			14
+#define IOAT_XFERCAP_32KB			15
+#define IOAT_XFERCAP_32GB			0
+
+#define IOAT_GENCTRL_OFFSET			0x02	/*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN			0x01
+
+#define IOAT_INTRCTRL_OFFSET			0x03	/*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN		0x01	/* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS		0x02	/* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT			0x04	/* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL	0x08	/* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET			0x04	/* Each bit is a channel */
+
+#define IOAT_VER_OFFSET				0x08	/*  8-bit */
+#define IOAT_VER_MAJOR_MASK			0xF0
+#define IOAT_VER_MINOR_MASK			0x0F
+#define GET_IOAT_VER_MAJOR(x)			(((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x)			((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET		0x0A	/* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET			0x0C	/* 16-bit */
+#define IOAT_INTRDELAY_MASK			0x3FFF	/* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT		0x8000	/* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET		0x0E	/* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE	0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED		0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS		0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING		0x0008
+
+#define IOAT_DMA_CAP_OFFSET			0x10	/* 32-bit */
+#define IOAT_CAP_PAGE_BREAK			0x00000001
+#define IOAT_CAP_CRC				0x00000002
+#define IOAT_CAP_SKIP_MARKER			0x00000004
+#define IOAT_CAP_DCA				0x00000010
+#define IOAT_CAP_CRC_MOVE			0x00000020
+#define IOAT_CAP_FILL_BLOCK			0x00000040
+#define IOAT_CAP_APIC				0x00000080
+#define IOAT_CAP_XOR				0x00000100
+#define IOAT_CAP_PQ				0x00000200
+
+#define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET			0x00	/* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN		0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN		0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
+#define IOAT_CHANCTRL_INT_REARM			0x0001
+#define IOAT_CHANCTRL_RUN			(IOAT_CHANCTRL_INT_REARM |\
+						 IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
+
+#define IOAT_DMA_COMP_OFFSET			0x02	/* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET		0x04	/* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW	0x04
+#define IOAT2_CHANSTS_OFFSET_LOW	0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH	0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH	0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR			0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR		0x8ULL
+#define IOAT_CHANSTS_STATUS	0x7ULL
+#define IOAT_CHANSTS_ACTIVE	0x0
+#define IOAT_CHANSTS_DONE	0x1
+#define IOAT_CHANSTS_SUSPENDED	0x2
+#define IOAT_CHANSTS_HALTED	0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET	0x06    /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET       0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET         0x00
+#define IOAT_DCA_VER_MAJOR_MASK     0xF0
+#define IOAT_DCA_VER_MINOR_MASK     0x0F
+
+#define IOAT_DCA_COMP_OFFSET        0x02
+#define IOAT_DCA_COMP_V1            0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET  0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET  0x06
+#define IOAT_PCI_CAPABILITY_MEMWR   0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
+#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET      0x10
+#define IOAT_DCA_GREQID_SIZE        0x04
+#define IOAT_DCA_GREQID_MASK        0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
+#define IOAT_DCA_GREQID_VALID       0x20000000
+#define IOAT_DCA_GREQID_LASTID      0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET     0x02
+
+#define IOAT1_CHAINADDR_OFFSET		0x0C	/* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET		0x10	/* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW	0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW	0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH	0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH	0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET		0x14	/*  8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET		0x04	/*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET			0x20
+#define IOAT_CHANCMD_RESUME			0x10
+#define IOAT_CHANCMD_ABORT			0x08
+#define IOAT_CHANCMD_SUSPEND			0x04
+#define IOAT_CHANCMD_APPEND			0x02
+#define IOAT_CHANCMD_START			0x01
+
+#define IOAT_CHANCMP_OFFSET			0x18	/* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW			0x18
+#define IOAT_CHANCMP_OFFSET_HIGH		0x1C
+
+#define IOAT_CDAR_OFFSET			0x20	/* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW			0x20
+#define IOAT_CDAR_OFFSET_HIGH			0x24
+
+#define IOAT_CHANERR_OFFSET			0x28	/* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR	0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR	0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR	0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR	0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR	0x0010
+#define IOAT_CHANERR_CHANCMD_ERR		0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0080
+#define IOAT_CHANERR_READ_DATA_ERR		0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR		0x0200
+#define IOAT_CHANERR_CONTROL_ERR	0x0400
+#define IOAT_CHANERR_LENGTH_ERR	0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR	0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR	0x2000
+#define IOAT_CHANERR_SOFT_ERR			0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR		0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR		0x10000
+#define IOAT_CHANERR_XOR_Q_ERR			0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR	0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
new file mode 100644
index 00000000..79e3eba2
--- /dev/null
+++ b/drivers/dma/iop-adma.c
@@ -0,0 +1,1726 @@
+/*
+ * offload engine driver for the Intel Xscale series of i/o processors
+ * Copyright © 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <linux/ioport.h>
+#include <linux/raid/pq.h>
+#include <linux/slab.h>
+
+#include <mach/adma.h>
+
+#include "dmaengine.h"
+
+#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
+#define to_iop_adma_device(dev) \
+	container_of(dev, struct iop_adma_device, common)
+#define tx_to_iop_adma_slot(tx) \
+	container_of(tx, struct iop_adma_desc_slot, async_tx)
+
+/**
+ * iop_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &iop_chan->lock while calling this function
+ */
+static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
+{
+	int stride = slot->slots_per_op;
+
+	while (stride--) {
+		slot->slots_per_op = 0;
+		slot = list_entry(slot->slot_node.next,
+				struct iop_adma_desc_slot,
+				slot_node);
+	}
+}
+
+static void
+iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+	struct dma_async_tx_descriptor *tx = &desc->async_tx;
+	struct iop_adma_desc_slot *unmap = desc->group_head;
+	struct device *dev = &iop_chan->device->pdev->dev;
+	u32 len = unmap->unmap_len;
+	enum dma_ctrl_flags flags = tx->flags;
+	u32 src_cnt;
+	dma_addr_t addr;
+	dma_addr_t dest;
+
+	src_cnt = unmap->unmap_src_cnt;
+	dest = iop_desc_get_dest_addr(unmap, iop_chan);
+	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		enum dma_data_direction dir;
+
+		if (src_cnt > 1) /* is xor? */
+			dir = DMA_BIDIRECTIONAL;
+		else
+			dir = DMA_FROM_DEVICE;
+
+		dma_unmap_page(dev, dest, len, dir);
+	}
+
+	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		while (src_cnt--) {
+			addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
+			if (addr == dest)
+				continue;
+			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+		}
+	}
+	desc->group_head = NULL;
+}
+
+static void
+iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+	struct dma_async_tx_descriptor *tx = &desc->async_tx;
+	struct iop_adma_desc_slot *unmap = desc->group_head;
+	struct device *dev = &iop_chan->device->pdev->dev;
+	u32 len = unmap->unmap_len;
+	enum dma_ctrl_flags flags = tx->flags;
+	u32 src_cnt = unmap->unmap_src_cnt;
+	dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
+	dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
+	int i;
+
+	if (tx->flags & DMA_PREP_CONTINUE)
+		src_cnt -= 3;
+
+	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
+		dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
+		dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
+	}
+
+	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		dma_addr_t addr;
+
+		for (i = 0; i < src_cnt; i++) {
+			addr = iop_desc_get_src_addr(unmap, iop_chan, i);
+			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+		}
+		if (desc->pq_check_result) {
+			dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
+			dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
+		}
+	}
+
+	desc->group_head = NULL;
+}
+
+
+static dma_cookie_t
+iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
+	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
+{
+	struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+	BUG_ON(tx->cookie < 0);
+	if (tx->cookie > 0) {
+		cookie = tx->cookie;
+		tx->cookie = 0;
+
+		/* call the callback (must not sleep or submit new
+		 * operations to this channel)
+		 */
+		if (tx->callback)
+			tx->callback(tx->callback_param);
+
+		/* unmap dma addresses
+		 * (unmap_single vs unmap_page?)
+		 */
+		if (desc->group_head && desc->unmap_len) {
+			if (iop_desc_is_pq(desc))
+				iop_desc_unmap_pq(iop_chan, desc);
+			else
+				iop_desc_unmap(iop_chan, desc);
+		}
+	}
+
+	/* run dependent operations */
+	dma_run_dependencies(tx);
+
+	return cookie;
+}
+
+static int
+iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
+	struct iop_adma_chan *iop_chan)
+{
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx))
+		return 0;
+
+	/* leave the last descriptor in the chain
+	 * so we can append to it
+	 */
+	if (desc->chain_node.next == &iop_chan->chain)
+		return 1;
+
+	dev_dbg(iop_chan->device->common.dev,
+		"\tfree slot: %d slots_per_op: %d\n",
+		desc->idx, desc->slots_per_op);
+
+	list_del(&desc->chain_node);
+	iop_adma_free_slots(desc);
+
+	return 0;
+}
+
+static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
+	dma_cookie_t cookie = 0;
+	u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
+	int busy = iop_chan_is_busy(iop_chan);
+	int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+					chain_node) {
+		pr_debug("\tcookie: %d slot: %d busy: %d "
+			"this_desc: %#x next_desc: %#x ack: %d\n",
+			iter->async_tx.cookie, iter->idx, busy,
+			iter->async_tx.phys, iop_desc_get_next_desc(iter),
+			async_tx_test_ack(&iter->async_tx));
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+
+		/* do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy, or if it appears that the current descriptor
+		 * needs to be re-read (i.e. has been appended to)
+		 */
+		if (iter->async_tx.phys == current_desc) {
+			BUG_ON(seen_current++);
+			if (busy || iop_desc_get_next_desc(iter))
+				break;
+		}
+
+		/* detect the start of a group transaction */
+		if (!slot_cnt && !slots_per_op) {
+			slot_cnt = iter->slot_cnt;
+			slots_per_op = iter->slots_per_op;
+			if (slot_cnt <= slots_per_op) {
+				slot_cnt = 0;
+				slots_per_op = 0;
+			}
+		}
+
+		if (slot_cnt) {
+			pr_debug("\tgroup++\n");
+			if (!grp_start)
+				grp_start = iter;
+			slot_cnt -= slots_per_op;
+		}
+
+		/* all the members of a group are complete */
+		if (slots_per_op != 0 && slot_cnt == 0) {
+			struct iop_adma_desc_slot *grp_iter, *_grp_iter;
+			int end_of_chain = 0;
+			pr_debug("\tgroup end\n");
+
+			/* collect the total results */
+			if (grp_start->xor_check_result) {
+				u32 zero_sum_result = 0;
+				slot_cnt = grp_start->slot_cnt;
+				grp_iter = grp_start;
+
+				list_for_each_entry_from(grp_iter,
+					&iop_chan->chain, chain_node) {
+					zero_sum_result |=
+					    iop_desc_get_zero_result(grp_iter);
+					    pr_debug("\titer%d result: %d\n",
+					    grp_iter->idx, zero_sum_result);
+					slot_cnt -= slots_per_op;
+					if (slot_cnt == 0)
+						break;
+				}
+				pr_debug("\tgrp_start->xor_check_result: %p\n",
+					grp_start->xor_check_result);
+				*grp_start->xor_check_result = zero_sum_result;
+			}
+
+			/* clean up the group */
+			slot_cnt = grp_start->slot_cnt;
+			grp_iter = grp_start;
+			list_for_each_entry_safe_from(grp_iter, _grp_iter,
+				&iop_chan->chain, chain_node) {
+				cookie = iop_adma_run_tx_complete_actions(
+					grp_iter, iop_chan, cookie);
+
+				slot_cnt -= slots_per_op;
+				end_of_chain = iop_adma_clean_slot(grp_iter,
+					iop_chan);
+
+				if (slot_cnt == 0 || end_of_chain)
+					break;
+			}
+
+			/* the group should be complete at this point */
+			BUG_ON(slot_cnt);
+
+			slots_per_op = 0;
+			grp_start = NULL;
+			if (end_of_chain)
+				break;
+			else
+				continue;
+		} else if (slots_per_op) /* wait for group completion */
+			continue;
+
+		/* write back zero sum results (single descriptor case) */
+		if (iter->xor_check_result && iter->async_tx.cookie)
+			*iter->xor_check_result =
+				iop_desc_get_zero_result(iter);
+
+		cookie = iop_adma_run_tx_complete_actions(
+					iter, iop_chan, cookie);
+
+		if (iop_adma_clean_slot(iter, iop_chan))
+			break;
+	}
+
+	if (cookie > 0) {
+		iop_chan->common.completed_cookie = cookie;
+		pr_debug("\tcompleted cookie %d\n", cookie);
+	}
+}
+
+static void
+iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+	spin_lock_bh(&iop_chan->lock);
+	__iop_adma_slot_cleanup(iop_chan);
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_adma_tasklet(unsigned long data)
+{
+	struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
+
+	/* lockdep will flag depedency submissions as potentially
+	 * recursive locking, this is not the case as a dependency
+	 * submission will never recurse a channels submit routine.
+	 * There are checks in async_tx.c to prevent this.
+	 */
+	spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
+	__iop_adma_slot_cleanup(iop_chan);
+	spin_unlock(&iop_chan->lock);
+}
+
+static struct iop_adma_desc_slot *
+iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
+			int slots_per_op)
+{
+	struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
+	LIST_HEAD(chain);
+	int slots_found, retry = 0;
+
+	/* start search from the last allocated descrtiptor
+	 * if a contiguous allocation can not be found start searching
+	 * from the beginning of the list
+	 */
+retry:
+	slots_found = 0;
+	if (retry == 0)
+		iter = iop_chan->last_used;
+	else
+		iter = list_entry(&iop_chan->all_slots,
+			struct iop_adma_desc_slot,
+			slot_node);
+
+	list_for_each_entry_safe_continue(
+		iter, _iter, &iop_chan->all_slots, slot_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+		if (iter->slots_per_op) {
+			/* give up after finding the first busy slot
+			 * on the second pass through the list
+			 */
+			if (retry)
+				break;
+
+			slots_found = 0;
+			continue;
+		}
+
+		/* start the allocation if the slot is correctly aligned */
+		if (!slots_found++) {
+			if (iop_desc_is_aligned(iter, slots_per_op))
+				alloc_start = iter;
+			else {
+				slots_found = 0;
+				continue;
+			}
+		}
+
+		if (slots_found == num_slots) {
+			struct iop_adma_desc_slot *alloc_tail = NULL;
+			struct iop_adma_desc_slot *last_used = NULL;
+			iter = alloc_start;
+			while (num_slots) {
+				int i;
+				dev_dbg(iop_chan->device->common.dev,
+					"allocated slot: %d "
+					"(desc %p phys: %#x) slots_per_op %d\n",
+					iter->idx, iter->hw_desc,
+					iter->async_tx.phys, slots_per_op);
+
+				/* pre-ack all but the last descriptor */
+				if (num_slots != slots_per_op)
+					async_tx_ack(&iter->async_tx);
+
+				list_add_tail(&iter->chain_node, &chain);
+				alloc_tail = iter;
+				iter->async_tx.cookie = 0;
+				iter->slot_cnt = num_slots;
+				iter->xor_check_result = NULL;
+				for (i = 0; i < slots_per_op; i++) {
+					iter->slots_per_op = slots_per_op - i;
+					last_used = iter;
+					iter = list_entry(iter->slot_node.next,
+						struct iop_adma_desc_slot,
+						slot_node);
+				}
+				num_slots -= slots_per_op;
+			}
+			alloc_tail->group_head = alloc_start;
+			alloc_tail->async_tx.cookie = -EBUSY;
+			list_splice(&chain, &alloc_tail->tx_list);
+			iop_chan->last_used = last_used;
+			iop_desc_clear_next_desc(alloc_start);
+			iop_desc_clear_next_desc(alloc_tail);
+			return alloc_tail;
+		}
+	}
+	if (!retry++)
+		goto retry;
+
+	/* perform direct reclaim if the allocation fails */
+	__iop_adma_slot_cleanup(iop_chan);
+
+	return NULL;
+}
+
+static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
+{
+	dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
+		iop_chan->pending);
+
+	if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
+		iop_chan->pending = 0;
+		iop_chan_append(iop_chan);
+	}
+}
+
+static dma_cookie_t
+iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
+	struct iop_adma_desc_slot *grp_start, *old_chain_tail;
+	int slot_cnt;
+	int slots_per_op;
+	dma_cookie_t cookie;
+	dma_addr_t next_dma;
+
+	grp_start = sw_desc->group_head;
+	slot_cnt = grp_start->slot_cnt;
+	slots_per_op = grp_start->slots_per_op;
+
+	spin_lock_bh(&iop_chan->lock);
+	cookie = dma_cookie_assign(tx);
+
+	old_chain_tail = list_entry(iop_chan->chain.prev,
+		struct iop_adma_desc_slot, chain_node);
+	list_splice_init(&sw_desc->tx_list,
+			 &old_chain_tail->chain_node);
+
+	/* fix up the hardware chain */
+	next_dma = grp_start->async_tx.phys;
+	iop_desc_set_next_desc(old_chain_tail, next_dma);
+	BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */
+
+	/* check for pre-chained descriptors */
+	iop_paranoia(iop_desc_get_next_desc(sw_desc));
+
+	/* increment the pending count by the number of slots
+	 * memcpy operations have a 1:1 (slot:operation) relation
+	 * other operations are heavier and will pop the threshold
+	 * more often.
+	 */
+	iop_chan->pending += slot_cnt;
+	iop_adma_check_threshold(iop_chan);
+	spin_unlock_bh(&iop_chan->lock);
+
+	dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
+		__func__, sw_desc->async_tx.cookie, sw_desc->idx);
+
+	return cookie;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
+
+/**
+ * iop_adma_alloc_chan_resources -  returns the number of allocated descriptors
+ * @chan - allocate descriptor resources for this channel
+ * @client - current client requesting the channel be ready for requests
+ *
+ * Note: We keep the slots for 1 operation on iop_chan->chain at all times.  To
+ * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be
+ * greater than 2x the number slots needed to satisfy a device->max_xor
+ * request.
+ * */
+static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+	char *hw_desc;
+	int idx;
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *slot = NULL;
+	int init = iop_chan->slots_allocated ? 0 : 1;
+	struct iop_adma_platform_data *plat_data =
+		iop_chan->device->pdev->dev.platform_data;
+	int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
+
+	/* Allocate descriptor slots */
+	do {
+		idx = iop_chan->slots_allocated;
+		if (idx == num_descs_in_pool)
+			break;
+
+		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			printk(KERN_INFO "IOP ADMA Channel only initialized"
+				" %d descriptor slots", idx);
+			break;
+		}
+		hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
+		slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+
+		dma_async_tx_descriptor_init(&slot->async_tx, chan);
+		slot->async_tx.tx_submit = iop_adma_tx_submit;
+		INIT_LIST_HEAD(&slot->tx_list);
+		INIT_LIST_HEAD(&slot->chain_node);
+		INIT_LIST_HEAD(&slot->slot_node);
+		hw_desc = (char *) iop_chan->device->dma_desc_pool;
+		slot->async_tx.phys =
+			(dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+		slot->idx = idx;
+
+		spin_lock_bh(&iop_chan->lock);
+		iop_chan->slots_allocated++;
+		list_add_tail(&slot->slot_node, &iop_chan->all_slots);
+		spin_unlock_bh(&iop_chan->lock);
+	} while (iop_chan->slots_allocated < num_descs_in_pool);
+
+	if (idx && !iop_chan->last_used)
+		iop_chan->last_used = list_entry(iop_chan->all_slots.next,
+					struct iop_adma_desc_slot,
+					slot_node);
+
+	dev_dbg(iop_chan->device->common.dev,
+		"allocated %d descriptor slots last_used: %p\n",
+		iop_chan->slots_allocated, iop_chan->last_used);
+
+	/* initialize the channel and the chain with a null operation */
+	if (init) {
+		if (dma_has_cap(DMA_MEMCPY,
+			iop_chan->device->common.cap_mask))
+			iop_chan_start_null_memcpy(iop_chan);
+		else if (dma_has_cap(DMA_XOR,
+			iop_chan->device->common.cap_mask))
+			iop_chan_start_null_xor(iop_chan);
+		else
+			BUG();
+	}
+
+	return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_interrupt(grp_start, iop_chan);
+		grp_start->unmap_len = 0;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+			 dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+		__func__, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_memcpy(grp_start, flags);
+		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
+		sw_desc->unmap_src_cnt = 1;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
+			 int value, size_t len, unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+		__func__, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_memset(grp_start, flags);
+		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_block_fill_val(grp_start, value);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		sw_desc->unmap_src_cnt = 1;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
+		      dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
+		      unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev,
+		"%s src_cnt: %d len: %u flags: %lx\n",
+		__func__, src_cnt, len, flags);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_xor(grp_start, src_cnt, flags);
+		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		sw_desc->unmap_src_cnt = src_cnt;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+		while (src_cnt--)
+			iop_desc_set_xor_src_addr(grp_start, src_cnt,
+						  dma_src[src_cnt]);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+			  unsigned int src_cnt, size_t len, u32 *result,
+			  unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+
+	dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+		__func__, src_cnt, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_zero_sum(grp_start, src_cnt, flags);
+		iop_desc_set_zero_sum_byte_count(grp_start, len);
+		grp_start->xor_check_result = result;
+		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
+			__func__, grp_start->xor_check_result);
+		sw_desc->unmap_src_cnt = src_cnt;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+		while (src_cnt--)
+			iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
+						       dma_src[src_cnt]);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+		     unsigned int src_cnt, const unsigned char *scf, size_t len,
+		     unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *g;
+	int slot_cnt, slots_per_op;
+	int continue_srcs;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev,
+		"%s src_cnt: %d len: %u flags: %lx\n",
+		__func__, src_cnt, len, flags);
+
+	if (dmaf_p_disabled_continue(flags))
+		continue_srcs = 1+src_cnt;
+	else if (dmaf_continue(flags))
+		continue_srcs = 3+src_cnt;
+	else
+		continue_srcs = 0+src_cnt;
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		int i;
+
+		g = sw_desc->group_head;
+		iop_desc_set_byte_count(g, iop_chan, len);
+
+		/* even if P is disabled its destination address (bits
+		 * [3:0]) must match Q.  It is ok if P points to an
+		 * invalid address, it won't be written.
+		 */
+		if (flags & DMA_PREP_PQ_DISABLE_P)
+			dst[0] = dst[1] & 0x7;
+
+		iop_desc_set_pq_addr(g, dst);
+		sw_desc->unmap_src_cnt = src_cnt;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+		for (i = 0; i < src_cnt; i++)
+			iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+
+		/* if we are continuing a previous operation factor in
+		 * the old p and q values, see the comment for dma_maxpq
+		 * in include/linux/dmaengine.h
+		 */
+		if (dmaf_p_disabled_continue(flags))
+			iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+		else if (dmaf_continue(flags)) {
+			iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+			iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+			iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+		}
+		iop_desc_init_pq(g, i, flags);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+			 unsigned int src_cnt, const unsigned char *scf,
+			 size_t len, enum sum_check_flags *pqres,
+			 unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *g;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+		__func__, src_cnt, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		/* for validate operations p and q are tagged onto the
+		 * end of the source list
+		 */
+		int pq_idx = src_cnt;
+
+		g = sw_desc->group_head;
+		iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+		iop_desc_set_pq_zero_sum_byte_count(g, len);
+		g->pq_check_result = pqres;
+		pr_debug("\t%s: g->pq_check_result: %p\n",
+			__func__, g->pq_check_result);
+		sw_desc->unmap_src_cnt = src_cnt+2;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+		while (src_cnt--)
+			iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+							  src[src_cnt],
+							  scf[src_cnt]);
+		iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void iop_adma_free_chan_resources(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	iop_adma_slot_cleanup(iop_chan);
+
+	spin_lock_bh(&iop_chan->lock);
+	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+					chain_node) {
+		in_use_descs++;
+		list_del(&iter->chain_node);
+	}
+	list_for_each_entry_safe_reverse(
+		iter, _iter, &iop_chan->all_slots, slot_node) {
+		list_del(&iter->slot_node);
+		kfree(iter);
+		iop_chan->slots_allocated--;
+	}
+	iop_chan->last_used = NULL;
+
+	dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
+		__func__, iop_chan->slots_allocated);
+	spin_unlock_bh(&iop_chan->lock);
+
+	/* one is ok since we left it on there on purpose */
+	if (in_use_descs > 1)
+		printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
+			in_use_descs - 1);
+}
+
+/**
+ * iop_adma_status - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ * @txstate: a holder for the current state of the channel or NULL
+ */
+static enum dma_status iop_adma_status(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	int ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	iop_adma_slot_cleanup(iop_chan);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static irqreturn_t iop_adma_eot_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	iop_adma_device_clear_eot_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	iop_adma_device_clear_eoc_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_err_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+	unsigned long status = iop_chan_get_status(chan);
+
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		"error ( %s%s%s%s%s%s%s)\n",
+		iop_is_err_int_parity(status, chan) ? "int_parity " : "",
+		iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
+		iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
+		iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
+		iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
+		iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
+		iop_is_err_split_tx(status, chan) ? "split_tx " : "");
+
+	iop_adma_device_clear_err_status(chan);
+
+	BUG();
+
+	return IRQ_HANDLED;
+}
+
+static void iop_adma_issue_pending(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+
+	if (iop_chan->pending) {
+		iop_chan->pending = 0;
+		iop_chan_append(iop_chan);
+	}
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define IOP_ADMA_TEST_SIZE 2000
+
+static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
+{
+	int i;
+	void *src, *dest;
+	dma_addr_t src_dma, dest_dma;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	struct iop_adma_chan *iop_chan;
+
+	dev_dbg(device->common.dev, "%s\n", __func__);
+
+	src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+	dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
+		((u8 *) src)[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	dest_dma = dma_map_single(dma_chan->device->dev, dest,
+				IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+	src_dma = dma_map_single(dma_chan->device->dev, src,
+				IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+				      IOP_ADMA_TEST_SIZE,
+				      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(1);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+			DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	iop_chan = to_iop_adma_chan(dma_chan);
+	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+		IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+	if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+iop_adma_xor_val_self_test(struct iop_adma_device *device)
+{
+	int i, src_idx;
+	struct page *dest;
+	struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
+	struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+	dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+	dma_addr_t dma_addr, dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	u32 zero_sum_result;
+	int err = 0;
+	struct iop_adma_chan *iop_chan;
+
+	dev_dbg(device->common.dev, "%s\n", __func__);
+
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx]) {
+			while (src_idx--)
+				__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest) {
+		while (src_idx--)
+			__free_page(xor_srcs[src_idx]);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+			(cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
+				PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+					   0, PAGE_SIZE, DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				   IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE,
+				   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+		DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test xor timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	iop_chan = to_iop_adma_chan(dma_chan);
+	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+		PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i] != cmp_word) {
+			dev_printk(KERN_ERR, dma_chan->device->dev,
+				"Self-test xor failed compare, disabling\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+	dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
+		PAGE_SIZE, DMA_TO_DEVICE);
+
+	/* skip zero sum if the capability is not present */
+	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+		goto free_resources;
+
+	/* zero sum the sources with the destintation page */
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		zero_sum_srcs[i] = xor_srcs[i];
+	zero_sum_srcs[i] = dest;
+
+	zero_sum_result = 1;
+
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+					   zero_sum_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+				       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+				       &zero_sum_result,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test zero sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 0) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test zero sum failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* test memset */
+	dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
+			PAGE_SIZE, DMA_FROM_DEVICE);
+	tx = iop_adma_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+				      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test memset timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i]) {
+			dev_printk(KERN_ERR, dma_chan->device->dev,
+				"Self-test memset failed compare, disabling\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+	/* test for non-zero parity sum */
+	zero_sum_result = 0;
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+					   zero_sum_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+				       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+				       &zero_sum_result,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test non-zero sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 1) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test non-zero sum failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	src_idx = IOP_ADMA_NUM_SRC_TEST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+#ifdef CONFIG_RAID6_PQ
+static int __devinit
+iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+{
+	/* combined sources, software pq results, and extra hw pq results */
+	struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+	/* ptr to the extra hw pq buffers defined above */
+	struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+	/* address conversion buffers (dma_map / page_address) */
+	void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+	dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2];
+	dma_addr_t *pq_dest = &pq_src[IOP_ADMA_NUM_SRC_TEST];
+
+	int i;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u32 zero_sum_result;
+	int err = 0;
+	struct device *dev;
+
+	dev_dbg(device->common.dev, "%s\n", __func__);
+
+	for (i = 0; i < ARRAY_SIZE(pq); i++) {
+		pq[i] = alloc_page(GFP_KERNEL);
+		if (!pq[i]) {
+			while (i--)
+				__free_page(pq[i]);
+			return -ENOMEM;
+		}
+	}
+
+	/* Fill in src buffers */
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+		pq_sw[i] = page_address(pq[i]);
+		memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
+	}
+	pq_sw[i] = page_address(pq[i]);
+	pq_sw[i+1] = page_address(pq[i+1]);
+
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	dev = dma_chan->device->dev;
+
+	/* initialize the dests */
+	memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
+	memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
+
+	/* test pq */
+	pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+					 DMA_TO_DEVICE);
+
+	tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+				  IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+				  PAGE_SIZE,
+				  DMA_PREP_INTERRUPT |
+				  DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+		DMA_SUCCESS) {
+		dev_err(dev, "Self-test pq timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+
+	if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+		   page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+		dev_err(dev, "Self-test p failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+		   page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+		dev_err(dev, "Self-test q failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* test correct zero sum using the software generated pq values */
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+					 DMA_TO_DEVICE);
+
+	zero_sum_result = ~0;
+	tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+				      pq_src, IOP_ADMA_NUM_SRC_TEST,
+				      raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+				      DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+		DMA_SUCCESS) {
+		dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 0) {
+		dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+			zero_sum_result);
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* test incorrect zero sum */
+	i = IOP_ADMA_NUM_SRC_TEST;
+	memset(pq_sw[i] + 100, 0, 100);
+	memset(pq_sw[i+1] + 200, 0, 200);
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+					 DMA_TO_DEVICE);
+
+	zero_sum_result = 0;
+	tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+				      pq_src, IOP_ADMA_NUM_SRC_TEST,
+				      raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+				      DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+		DMA_SUCCESS) {
+		dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+		dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+			zero_sum_result);
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	i = ARRAY_SIZE(pq);
+	while (i--)
+		__free_page(pq[i]);
+	return err;
+}
+#endif
+
+static int __devexit iop_adma_remove(struct platform_device *dev)
+{
+	struct iop_adma_device *device = platform_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+	struct iop_adma_chan *iop_chan;
+	struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
+
+	dma_async_device_unregister(&device->common);
+
+	dma_free_coherent(&dev->dev, plat_data->pool_size,
+			device->dma_desc_pool_virt, device->dma_desc_pool);
+
+	list_for_each_entry_safe(chan, _chan, &device->common.channels,
+				device_node) {
+		iop_chan = to_iop_adma_chan(chan);
+		list_del(&chan->device_node);
+		kfree(iop_chan);
+	}
+	kfree(device);
+
+	return 0;
+}
+
+static int __devinit iop_adma_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int ret = 0, i;
+	struct iop_adma_device *adev;
+	struct iop_adma_chan *iop_chan;
+	struct dma_device *dma_dev;
+	struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+				resource_size(res), pdev->name))
+		return -EBUSY;
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+	dma_dev = &adev->common;
+
+	/* allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+					plat_data->pool_size,
+					&adev->dma_desc_pool,
+					GFP_KERNEL)) == NULL) {
+		ret = -ENOMEM;
+		goto err_free_adev;
+	}
+
+	dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %p\n",
+		__func__, adev->dma_desc_pool_virt,
+		(void *) adev->dma_desc_pool);
+
+	adev->id = plat_data->hw_id;
+
+	/* discover transaction capabilites from the platform data */
+	dma_dev->cap_mask = plat_data->cap_mask;
+
+	adev->pdev = pdev;
+	platform_set_drvdata(pdev, adev);
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
+	dma_dev->device_tx_status = iop_adma_status;
+	dma_dev->device_issue_pending = iop_adma_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
+	if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset;
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->max_xor = iop_adma_get_max_xor();
+		dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
+	}
+	if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_xor_val =
+			iop_adma_prep_dma_xor_val;
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+		dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+		dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+	}
+	if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_pq_val =
+			iop_adma_prep_dma_pq_val;
+	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_interrupt =
+			iop_adma_prep_dma_interrupt;
+
+	iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
+	if (!iop_chan) {
+		ret = -ENOMEM;
+		goto err_free_dma;
+	}
+	iop_chan->device = adev;
+
+	iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
+					resource_size(res));
+	if (!iop_chan->mmr_base) {
+		ret = -ENOMEM;
+		goto err_free_iop_chan;
+	}
+	tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
+		iop_chan);
+
+	/* clear errors before enabling interrupts */
+	iop_adma_device_clear_err_status(iop_chan);
+
+	for (i = 0; i < 3; i++) {
+		irq_handler_t handler[] = { iop_adma_eot_handler,
+					iop_adma_eoc_handler,
+					iop_adma_err_handler };
+		int irq = platform_get_irq(pdev, i);
+		if (irq < 0) {
+			ret = -ENXIO;
+			goto err_free_iop_chan;
+		} else {
+			ret = devm_request_irq(&pdev->dev, irq,
+					handler[i], 0, pdev->name, iop_chan);
+			if (ret)
+				goto err_free_iop_chan;
+		}
+	}
+
+	spin_lock_init(&iop_chan->lock);
+	INIT_LIST_HEAD(&iop_chan->chain);
+	INIT_LIST_HEAD(&iop_chan->all_slots);
+	iop_chan->common.device = dma_dev;
+	dma_cookie_init(&iop_chan->common);
+	list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
+
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+		ret = iop_adma_memcpy_self_test(adev);
+		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+		if (ret)
+			goto err_free_iop_chan;
+	}
+
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
+	    dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+		ret = iop_adma_xor_val_self_test(adev);
+		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+		if (ret)
+			goto err_free_iop_chan;
+	}
+
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+	    dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+		#ifdef CONFIG_RAID6_PQ
+		ret = iop_adma_pq_zero_sum_self_test(adev);
+		dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+		#else
+		/* can not test raid6, so do not publish capability */
+		dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+		dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+		ret = 0;
+		#endif
+		if (ret)
+			goto err_free_iop_chan;
+	}
+
+	dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
+	  "( %s%s%s%s%s%s%s)\n",
+	  dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+	  dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
+	  dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+	  dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
+	  dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
+	  dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+	  dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+	dma_async_device_register(dma_dev);
+	goto out;
+
+ err_free_iop_chan:
+	kfree(iop_chan);
+ err_free_dma:
+	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+			adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ err_free_adev:
+	kfree(adev);
+ out:
+	return ret;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+
+		list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
+		async_tx_ack(&sw_desc->async_tx);
+		iop_desc_init_memcpy(grp_start, 0);
+		iop_desc_set_byte_count(grp_start, iop_chan, 0);
+		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+		iop_desc_set_memcpy_src_addr(grp_start, 0);
+
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		iop_chan->common.completed_cookie = cookie - 1;
+
+		/* channel should not be busy */
+		BUG_ON(iop_chan_is_busy(iop_chan));
+
+		/* clear any prior error-status bits */
+		iop_adma_device_clear_err_status(iop_chan);
+
+		/* disable operation */
+		iop_chan_disable(iop_chan);
+
+		/* set the descriptor address */
+		iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+		/* 1/ don't add pre-chained descriptors
+		 * 2/ dummy read to flush next_desc write
+		 */
+		BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+		/* run the descriptor */
+		iop_chan_enable(iop_chan);
+	} else
+		dev_printk(KERN_ERR, iop_chan->device->common.dev,
+			 "failed to allocate null descriptor\n");
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
+		async_tx_ack(&sw_desc->async_tx);
+		iop_desc_init_null_xor(grp_start, 2, 0);
+		iop_desc_set_byte_count(grp_start, iop_chan, 0);
+		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+		iop_desc_set_xor_src_addr(grp_start, 0, 0);
+		iop_desc_set_xor_src_addr(grp_start, 1, 0);
+
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		iop_chan->common.completed_cookie = cookie - 1;
+
+		/* channel should not be busy */
+		BUG_ON(iop_chan_is_busy(iop_chan));
+
+		/* clear any prior error-status bits */
+		iop_adma_device_clear_err_status(iop_chan);
+
+		/* disable operation */
+		iop_chan_disable(iop_chan);
+
+		/* set the descriptor address */
+		iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+		/* 1/ don't add pre-chained descriptors
+		 * 2/ dummy read to flush next_desc write
+		 */
+		BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+		/* run the descriptor */
+		iop_chan_enable(iop_chan);
+	} else
+		dev_printk(KERN_ERR, iop_chan->device->common.dev,
+			"failed to allocate null descriptor\n");
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static struct platform_driver iop_adma_driver = {
+	.probe		= iop_adma_probe,
+	.remove		= __devexit_p(iop_adma_remove),
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "iop-adma",
+	},
+};
+
+module_platform_driver(iop_adma_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("IOP ADMA Engine Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:iop-adma");
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
new file mode 100644
index 00000000..bb48a57c
--- /dev/null
+++ b/drivers/dma/iovlock.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <net/tcp.h> /* for memcpy_toiovec */
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+static int num_pages_spanned(struct iovec *iov)
+{
+	return
+	((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
+	((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/*
+ * Pin down all the iovec pages needed for len bytes.
+ * Return a struct dma_pinned_list to keep track of pages pinned down.
+ *
+ * We are allocating a single chunk of memory, and then carving it up into
+ * 3 sections, the latter 2 whose size depends on the number of iovecs and the
+ * total number of pages, respectively.
+ */
+struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len)
+{
+	struct dma_pinned_list *local_list;
+	struct page **pages;
+	int i;
+	int ret;
+	int nr_iovecs = 0;
+	int iovec_len_used = 0;
+	int iovec_pages_used = 0;
+
+	/* don't pin down non-user-based iovecs */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return NULL;
+
+	/* determine how many iovecs/pages there are, up front */
+	do {
+		iovec_len_used += iov[nr_iovecs].iov_len;
+		iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]);
+		nr_iovecs++;
+	} while (iovec_len_used < len);
+
+	/* single kmalloc for pinned list, page_list[], and the page arrays */
+	local_list = kmalloc(sizeof(*local_list)
+		+ (nr_iovecs * sizeof (struct dma_page_list))
+		+ (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
+	if (!local_list)
+		goto out;
+
+	/* list of pages starts right after the page list array */
+	pages = (struct page **) &local_list->page_list[nr_iovecs];
+
+	local_list->nr_iovecs = 0;
+
+	for (i = 0; i < nr_iovecs; i++) {
+		struct dma_page_list *page_list = &local_list->page_list[i];
+
+		len -= iov[i].iov_len;
+
+		if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len))
+			goto unpin;
+
+		page_list->nr_pages = num_pages_spanned(&iov[i]);
+		page_list->base_address = iov[i].iov_base;
+
+		page_list->pages = pages;
+		pages += page_list->nr_pages;
+
+		/* pin pages down */
+		down_read(&current->mm->mmap_sem);
+		ret = get_user_pages(
+			current,
+			current->mm,
+			(unsigned long) iov[i].iov_base,
+			page_list->nr_pages,
+			1,	/* write */
+			0,	/* force */
+			page_list->pages,
+			NULL);
+		up_read(&current->mm->mmap_sem);
+
+		if (ret != page_list->nr_pages)
+			goto unpin;
+
+		local_list->nr_iovecs = i + 1;
+	}
+
+	return local_list;
+
+unpin:
+	dma_unpin_iovec_pages(local_list);
+out:
+	return NULL;
+}
+
+void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list)
+{
+	int i, j;
+
+	if (!pinned_list)
+		return;
+
+	for (i = 0; i < pinned_list->nr_iovecs; i++) {
+		struct dma_page_list *page_list = &pinned_list->page_list[i];
+		for (j = 0; j < page_list->nr_pages; j++) {
+			set_page_dirty_lock(page_list->pages[j]);
+			page_cache_release(page_list->pages[j]);
+		}
+	}
+
+	kfree(pinned_list);
+}
+
+
+/*
+ * We have already pinned down the pages we will be using in the iovecs.
+ * Each entry in iov array has corresponding entry in pinned_list->page_list.
+ * Using array indexing to keep iov[] and page_list[] in sync.
+ * Initial elements in iov array's iov->iov_len will be 0 if already copied into
+ *   by another call.
+ * iov array length remaining guaranteed to be bigger than len.
+ */
+dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
+	struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len)
+{
+	int iov_byte_offset;
+	int copy;
+	dma_cookie_t dma_cookie = 0;
+	int iovec_idx;
+	int page_idx;
+
+	if (!chan)
+		return memcpy_toiovec(iov, kdata, len);
+
+	iovec_idx = 0;
+	while (iovec_idx < pinned_list->nr_iovecs) {
+		struct dma_page_list *page_list;
+
+		/* skip already used-up iovecs */
+		while (!iov[iovec_idx].iov_len)
+			iovec_idx++;
+
+		page_list = &pinned_list->page_list[iovec_idx];
+
+		iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+		page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+			 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+		/* break up copies to not cross page boundary */
+		while (iov[iovec_idx].iov_len) {
+			copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+			copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+			dma_cookie = dma_async_memcpy_buf_to_pg(chan,
+					page_list->pages[page_idx],
+					iov_byte_offset,
+					kdata,
+					copy);
+			/* poll for a descriptor slot */
+			if (unlikely(dma_cookie < 0)) {
+				dma_async_issue_pending(chan);
+				continue;
+			}
+
+			len -= copy;
+			iov[iovec_idx].iov_len -= copy;
+			iov[iovec_idx].iov_base += copy;
+
+			if (!len)
+				return dma_cookie;
+
+			kdata += copy;
+			iov_byte_offset = 0;
+			page_idx++;
+		}
+		iovec_idx++;
+	}
+
+	/* really bad if we ever run out of iovecs */
+	BUG();
+	return -EFAULT;
+}
+
+dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
+	struct dma_pinned_list *pinned_list, struct page *page,
+	unsigned int offset, size_t len)
+{
+	int iov_byte_offset;
+	int copy;
+	dma_cookie_t dma_cookie = 0;
+	int iovec_idx;
+	int page_idx;
+	int err;
+
+	/* this needs as-yet-unimplemented buf-to-buff, so punt. */
+	/* TODO: use dma for this */
+	if (!chan || !pinned_list) {
+		u8 *vaddr = kmap(page);
+		err = memcpy_toiovec(iov, vaddr + offset, len);
+		kunmap(page);
+		return err;
+	}
+
+	iovec_idx = 0;
+	while (iovec_idx < pinned_list->nr_iovecs) {
+		struct dma_page_list *page_list;
+
+		/* skip already used-up iovecs */
+		while (!iov[iovec_idx].iov_len)
+			iovec_idx++;
+
+		page_list = &pinned_list->page_list[iovec_idx];
+
+		iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+		page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+			 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+		/* break up copies to not cross page boundary */
+		while (iov[iovec_idx].iov_len) {
+			copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+			copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+			dma_cookie = dma_async_memcpy_pg_to_pg(chan,
+					page_list->pages[page_idx],
+					iov_byte_offset,
+					page,
+					offset,
+					copy);
+			/* poll for a descriptor slot */
+			if (unlikely(dma_cookie < 0)) {
+				dma_async_issue_pending(chan);
+				continue;
+			}
+
+			len -= copy;
+			iov[iovec_idx].iov_len -= copy;
+			iov[iovec_idx].iov_base += copy;
+
+			if (!len)
+				return dma_cookie;
+
+			offset += copy;
+			iov_byte_offset = 0;
+			page_idx++;
+		}
+		iovec_idx++;
+	}
+
+	/* really bad if we ever run out of iovecs */
+	BUG();
+	return -EFAULT;
+}
diff --git a/drivers/dma/ipu/Makefile b/drivers/dma/ipu/Makefile
new file mode 100644
index 00000000..6704cf48
--- /dev/null
+++ b/drivers/dma/ipu/Makefile
@@ -0,0 +1 @@
+obj-y	+= ipu_irq.o ipu_idmac.o
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
new file mode 100644
index 00000000..62e3f8ec
--- /dev/null
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -0,0 +1,1799 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/clk.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include <mach/ipu.h>
+
+#include "../dmaengine.h"
+#include "ipu_intern.h"
+
+#define FS_VF_IN_VALID	0x00000002
+#define FS_ENC_IN_VALID	0x00000001
+
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+			       bool wait_for_stop);
+
+/*
+ * There can be only one, we could allocate it dynamically, but then we'd have
+ * to add an extra parameter to some functions, and use something as ugly as
+ *	struct ipu *ipu = to_ipu(to_idmac(ichan->dma_chan.device));
+ * in the ISR
+ */
+static struct ipu ipu_data;
+
+#define to_ipu(id) container_of(id, struct ipu, idmac)
+
+static u32 __idmac_read_icreg(struct ipu *ipu, unsigned long reg)
+{
+	return __raw_readl(ipu->reg_ic + reg);
+}
+
+#define idmac_read_icreg(ipu, reg) __idmac_read_icreg(ipu, reg - IC_CONF)
+
+static void __idmac_write_icreg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+	__raw_writel(value, ipu->reg_ic + reg);
+}
+
+#define idmac_write_icreg(ipu, v, reg) __idmac_write_icreg(ipu, v, reg - IC_CONF)
+
+static u32 idmac_read_ipureg(struct ipu *ipu, unsigned long reg)
+{
+	return __raw_readl(ipu->reg_ipu + reg);
+}
+
+static void idmac_write_ipureg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+	__raw_writel(value, ipu->reg_ipu + reg);
+}
+
+/*****************************************************************************
+ * IPU / IC common functions
+ */
+static void dump_idmac_reg(struct ipu *ipu)
+{
+	dev_dbg(ipu->dev, "IDMAC_CONF 0x%x, IC_CONF 0x%x, IDMAC_CHA_EN 0x%x, "
+		"IDMAC_CHA_PRI 0x%x, IDMAC_CHA_BUSY 0x%x\n",
+		idmac_read_icreg(ipu, IDMAC_CONF),
+		idmac_read_icreg(ipu, IC_CONF),
+		idmac_read_icreg(ipu, IDMAC_CHA_EN),
+		idmac_read_icreg(ipu, IDMAC_CHA_PRI),
+		idmac_read_icreg(ipu, IDMAC_CHA_BUSY));
+	dev_dbg(ipu->dev, "BUF0_RDY 0x%x, BUF1_RDY 0x%x, CUR_BUF 0x%x, "
+		"DB_MODE 0x%x, TASKS_STAT 0x%x\n",
+		idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY),
+		idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY),
+		idmac_read_ipureg(ipu, IPU_CHA_CUR_BUF),
+		idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL),
+		idmac_read_ipureg(ipu, IPU_TASKS_STAT));
+}
+
+static uint32_t bytes_per_pixel(enum pixel_fmt fmt)
+{
+	switch (fmt) {
+	case IPU_PIX_FMT_GENERIC:	/* generic data */
+	case IPU_PIX_FMT_RGB332:
+	case IPU_PIX_FMT_YUV420P:
+	case IPU_PIX_FMT_YUV422P:
+	default:
+		return 1;
+	case IPU_PIX_FMT_RGB565:
+	case IPU_PIX_FMT_YUYV:
+	case IPU_PIX_FMT_UYVY:
+		return 2;
+	case IPU_PIX_FMT_BGR24:
+	case IPU_PIX_FMT_RGB24:
+		return 3;
+	case IPU_PIX_FMT_GENERIC_32:	/* generic data */
+	case IPU_PIX_FMT_BGR32:
+	case IPU_PIX_FMT_RGB32:
+	case IPU_PIX_FMT_ABGR32:
+		return 4;
+	}
+}
+
+/* Enable direct write to memory by the Camera Sensor Interface */
+static void ipu_ic_enable_task(struct ipu *ipu, enum ipu_channel channel)
+{
+	uint32_t ic_conf, mask;
+
+	switch (channel) {
+	case IDMAC_IC_0:
+		mask = IC_CONF_PRPENC_EN;
+		break;
+	case IDMAC_IC_7:
+		mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN;
+		break;
+	default:
+		return;
+	}
+	ic_conf = idmac_read_icreg(ipu, IC_CONF) | mask;
+	idmac_write_icreg(ipu, ic_conf, IC_CONF);
+}
+
+/* Called under spin_lock_irqsave(&ipu_data.lock) */
+static void ipu_ic_disable_task(struct ipu *ipu, enum ipu_channel channel)
+{
+	uint32_t ic_conf, mask;
+
+	switch (channel) {
+	case IDMAC_IC_0:
+		mask = IC_CONF_PRPENC_EN;
+		break;
+	case IDMAC_IC_7:
+		mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN;
+		break;
+	default:
+		return;
+	}
+	ic_conf = idmac_read_icreg(ipu, IC_CONF) & ~mask;
+	idmac_write_icreg(ipu, ic_conf, IC_CONF);
+}
+
+static uint32_t ipu_channel_status(struct ipu *ipu, enum ipu_channel channel)
+{
+	uint32_t stat = TASK_STAT_IDLE;
+	uint32_t task_stat_reg = idmac_read_ipureg(ipu, IPU_TASKS_STAT);
+
+	switch (channel) {
+	case IDMAC_IC_7:
+		stat = (task_stat_reg & TSTAT_CSI2MEM_MASK) >>
+			TSTAT_CSI2MEM_OFFSET;
+		break;
+	case IDMAC_IC_0:
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+	default:
+		break;
+	}
+	return stat;
+}
+
+struct chan_param_mem_planar {
+	/* Word 0 */
+	u32	xv:10;
+	u32	yv:10;
+	u32	xb:12;
+
+	u32	yb:12;
+	u32	res1:2;
+	u32	nsb:1;
+	u32	lnpb:6;
+	u32	ubo_l:11;
+
+	u32	ubo_h:15;
+	u32	vbo_l:17;
+
+	u32	vbo_h:9;
+	u32	res2:3;
+	u32	fw:12;
+	u32	fh_l:8;
+
+	u32	fh_h:4;
+	u32	res3:28;
+
+	/* Word 1 */
+	u32	eba0;
+
+	u32	eba1;
+
+	u32	bpp:3;
+	u32	sl:14;
+	u32	pfs:3;
+	u32	bam:3;
+	u32	res4:2;
+	u32	npb:6;
+	u32	res5:1;
+
+	u32	sat:2;
+	u32	res6:30;
+} __attribute__ ((packed));
+
+struct chan_param_mem_interleaved {
+	/* Word 0 */
+	u32	xv:10;
+	u32	yv:10;
+	u32	xb:12;
+
+	u32	yb:12;
+	u32	sce:1;
+	u32	res1:1;
+	u32	nsb:1;
+	u32	lnpb:6;
+	u32	sx:10;
+	u32	sy_l:1;
+
+	u32	sy_h:9;
+	u32	ns:10;
+	u32	sm:10;
+	u32	sdx_l:3;
+
+	u32	sdx_h:2;
+	u32	sdy:5;
+	u32	sdrx:1;
+	u32	sdry:1;
+	u32	sdr1:1;
+	u32	res2:2;
+	u32	fw:12;
+	u32	fh_l:8;
+
+	u32	fh_h:4;
+	u32	res3:28;
+
+	/* Word 1 */
+	u32	eba0;
+
+	u32	eba1;
+
+	u32	bpp:3;
+	u32	sl:14;
+	u32	pfs:3;
+	u32	bam:3;
+	u32	res4:2;
+	u32	npb:6;
+	u32	res5:1;
+
+	u32	sat:2;
+	u32	scc:1;
+	u32	ofs0:5;
+	u32	ofs1:5;
+	u32	ofs2:5;
+	u32	ofs3:5;
+	u32	wid0:3;
+	u32	wid1:3;
+	u32	wid2:3;
+
+	u32	wid3:3;
+	u32	dec_sel:1;
+	u32	res6:28;
+} __attribute__ ((packed));
+
+union chan_param_mem {
+	struct chan_param_mem_planar		pp;
+	struct chan_param_mem_interleaved	ip;
+};
+
+static void ipu_ch_param_set_plane_offset(union chan_param_mem *params,
+					  u32 u_offset, u32 v_offset)
+{
+	params->pp.ubo_l = u_offset & 0x7ff;
+	params->pp.ubo_h = u_offset >> 11;
+	params->pp.vbo_l = v_offset & 0x1ffff;
+	params->pp.vbo_h = v_offset >> 17;
+}
+
+static void ipu_ch_param_set_size(union chan_param_mem *params,
+				  uint32_t pixel_fmt, uint16_t width,
+				  uint16_t height, uint16_t stride)
+{
+	u32 u_offset;
+	u32 v_offset;
+
+	params->pp.fw		= width - 1;
+	params->pp.fh_l		= height - 1;
+	params->pp.fh_h		= (height - 1) >> 8;
+	params->pp.sl		= stride - 1;
+
+	switch (pixel_fmt) {
+	case IPU_PIX_FMT_GENERIC:
+		/*Represents 8-bit Generic data */
+		params->pp.bpp	= 3;
+		params->pp.pfs	= 7;
+		params->pp.npb	= 31;
+		params->pp.sat	= 2;		/* SAT = use 32-bit access */
+		break;
+	case IPU_PIX_FMT_GENERIC_32:
+		/*Represents 32-bit Generic data */
+		params->pp.bpp	= 0;
+		params->pp.pfs	= 7;
+		params->pp.npb	= 7;
+		params->pp.sat	= 2;		/* SAT = use 32-bit access */
+		break;
+	case IPU_PIX_FMT_RGB565:
+		params->ip.bpp	= 2;
+		params->ip.pfs	= 4;
+		params->ip.npb	= 15;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 0;		/* Red bit offset */
+		params->ip.ofs1	= 5;		/* Green bit offset */
+		params->ip.ofs2	= 11;		/* Blue bit offset */
+		params->ip.ofs3	= 16;		/* Alpha bit offset */
+		params->ip.wid0	= 4;		/* Red bit width - 1 */
+		params->ip.wid1	= 5;		/* Green bit width - 1 */
+		params->ip.wid2	= 4;		/* Blue bit width - 1 */
+		break;
+	case IPU_PIX_FMT_BGR24:
+		params->ip.bpp	= 1;		/* 24 BPP & RGB PFS */
+		params->ip.pfs	= 4;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 0;		/* Red bit offset */
+		params->ip.ofs1	= 8;		/* Green bit offset */
+		params->ip.ofs2	= 16;		/* Blue bit offset */
+		params->ip.ofs3	= 24;		/* Alpha bit offset */
+		params->ip.wid0	= 7;		/* Red bit width - 1 */
+		params->ip.wid1	= 7;		/* Green bit width - 1 */
+		params->ip.wid2	= 7;		/* Blue bit width - 1 */
+		break;
+	case IPU_PIX_FMT_RGB24:
+		params->ip.bpp	= 1;		/* 24 BPP & RGB PFS */
+		params->ip.pfs	= 4;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 16;		/* Red bit offset */
+		params->ip.ofs1	= 8;		/* Green bit offset */
+		params->ip.ofs2	= 0;		/* Blue bit offset */
+		params->ip.ofs3	= 24;		/* Alpha bit offset */
+		params->ip.wid0	= 7;		/* Red bit width - 1 */
+		params->ip.wid1	= 7;		/* Green bit width - 1 */
+		params->ip.wid2	= 7;		/* Blue bit width - 1 */
+		break;
+	case IPU_PIX_FMT_BGRA32:
+	case IPU_PIX_FMT_BGR32:
+	case IPU_PIX_FMT_ABGR32:
+		params->ip.bpp	= 0;
+		params->ip.pfs	= 4;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 8;		/* Red bit offset */
+		params->ip.ofs1	= 16;		/* Green bit offset */
+		params->ip.ofs2	= 24;		/* Blue bit offset */
+		params->ip.ofs3	= 0;		/* Alpha bit offset */
+		params->ip.wid0	= 7;		/* Red bit width - 1 */
+		params->ip.wid1	= 7;		/* Green bit width - 1 */
+		params->ip.wid2	= 7;		/* Blue bit width - 1 */
+		params->ip.wid3	= 7;		/* Alpha bit width - 1 */
+		break;
+	case IPU_PIX_FMT_RGBA32:
+	case IPU_PIX_FMT_RGB32:
+		params->ip.bpp	= 0;
+		params->ip.pfs	= 4;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 24;		/* Red bit offset */
+		params->ip.ofs1	= 16;		/* Green bit offset */
+		params->ip.ofs2	= 8;		/* Blue bit offset */
+		params->ip.ofs3	= 0;		/* Alpha bit offset */
+		params->ip.wid0	= 7;		/* Red bit width - 1 */
+		params->ip.wid1	= 7;		/* Green bit width - 1 */
+		params->ip.wid2	= 7;		/* Blue bit width - 1 */
+		params->ip.wid3	= 7;		/* Alpha bit width - 1 */
+		break;
+	case IPU_PIX_FMT_UYVY:
+		params->ip.bpp	= 2;
+		params->ip.pfs	= 6;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		break;
+	case IPU_PIX_FMT_YUV420P2:
+	case IPU_PIX_FMT_YUV420P:
+		params->ip.bpp	= 3;
+		params->ip.pfs	= 3;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		u_offset = stride * height;
+		v_offset = u_offset + u_offset / 4;
+		ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+		break;
+	case IPU_PIX_FMT_YVU422P:
+		params->ip.bpp	= 3;
+		params->ip.pfs	= 2;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		v_offset = stride * height;
+		u_offset = v_offset + v_offset / 2;
+		ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+		break;
+	case IPU_PIX_FMT_YUV422P:
+		params->ip.bpp	= 3;
+		params->ip.pfs	= 2;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		u_offset = stride * height;
+		v_offset = u_offset + u_offset / 2;
+		ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+		break;
+	default:
+		dev_err(ipu_data.dev,
+			"mx3 ipu: unimplemented pixel format %d\n", pixel_fmt);
+		break;
+	}
+
+	params->pp.nsb = 1;
+}
+
+static void ipu_ch_param_set_buffer(union chan_param_mem *params,
+				    dma_addr_t buf0, dma_addr_t buf1)
+{
+	params->pp.eba0 = buf0;
+	params->pp.eba1 = buf1;
+}
+
+static void ipu_ch_param_set_rotation(union chan_param_mem *params,
+				      enum ipu_rotate_mode rotate)
+{
+	params->pp.bam = rotate;
+}
+
+static void ipu_write_param_mem(uint32_t addr, uint32_t *data,
+				uint32_t num_words)
+{
+	for (; num_words > 0; num_words--) {
+		dev_dbg(ipu_data.dev,
+			"write param mem - addr = 0x%08X, data = 0x%08X\n",
+			addr, *data);
+		idmac_write_ipureg(&ipu_data, addr, IPU_IMA_ADDR);
+		idmac_write_ipureg(&ipu_data, *data++, IPU_IMA_DATA);
+		addr++;
+		if ((addr & 0x7) == 5) {
+			addr &= ~0x7;	/* set to word 0 */
+			addr += 8;	/* increment to next row */
+		}
+	}
+}
+
+static int calc_resize_coeffs(uint32_t in_size, uint32_t out_size,
+			      uint32_t *resize_coeff,
+			      uint32_t *downsize_coeff)
+{
+	uint32_t temp_size;
+	uint32_t temp_downsize;
+
+	*resize_coeff	= 1 << 13;
+	*downsize_coeff	= 1 << 13;
+
+	/* Cannot downsize more than 8:1 */
+	if (out_size << 3 < in_size)
+		return -EINVAL;
+
+	/* compute downsizing coefficient */
+	temp_downsize = 0;
+	temp_size = in_size;
+	while (temp_size >= out_size * 2 && temp_downsize < 2) {
+		temp_size >>= 1;
+		temp_downsize++;
+	}
+	*downsize_coeff = temp_downsize;
+
+	/*
+	 * compute resizing coefficient using the following formula:
+	 * resize_coeff = M*(SI -1)/(SO - 1)
+	 * where M = 2^13, SI - input size, SO - output size
+	 */
+	*resize_coeff = (8192L * (temp_size - 1)) / (out_size - 1);
+	if (*resize_coeff >= 16384L) {
+		dev_err(ipu_data.dev, "Warning! Overflow on resize coeff.\n");
+		*resize_coeff = 0x3FFF;
+	}
+
+	dev_dbg(ipu_data.dev, "resizing from %u -> %u pixels, "
+		"downsize=%u, resize=%u.%lu (reg=%u)\n", in_size, out_size,
+		*downsize_coeff, *resize_coeff >= 8192L ? 1 : 0,
+		((*resize_coeff & 0x1FFF) * 10000L) / 8192L, *resize_coeff);
+
+	return 0;
+}
+
+static enum ipu_color_space format_to_colorspace(enum pixel_fmt fmt)
+{
+	switch (fmt) {
+	case IPU_PIX_FMT_RGB565:
+	case IPU_PIX_FMT_BGR24:
+	case IPU_PIX_FMT_RGB24:
+	case IPU_PIX_FMT_BGR32:
+	case IPU_PIX_FMT_RGB32:
+		return IPU_COLORSPACE_RGB;
+	default:
+		return IPU_COLORSPACE_YCBCR;
+	}
+}
+
+static int ipu_ic_init_prpenc(struct ipu *ipu,
+			      union ipu_channel_param *params, bool src_is_csi)
+{
+	uint32_t reg, ic_conf;
+	uint32_t downsize_coeff, resize_coeff;
+	enum ipu_color_space in_fmt, out_fmt;
+
+	/* Setup vertical resizing */
+	calc_resize_coeffs(params->video.in_height,
+			    params->video.out_height,
+			    &resize_coeff, &downsize_coeff);
+	reg = (downsize_coeff << 30) | (resize_coeff << 16);
+
+	/* Setup horizontal resizing */
+	calc_resize_coeffs(params->video.in_width,
+			    params->video.out_width,
+			    &resize_coeff, &downsize_coeff);
+	reg |= (downsize_coeff << 14) | resize_coeff;
+
+	/* Setup color space conversion */
+	in_fmt = format_to_colorspace(params->video.in_pixel_fmt);
+	out_fmt = format_to_colorspace(params->video.out_pixel_fmt);
+
+	/*
+	 * Colourspace conversion unsupported yet - see _init_csc() in
+	 * Freescale sources
+	 */
+	if (in_fmt != out_fmt) {
+		dev_err(ipu->dev, "Colourspace conversion unsupported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	idmac_write_icreg(ipu, reg, IC_PRP_ENC_RSC);
+
+	ic_conf = idmac_read_icreg(ipu, IC_CONF);
+
+	if (src_is_csi)
+		ic_conf &= ~IC_CONF_RWS_EN;
+	else
+		ic_conf |= IC_CONF_RWS_EN;
+
+	idmac_write_icreg(ipu, ic_conf, IC_CONF);
+
+	return 0;
+}
+
+static uint32_t dma_param_addr(uint32_t dma_ch)
+{
+	/* Channel Parameter Memory */
+	return 0x10000 | (dma_ch << 4);
+}
+
+static void ipu_channel_set_priority(struct ipu *ipu, enum ipu_channel channel,
+				     bool prio)
+{
+	u32 reg = idmac_read_icreg(ipu, IDMAC_CHA_PRI);
+
+	if (prio)
+		reg |= 1UL << channel;
+	else
+		reg &= ~(1UL << channel);
+
+	idmac_write_icreg(ipu, reg, IDMAC_CHA_PRI);
+
+	dump_idmac_reg(ipu);
+}
+
+static uint32_t ipu_channel_conf_mask(enum ipu_channel channel)
+{
+	uint32_t mask;
+
+	switch (channel) {
+	case IDMAC_IC_0:
+	case IDMAC_IC_7:
+		mask = IPU_CONF_CSI_EN | IPU_CONF_IC_EN;
+		break;
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+		mask = IPU_CONF_SDC_EN | IPU_CONF_DI_EN;
+		break;
+	default:
+		mask = 0;
+		break;
+	}
+
+	return mask;
+}
+
+/**
+ * ipu_enable_channel() - enable an IPU channel.
+ * @idmac:	IPU DMAC context.
+ * @ichan:	IDMAC channel.
+ * @return:	0 on success or negative error code on failure.
+ */
+static int ipu_enable_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+	struct ipu *ipu = to_ipu(idmac);
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	uint32_t reg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	/* Reset to buffer 0 */
+	idmac_write_ipureg(ipu, 1UL << channel, IPU_CHA_CUR_BUF);
+	ichan->active_buffer = 0;
+	ichan->status = IPU_CHANNEL_ENABLED;
+
+	switch (channel) {
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+	case IDMAC_IC_7:
+		ipu_channel_set_priority(ipu, channel, true);
+	default:
+		break;
+	}
+
+	reg = idmac_read_icreg(ipu, IDMAC_CHA_EN);
+
+	idmac_write_icreg(ipu, reg | (1UL << channel), IDMAC_CHA_EN);
+
+	ipu_ic_enable_task(ipu, channel);
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+	return 0;
+}
+
+/**
+ * ipu_init_channel_buffer() - initialize a buffer for logical IPU channel.
+ * @ichan:	IDMAC channel.
+ * @pixel_fmt:	pixel format of buffer. Pixel format is a FOURCC ASCII code.
+ * @width:	width of buffer in pixels.
+ * @height:	height of buffer in pixels.
+ * @stride:	stride length of buffer in pixels.
+ * @rot_mode:	rotation mode of buffer. A rotation setting other than
+ *		IPU_ROTATE_VERT_FLIP should only be used for input buffers of
+ *		rotation channels.
+ * @phyaddr_0:	buffer 0 physical address.
+ * @phyaddr_1:	buffer 1 physical address. Setting this to a value other than
+ *		NULL enables double buffering mode.
+ * @return:	0 on success or negative error code on failure.
+ */
+static int ipu_init_channel_buffer(struct idmac_channel *ichan,
+				   enum pixel_fmt pixel_fmt,
+				   uint16_t width, uint16_t height,
+				   uint32_t stride,
+				   enum ipu_rotate_mode rot_mode,
+				   dma_addr_t phyaddr_0, dma_addr_t phyaddr_1)
+{
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	struct idmac *idmac = to_idmac(ichan->dma_chan.device);
+	struct ipu *ipu = to_ipu(idmac);
+	union chan_param_mem params = {};
+	unsigned long flags;
+	uint32_t reg;
+	uint32_t stride_bytes;
+
+	stride_bytes = stride * bytes_per_pixel(pixel_fmt);
+
+	if (stride_bytes % 4) {
+		dev_err(ipu->dev,
+			"Stride length must be 32-bit aligned, stride = %d, bytes = %d\n",
+			stride, stride_bytes);
+		return -EINVAL;
+	}
+
+	/* IC channel's stride must be a multiple of 8 pixels */
+	if ((channel <= IDMAC_IC_13) && (stride % 8)) {
+		dev_err(ipu->dev, "Stride must be 8 pixel multiple\n");
+		return -EINVAL;
+	}
+
+	/* Build parameter memory data for DMA channel */
+	ipu_ch_param_set_size(&params, pixel_fmt, width, height, stride_bytes);
+	ipu_ch_param_set_buffer(&params, phyaddr_0, phyaddr_1);
+	ipu_ch_param_set_rotation(&params, rot_mode);
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	ipu_write_param_mem(dma_param_addr(channel), (uint32_t *)&params, 10);
+
+	reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL);
+
+	if (phyaddr_1)
+		reg |= 1UL << channel;
+	else
+		reg &= ~(1UL << channel);
+
+	idmac_write_ipureg(ipu, reg, IPU_CHA_DB_MODE_SEL);
+
+	ichan->status = IPU_CHANNEL_READY;
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ipu_select_buffer() - mark a channel's buffer as ready.
+ * @channel:	channel ID.
+ * @buffer_n:	buffer number to mark ready.
+ */
+static void ipu_select_buffer(enum ipu_channel channel, int buffer_n)
+{
+	/* No locking - this is a write-one-to-set register, cleared by IPU */
+	if (buffer_n == 0)
+		/* Mark buffer 0 as ready. */
+		idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF0_RDY);
+	else
+		/* Mark buffer 1 as ready. */
+		idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF1_RDY);
+}
+
+/**
+ * ipu_update_channel_buffer() - update physical address of a channel buffer.
+ * @ichan:	IDMAC channel.
+ * @buffer_n:	buffer number to update.
+ *		0 or 1 are the only valid values.
+ * @phyaddr:	buffer physical address.
+ */
+/* Called under spin_lock(_irqsave)(&ichan->lock) */
+static void ipu_update_channel_buffer(struct idmac_channel *ichan,
+				      int buffer_n, dma_addr_t phyaddr)
+{
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	uint32_t reg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipu_data.lock, flags);
+
+	if (buffer_n == 0) {
+		reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+		if (reg & (1UL << channel)) {
+			ipu_ic_disable_task(&ipu_data, channel);
+			ichan->status = IPU_CHANNEL_READY;
+		}
+
+		/* 44.3.3.1.9 - Row Number 1 (WORD1, offset 0) */
+		idmac_write_ipureg(&ipu_data, dma_param_addr(channel) +
+				   0x0008UL, IPU_IMA_ADDR);
+		idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA);
+	} else {
+		reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+		if (reg & (1UL << channel)) {
+			ipu_ic_disable_task(&ipu_data, channel);
+			ichan->status = IPU_CHANNEL_READY;
+		}
+
+		/* Check if double-buffering is already enabled */
+		reg = idmac_read_ipureg(&ipu_data, IPU_CHA_DB_MODE_SEL);
+
+		if (!(reg & (1UL << channel)))
+			idmac_write_ipureg(&ipu_data, reg | (1UL << channel),
+					   IPU_CHA_DB_MODE_SEL);
+
+		/* 44.3.3.1.9 - Row Number 1 (WORD1, offset 1) */
+		idmac_write_ipureg(&ipu_data, dma_param_addr(channel) +
+				   0x0009UL, IPU_IMA_ADDR);
+		idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA);
+	}
+
+	spin_unlock_irqrestore(&ipu_data.lock, flags);
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_buffer(struct idmac_channel *ichan,
+	struct idmac_tx_desc *desc, struct scatterlist *sg, int buf_idx)
+{
+	unsigned int chan_id = ichan->dma_chan.chan_id;
+	struct device *dev = &ichan->dma_chan.dev->device;
+
+	if (async_tx_test_ack(&desc->txd))
+		return -EINTR;
+
+	/*
+	 * On first invocation this shouldn't be necessary, the call to
+	 * ipu_init_channel_buffer() above will set addresses for us, so we
+	 * could make it conditional on status >= IPU_CHANNEL_ENABLED, but
+	 * doing it again shouldn't hurt either.
+	 */
+	ipu_update_channel_buffer(ichan, buf_idx, sg_dma_address(sg));
+
+	ipu_select_buffer(chan_id, buf_idx);
+	dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n",
+		sg, chan_id, buf_idx);
+
+	return 0;
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_channel_buffers(struct idmac_channel *ichan,
+				      struct idmac_tx_desc *desc)
+{
+	struct scatterlist *sg;
+	int i, ret = 0;
+
+	for (i = 0, sg = desc->sg; i < 2 && sg; i++) {
+		if (!ichan->sg[i]) {
+			ichan->sg[i] = sg;
+
+			ret = ipu_submit_buffer(ichan, desc, sg, i);
+			if (ret < 0)
+				return ret;
+
+			sg = sg_next(sg);
+		}
+	}
+
+	return ret;
+}
+
+static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct idmac_tx_desc *desc = to_tx_desc(tx);
+	struct idmac_channel *ichan = to_idmac_chan(tx->chan);
+	struct idmac *idmac = to_idmac(tx->chan->device);
+	struct ipu *ipu = to_ipu(idmac);
+	struct device *dev = &ichan->dma_chan.dev->device;
+	dma_cookie_t cookie;
+	unsigned long flags;
+	int ret;
+
+	/* Sanity check */
+	if (!list_empty(&desc->list)) {
+		/* The descriptor doesn't belong to client */
+		dev_err(dev, "Descriptor %p not prepared!\n", tx);
+		return -EBUSY;
+	}
+
+	mutex_lock(&ichan->chan_mutex);
+
+	async_tx_clear_ack(tx);
+
+	if (ichan->status < IPU_CHANNEL_READY) {
+		struct idmac_video_param *video = &ichan->params.video;
+		/*
+		 * Initial buffer assignment - the first two sg-entries from
+		 * the descriptor will end up in the IDMAC buffers
+		 */
+		dma_addr_t dma_1 = sg_is_last(desc->sg) ? 0 :
+			sg_dma_address(&desc->sg[1]);
+
+		WARN_ON(ichan->sg[0] || ichan->sg[1]);
+
+		cookie = ipu_init_channel_buffer(ichan,
+						 video->out_pixel_fmt,
+						 video->out_width,
+						 video->out_height,
+						 video->out_stride,
+						 IPU_ROTATE_NONE,
+						 sg_dma_address(&desc->sg[0]),
+						 dma_1);
+		if (cookie < 0)
+			goto out;
+	}
+
+	dev_dbg(dev, "Submitting sg %p\n", &desc->sg[0]);
+
+	cookie = dma_cookie_assign(tx);
+
+	/* ipu->lock can be taken under ichan->lock, but not v.v. */
+	spin_lock_irqsave(&ichan->lock, flags);
+
+	list_add_tail(&desc->list, &ichan->queue);
+	/* submit_buffers() atomically verifies and fills empty sg slots */
+	ret = ipu_submit_channel_buffers(ichan, desc);
+
+	spin_unlock_irqrestore(&ichan->lock, flags);
+
+	if (ret < 0) {
+		cookie = ret;
+		goto dequeue;
+	}
+
+	if (ichan->status < IPU_CHANNEL_ENABLED) {
+		ret = ipu_enable_channel(idmac, ichan);
+		if (ret < 0) {
+			cookie = ret;
+			goto dequeue;
+		}
+	}
+
+	dump_idmac_reg(ipu);
+
+dequeue:
+	if (cookie < 0) {
+		spin_lock_irqsave(&ichan->lock, flags);
+		list_del_init(&desc->list);
+		spin_unlock_irqrestore(&ichan->lock, flags);
+		tx->cookie = cookie;
+		ichan->dma_chan.cookie = cookie;
+	}
+
+out:
+	mutex_unlock(&ichan->chan_mutex);
+
+	return cookie;
+}
+
+/* Called with ichan->chan_mutex held */
+static int idmac_desc_alloc(struct idmac_channel *ichan, int n)
+{
+	struct idmac_tx_desc *desc = vmalloc(n * sizeof(struct idmac_tx_desc));
+	struct idmac *idmac = to_idmac(ichan->dma_chan.device);
+
+	if (!desc)
+		return -ENOMEM;
+
+	/* No interrupts, just disable the tasklet for a moment */
+	tasklet_disable(&to_ipu(idmac)->tasklet);
+
+	ichan->n_tx_desc = n;
+	ichan->desc = desc;
+	INIT_LIST_HEAD(&ichan->queue);
+	INIT_LIST_HEAD(&ichan->free_list);
+
+	while (n--) {
+		struct dma_async_tx_descriptor *txd = &desc->txd;
+
+		memset(txd, 0, sizeof(*txd));
+		dma_async_tx_descriptor_init(txd, &ichan->dma_chan);
+		txd->tx_submit		= idmac_tx_submit;
+
+		list_add(&desc->list, &ichan->free_list);
+
+		desc++;
+	}
+
+	tasklet_enable(&to_ipu(idmac)->tasklet);
+
+	return 0;
+}
+
+/**
+ * ipu_init_channel() - initialize an IPU channel.
+ * @idmac:	IPU DMAC context.
+ * @ichan:	pointer to the channel object.
+ * @return      0 on success or negative error code on failure.
+ */
+static int ipu_init_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+	union ipu_channel_param *params = &ichan->params;
+	uint32_t ipu_conf;
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	unsigned long flags;
+	uint32_t reg;
+	struct ipu *ipu = to_ipu(idmac);
+	int ret = 0, n_desc = 0;
+
+	dev_dbg(ipu->dev, "init channel = %d\n", channel);
+
+	if (channel != IDMAC_SDC_0 && channel != IDMAC_SDC_1 &&
+	    channel != IDMAC_IC_7)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	switch (channel) {
+	case IDMAC_IC_7:
+		n_desc = 16;
+		reg = idmac_read_icreg(ipu, IC_CONF);
+		idmac_write_icreg(ipu, reg & ~IC_CONF_CSI_MEM_WR_EN, IC_CONF);
+		break;
+	case IDMAC_IC_0:
+		n_desc = 16;
+		reg = idmac_read_ipureg(ipu, IPU_FS_PROC_FLOW);
+		idmac_write_ipureg(ipu, reg & ~FS_ENC_IN_VALID, IPU_FS_PROC_FLOW);
+		ret = ipu_ic_init_prpenc(ipu, params, true);
+		break;
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+		n_desc = 4;
+	default:
+		break;
+	}
+
+	ipu->channel_init_mask |= 1L << channel;
+
+	/* Enable IPU sub module */
+	ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) |
+		ipu_channel_conf_mask(channel);
+	idmac_write_ipureg(ipu, ipu_conf, IPU_CONF);
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	if (n_desc && !ichan->desc)
+		ret = idmac_desc_alloc(ichan, n_desc);
+
+	dump_idmac_reg(ipu);
+
+	return ret;
+}
+
+/**
+ * ipu_uninit_channel() - uninitialize an IPU channel.
+ * @idmac:	IPU DMAC context.
+ * @ichan:	pointer to the channel object.
+ */
+static void ipu_uninit_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	unsigned long flags;
+	uint32_t reg;
+	unsigned long chan_mask = 1UL << channel;
+	uint32_t ipu_conf;
+	struct ipu *ipu = to_ipu(idmac);
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	if (!(ipu->channel_init_mask & chan_mask)) {
+		dev_err(ipu->dev, "Channel already uninitialized %d\n",
+			channel);
+		spin_unlock_irqrestore(&ipu->lock, flags);
+		return;
+	}
+
+	/* Reset the double buffer */
+	reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL);
+	idmac_write_ipureg(ipu, reg & ~chan_mask, IPU_CHA_DB_MODE_SEL);
+
+	ichan->sec_chan_en = false;
+
+	switch (channel) {
+	case IDMAC_IC_7:
+		reg = idmac_read_icreg(ipu, IC_CONF);
+		idmac_write_icreg(ipu, reg & ~(IC_CONF_RWS_EN | IC_CONF_PRPENC_EN),
+			     IC_CONF);
+		break;
+	case IDMAC_IC_0:
+		reg = idmac_read_icreg(ipu, IC_CONF);
+		idmac_write_icreg(ipu, reg & ~(IC_CONF_PRPENC_EN | IC_CONF_PRPENC_CSC1),
+				  IC_CONF);
+		break;
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+	default:
+		break;
+	}
+
+	ipu->channel_init_mask &= ~(1L << channel);
+
+	ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) &
+		~ipu_channel_conf_mask(channel);
+	idmac_write_ipureg(ipu, ipu_conf, IPU_CONF);
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	ichan->n_tx_desc = 0;
+	vfree(ichan->desc);
+	ichan->desc = NULL;
+}
+
+/**
+ * ipu_disable_channel() - disable an IPU channel.
+ * @idmac:		IPU DMAC context.
+ * @ichan:		channel object pointer.
+ * @wait_for_stop:	flag to set whether to wait for channel end of frame or
+ *			return immediately.
+ * @return:		0 on success or negative error code on failure.
+ */
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+			       bool wait_for_stop)
+{
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	struct ipu *ipu = to_ipu(idmac);
+	uint32_t reg;
+	unsigned long flags;
+	unsigned long chan_mask = 1UL << channel;
+	unsigned int timeout;
+
+	if (wait_for_stop && channel != IDMAC_SDC_1 && channel != IDMAC_SDC_0) {
+		timeout = 40;
+		/* This waiting always fails. Related to spurious irq problem */
+		while ((idmac_read_icreg(ipu, IDMAC_CHA_BUSY) & chan_mask) ||
+		       (ipu_channel_status(ipu, channel) == TASK_STAT_ACTIVE)) {
+			timeout--;
+			msleep(10);
+
+			if (!timeout) {
+				dev_dbg(ipu->dev,
+					"Warning: timeout waiting for channel %u to "
+					"stop: buf0_rdy = 0x%08X, buf1_rdy = 0x%08X, "
+					"busy = 0x%08X, tstat = 0x%08X\n", channel,
+					idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY),
+					idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY),
+					idmac_read_icreg(ipu, IDMAC_CHA_BUSY),
+					idmac_read_ipureg(ipu, IPU_TASKS_STAT));
+				break;
+			}
+		}
+		dev_dbg(ipu->dev, "timeout = %d * 10ms\n", 40 - timeout);
+	}
+	/* SDC BG and FG must be disabled before DMA is disabled */
+	if (wait_for_stop && (channel == IDMAC_SDC_0 ||
+			      channel == IDMAC_SDC_1)) {
+		for (timeout = 5;
+		     timeout && !ipu_irq_status(ichan->eof_irq); timeout--)
+			msleep(5);
+	}
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	/* Disable IC task */
+	ipu_ic_disable_task(ipu, channel);
+
+	/* Disable DMA channel(s) */
+	reg = idmac_read_icreg(ipu, IDMAC_CHA_EN);
+	idmac_write_icreg(ipu, reg & ~chan_mask, IDMAC_CHA_EN);
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	return 0;
+}
+
+static struct scatterlist *idmac_sg_next(struct idmac_channel *ichan,
+	struct idmac_tx_desc **desc, struct scatterlist *sg)
+{
+	struct scatterlist *sgnew = sg ? sg_next(sg) : NULL;
+
+	if (sgnew)
+		/* next sg-element in this list */
+		return sgnew;
+
+	if ((*desc)->list.next == &ichan->queue)
+		/* No more descriptors on the queue */
+		return NULL;
+
+	/* Fetch next descriptor */
+	*desc = list_entry((*desc)->list.next, struct idmac_tx_desc, list);
+	return (*desc)->sg;
+}
+
+/*
+ * We have several possibilities here:
+ * current BUF		next BUF
+ *
+ * not last sg		next not last sg
+ * not last sg		next last sg
+ * last sg		first sg from next descriptor
+ * last sg		NULL
+ *
+ * Besides, the descriptor queue might be empty or not. We process all these
+ * cases carefully.
+ */
+static irqreturn_t idmac_interrupt(int irq, void *dev_id)
+{
+	struct idmac_channel *ichan = dev_id;
+	struct device *dev = &ichan->dma_chan.dev->device;
+	unsigned int chan_id = ichan->dma_chan.chan_id;
+	struct scatterlist **sg, *sgnext, *sgnew = NULL;
+	/* Next transfer descriptor */
+	struct idmac_tx_desc *desc, *descnew;
+	dma_async_tx_callback callback;
+	void *callback_param;
+	bool done = false;
+	u32 ready0, ready1, curbuf, err;
+	unsigned long flags;
+
+	/* IDMAC has cleared the respective BUFx_RDY bit, we manage the buffer */
+
+	dev_dbg(dev, "IDMAC irq %d, buf %d\n", irq, ichan->active_buffer);
+
+	spin_lock_irqsave(&ipu_data.lock, flags);
+
+	ready0	= idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+	ready1	= idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+	curbuf	= idmac_read_ipureg(&ipu_data, IPU_CHA_CUR_BUF);
+	err	= idmac_read_ipureg(&ipu_data, IPU_INT_STAT_4);
+
+	if (err & (1 << chan_id)) {
+		idmac_write_ipureg(&ipu_data, 1 << chan_id, IPU_INT_STAT_4);
+		spin_unlock_irqrestore(&ipu_data.lock, flags);
+		/*
+		 * Doing this
+		 * ichan->sg[0] = ichan->sg[1] = NULL;
+		 * you can force channel re-enable on the next tx_submit(), but
+		 * this is dirty - think about descriptors with multiple
+		 * sg elements.
+		 */
+		dev_warn(dev, "NFB4EOF on channel %d, ready %x, %x, cur %x\n",
+			 chan_id, ready0, ready1, curbuf);
+		return IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&ipu_data.lock, flags);
+
+	/* Other interrupts do not interfere with this channel */
+	spin_lock(&ichan->lock);
+	if (unlikely((ichan->active_buffer && (ready1 >> chan_id) & 1) ||
+		     (!ichan->active_buffer && (ready0 >> chan_id) & 1)
+		     )) {
+		spin_unlock(&ichan->lock);
+		dev_dbg(dev,
+			"IRQ with active buffer still ready on channel %x, "
+			"active %d, ready %x, %x!\n", chan_id,
+			ichan->active_buffer, ready0, ready1);
+		return IRQ_NONE;
+	}
+
+	if (unlikely(list_empty(&ichan->queue))) {
+		ichan->sg[ichan->active_buffer] = NULL;
+		spin_unlock(&ichan->lock);
+		dev_err(dev,
+			"IRQ without queued buffers on channel %x, active %d, "
+			"ready %x, %x!\n", chan_id,
+			ichan->active_buffer, ready0, ready1);
+		return IRQ_NONE;
+	}
+
+	/*
+	 * active_buffer is a software flag, it shows which buffer we are
+	 * currently expecting back from the hardware, IDMAC should be
+	 * processing the other buffer already
+	 */
+	sg = &ichan->sg[ichan->active_buffer];
+	sgnext = ichan->sg[!ichan->active_buffer];
+
+	if (!*sg) {
+		spin_unlock(&ichan->lock);
+		return IRQ_HANDLED;
+	}
+
+	desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
+	descnew = desc;
+
+	dev_dbg(dev, "IDMAC irq %d, dma 0x%08x, next dma 0x%08x, current %d, curbuf 0x%08x\n",
+		irq, sg_dma_address(*sg), sgnext ? sg_dma_address(sgnext) : 0, ichan->active_buffer, curbuf);
+
+	/* Find the descriptor of sgnext */
+	sgnew = idmac_sg_next(ichan, &descnew, *sg);
+	if (sgnext != sgnew)
+		dev_err(dev, "Submitted buffer %p, next buffer %p\n", sgnext, sgnew);
+
+	/*
+	 * if sgnext == NULL sg must be the last element in a scatterlist and
+	 * queue must be empty
+	 */
+	if (unlikely(!sgnext)) {
+		if (!WARN_ON(sg_next(*sg)))
+			dev_dbg(dev, "Underrun on channel %x\n", chan_id);
+		ichan->sg[!ichan->active_buffer] = sgnew;
+
+		if (unlikely(sgnew)) {
+			ipu_submit_buffer(ichan, descnew, sgnew, !ichan->active_buffer);
+		} else {
+			spin_lock_irqsave(&ipu_data.lock, flags);
+			ipu_ic_disable_task(&ipu_data, chan_id);
+			spin_unlock_irqrestore(&ipu_data.lock, flags);
+			ichan->status = IPU_CHANNEL_READY;
+			/* Continue to check for complete descriptor */
+		}
+	}
+
+	/* Calculate and submit the next sg element */
+	sgnew = idmac_sg_next(ichan, &descnew, sgnew);
+
+	if (unlikely(!sg_next(*sg)) || !sgnext) {
+		/*
+		 * Last element in scatterlist done, remove from the queue,
+		 * _init for debugging
+		 */
+		list_del_init(&desc->list);
+		done = true;
+	}
+
+	*sg = sgnew;
+
+	if (likely(sgnew) &&
+	    ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) {
+		callback = descnew->txd.callback;
+		callback_param = descnew->txd.callback_param;
+		list_del_init(&descnew->list);
+		spin_unlock(&ichan->lock);
+		if (callback)
+			callback(callback_param);
+		spin_lock(&ichan->lock);
+	}
+
+	/* Flip the active buffer - even if update above failed */
+	ichan->active_buffer = !ichan->active_buffer;
+	if (done)
+		dma_cookie_complete(&desc->txd);
+
+	callback = desc->txd.callback;
+	callback_param = desc->txd.callback_param;
+
+	spin_unlock(&ichan->lock);
+
+	if (done && (desc->txd.flags & DMA_PREP_INTERRUPT) && callback)
+		callback(callback_param);
+
+	return IRQ_HANDLED;
+}
+
+static void ipu_gc_tasklet(unsigned long arg)
+{
+	struct ipu *ipu = (struct ipu *)arg;
+	int i;
+
+	for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+		struct idmac_channel *ichan = ipu->channel + i;
+		struct idmac_tx_desc *desc;
+		unsigned long flags;
+		struct scatterlist *sg;
+		int j, k;
+
+		for (j = 0; j < ichan->n_tx_desc; j++) {
+			desc = ichan->desc + j;
+			spin_lock_irqsave(&ichan->lock, flags);
+			if (async_tx_test_ack(&desc->txd)) {
+				list_move(&desc->list, &ichan->free_list);
+				for_each_sg(desc->sg, sg, desc->sg_len, k) {
+					if (ichan->sg[0] == sg)
+						ichan->sg[0] = NULL;
+					else if (ichan->sg[1] == sg)
+						ichan->sg[1] = NULL;
+				}
+				async_tx_clear_ack(&desc->txd);
+			}
+			spin_unlock_irqrestore(&ichan->lock, flags);
+		}
+	}
+}
+
+/* Allocate and initialise a transfer descriptor. */
+static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
+		struct scatterlist *sgl, unsigned int sg_len,
+		enum dma_transfer_direction direction, unsigned long tx_flags,
+		void *context)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac_tx_desc *desc = NULL;
+	struct dma_async_tx_descriptor *txd = NULL;
+	unsigned long flags;
+
+	/* We only can handle these three channels so far */
+	if (chan->chan_id != IDMAC_SDC_0 && chan->chan_id != IDMAC_SDC_1 &&
+	    chan->chan_id != IDMAC_IC_7)
+		return NULL;
+
+	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV) {
+		dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction);
+		return NULL;
+	}
+
+	mutex_lock(&ichan->chan_mutex);
+
+	spin_lock_irqsave(&ichan->lock, flags);
+	if (!list_empty(&ichan->free_list)) {
+		desc = list_entry(ichan->free_list.next,
+				  struct idmac_tx_desc, list);
+
+		list_del_init(&desc->list);
+
+		desc->sg_len	= sg_len;
+		desc->sg	= sgl;
+		txd		= &desc->txd;
+		txd->flags	= tx_flags;
+	}
+	spin_unlock_irqrestore(&ichan->lock, flags);
+
+	mutex_unlock(&ichan->chan_mutex);
+
+	tasklet_schedule(&to_ipu(to_idmac(chan->device))->tasklet);
+
+	return txd;
+}
+
+/* Re-select the current buffer and re-activate the channel */
+static void idmac_issue_pending(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+	struct ipu *ipu = to_ipu(idmac);
+	unsigned long flags;
+
+	/* This is not always needed, but doesn't hurt either */
+	spin_lock_irqsave(&ipu->lock, flags);
+	ipu_select_buffer(chan->chan_id, ichan->active_buffer);
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	/*
+	 * Might need to perform some parts of initialisation from
+	 * ipu_enable_channel(), but not all, we do not want to reset to buffer
+	 * 0, don't need to set priority again either, but re-enabling the task
+	 * and the channel might be a good idea.
+	 */
+}
+
+static int __idmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			   unsigned long arg)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+	struct ipu *ipu = to_ipu(idmac);
+	struct list_head *list, *tmp;
+	unsigned long flags;
+	int i;
+
+	switch (cmd) {
+	case DMA_PAUSE:
+		spin_lock_irqsave(&ipu->lock, flags);
+		ipu_ic_disable_task(ipu, chan->chan_id);
+
+		/* Return all descriptors into "prepared" state */
+		list_for_each_safe(list, tmp, &ichan->queue)
+			list_del_init(list);
+
+		ichan->sg[0] = NULL;
+		ichan->sg[1] = NULL;
+
+		spin_unlock_irqrestore(&ipu->lock, flags);
+
+		ichan->status = IPU_CHANNEL_INITIALIZED;
+		break;
+	case DMA_TERMINATE_ALL:
+		ipu_disable_channel(idmac, ichan,
+				    ichan->status >= IPU_CHANNEL_ENABLED);
+
+		tasklet_disable(&ipu->tasklet);
+
+		/* ichan->queue is modified in ISR, have to spinlock */
+		spin_lock_irqsave(&ichan->lock, flags);
+		list_splice_init(&ichan->queue, &ichan->free_list);
+
+		if (ichan->desc)
+			for (i = 0; i < ichan->n_tx_desc; i++) {
+				struct idmac_tx_desc *desc = ichan->desc + i;
+				if (list_empty(&desc->list))
+					/* Descriptor was prepared, but not submitted */
+					list_add(&desc->list, &ichan->free_list);
+
+				async_tx_clear_ack(&desc->txd);
+			}
+
+		ichan->sg[0] = NULL;
+		ichan->sg[1] = NULL;
+		spin_unlock_irqrestore(&ichan->lock, flags);
+
+		tasklet_enable(&ipu->tasklet);
+
+		ichan->status = IPU_CHANNEL_INITIALIZED;
+		break;
+	default:
+		return -ENOSYS;
+	}
+
+	return 0;
+}
+
+static int idmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			 unsigned long arg)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	int ret;
+
+	mutex_lock(&ichan->chan_mutex);
+
+	ret = __idmac_control(chan, cmd, arg);
+
+	mutex_unlock(&ichan->chan_mutex);
+
+	return ret;
+}
+
+#ifdef DEBUG
+static irqreturn_t ic_sof_irq(int irq, void *dev_id)
+{
+	struct idmac_channel *ichan = dev_id;
+	printk(KERN_DEBUG "Got SOF IRQ %d on Channel %d\n",
+	       irq, ichan->dma_chan.chan_id);
+	disable_irq_nosync(irq);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ic_eof_irq(int irq, void *dev_id)
+{
+	struct idmac_channel *ichan = dev_id;
+	printk(KERN_DEBUG "Got EOF IRQ %d on Channel %d\n",
+	       irq, ichan->dma_chan.chan_id);
+	disable_irq_nosync(irq);
+	return IRQ_HANDLED;
+}
+
+static int ic_sof = -EINVAL, ic_eof = -EINVAL;
+#endif
+
+static int idmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+	int ret;
+
+	/* dmaengine.c now guarantees to only offer free channels */
+	BUG_ON(chan->client_count > 1);
+	WARN_ON(ichan->status != IPU_CHANNEL_FREE);
+
+	dma_cookie_init(chan);
+
+	ret = ipu_irq_map(chan->chan_id);
+	if (ret < 0)
+		goto eimap;
+
+	ichan->eof_irq = ret;
+
+	/*
+	 * Important to first disable the channel, because maybe someone
+	 * used it before us, e.g., the bootloader
+	 */
+	ipu_disable_channel(idmac, ichan, true);
+
+	ret = ipu_init_channel(idmac, ichan);
+	if (ret < 0)
+		goto eichan;
+
+	ret = request_irq(ichan->eof_irq, idmac_interrupt, 0,
+			  ichan->eof_name, ichan);
+	if (ret < 0)
+		goto erirq;
+
+#ifdef DEBUG
+	if (chan->chan_id == IDMAC_IC_7) {
+		ic_sof = ipu_irq_map(69);
+		if (ic_sof > 0)
+			request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan);
+		ic_eof = ipu_irq_map(70);
+		if (ic_eof > 0)
+			request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan);
+	}
+#endif
+
+	ichan->status = IPU_CHANNEL_INITIALIZED;
+
+	dev_dbg(&chan->dev->device, "Found channel 0x%x, irq %d\n",
+		chan->chan_id, ichan->eof_irq);
+
+	return ret;
+
+erirq:
+	ipu_uninit_channel(idmac, ichan);
+eichan:
+	ipu_irq_unmap(chan->chan_id);
+eimap:
+	return ret;
+}
+
+static void idmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+
+	mutex_lock(&ichan->chan_mutex);
+
+	__idmac_control(chan, DMA_TERMINATE_ALL, 0);
+
+	if (ichan->status > IPU_CHANNEL_FREE) {
+#ifdef DEBUG
+		if (chan->chan_id == IDMAC_IC_7) {
+			if (ic_sof > 0) {
+				free_irq(ic_sof, ichan);
+				ipu_irq_unmap(69);
+				ic_sof = -EINVAL;
+			}
+			if (ic_eof > 0) {
+				free_irq(ic_eof, ichan);
+				ipu_irq_unmap(70);
+				ic_eof = -EINVAL;
+			}
+		}
+#endif
+		free_irq(ichan->eof_irq, ichan);
+		ipu_irq_unmap(chan->chan_id);
+	}
+
+	ichan->status = IPU_CHANNEL_FREE;
+
+	ipu_uninit_channel(idmac, ichan);
+
+	mutex_unlock(&ichan->chan_mutex);
+
+	tasklet_schedule(&to_ipu(idmac)->tasklet);
+}
+
+static enum dma_status idmac_tx_status(struct dma_chan *chan,
+		       dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
+	if (cookie != chan->cookie)
+		return DMA_ERROR;
+	return DMA_SUCCESS;
+}
+
+static int __init ipu_idmac_init(struct ipu *ipu)
+{
+	struct idmac *idmac = &ipu->idmac;
+	struct dma_device *dma = &idmac->dma;
+	int i;
+
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+	/* Compulsory common fields */
+	dma->dev				= ipu->dev;
+	dma->device_alloc_chan_resources	= idmac_alloc_chan_resources;
+	dma->device_free_chan_resources		= idmac_free_chan_resources;
+	dma->device_tx_status			= idmac_tx_status;
+	dma->device_issue_pending		= idmac_issue_pending;
+
+	/* Compulsory for DMA_SLAVE fields */
+	dma->device_prep_slave_sg		= idmac_prep_slave_sg;
+	dma->device_control			= idmac_control;
+
+	INIT_LIST_HEAD(&dma->channels);
+	for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+		struct idmac_channel *ichan = ipu->channel + i;
+		struct dma_chan *dma_chan = &ichan->dma_chan;
+
+		spin_lock_init(&ichan->lock);
+		mutex_init(&ichan->chan_mutex);
+
+		ichan->status		= IPU_CHANNEL_FREE;
+		ichan->sec_chan_en	= false;
+		snprintf(ichan->eof_name, sizeof(ichan->eof_name), "IDMAC EOF %d", i);
+
+		dma_chan->device	= &idmac->dma;
+		dma_cookie_init(dma_chan);
+		dma_chan->chan_id	= i;
+		list_add_tail(&dma_chan->device_node, &dma->channels);
+	}
+
+	idmac_write_icreg(ipu, 0x00000070, IDMAC_CONF);
+
+	return dma_async_device_register(&idmac->dma);
+}
+
+static void __exit ipu_idmac_exit(struct ipu *ipu)
+{
+	int i;
+	struct idmac *idmac = &ipu->idmac;
+
+	for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+		struct idmac_channel *ichan = ipu->channel + i;
+
+		idmac_control(&ichan->dma_chan, DMA_TERMINATE_ALL, 0);
+	}
+
+	dma_async_device_unregister(&idmac->dma);
+}
+
+/*****************************************************************************
+ * IPU common probe / remove
+ */
+
+static int __init ipu_probe(struct platform_device *pdev)
+{
+	struct ipu_platform_data *pdata = pdev->dev.platform_data;
+	struct resource *mem_ipu, *mem_ic;
+	int ret;
+
+	spin_lock_init(&ipu_data.lock);
+
+	mem_ipu	= platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mem_ic	= platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!pdata || !mem_ipu || !mem_ic)
+		return -EINVAL;
+
+	ipu_data.dev = &pdev->dev;
+
+	platform_set_drvdata(pdev, &ipu_data);
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		goto err_noirq;
+
+	ipu_data.irq_fn = ret;
+	ret = platform_get_irq(pdev, 1);
+	if (ret < 0)
+		goto err_noirq;
+
+	ipu_data.irq_err = ret;
+	ipu_data.irq_base = pdata->irq_base;
+
+	dev_dbg(&pdev->dev, "fn irq %u, err irq %u, irq-base %u\n",
+		ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base);
+
+	/* Remap IPU common registers */
+	ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu));
+	if (!ipu_data.reg_ipu) {
+		ret = -ENOMEM;
+		goto err_ioremap_ipu;
+	}
+
+	/* Remap Image Converter and Image DMA Controller registers */
+	ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic));
+	if (!ipu_data.reg_ic) {
+		ret = -ENOMEM;
+		goto err_ioremap_ic;
+	}
+
+	/* Get IPU clock */
+	ipu_data.ipu_clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(ipu_data.ipu_clk)) {
+		ret = PTR_ERR(ipu_data.ipu_clk);
+		goto err_clk_get;
+	}
+
+	/* Make sure IPU HSP clock is running */
+	clk_enable(ipu_data.ipu_clk);
+
+	/* Disable all interrupts */
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_1);
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_2);
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_3);
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_4);
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_5);
+
+	dev_dbg(&pdev->dev, "%s @ 0x%08lx, fn irq %u, err irq %u\n", pdev->name,
+		(unsigned long)mem_ipu->start, ipu_data.irq_fn, ipu_data.irq_err);
+
+	ret = ipu_irq_attach_irq(&ipu_data, pdev);
+	if (ret < 0)
+		goto err_attach_irq;
+
+	/* Initialize DMA engine */
+	ret = ipu_idmac_init(&ipu_data);
+	if (ret < 0)
+		goto err_idmac_init;
+
+	tasklet_init(&ipu_data.tasklet, ipu_gc_tasklet, (unsigned long)&ipu_data);
+
+	ipu_data.dev = &pdev->dev;
+
+	dev_dbg(ipu_data.dev, "IPU initialized\n");
+
+	return 0;
+
+err_idmac_init:
+err_attach_irq:
+	ipu_irq_detach_irq(&ipu_data, pdev);
+	clk_disable(ipu_data.ipu_clk);
+	clk_put(ipu_data.ipu_clk);
+err_clk_get:
+	iounmap(ipu_data.reg_ic);
+err_ioremap_ic:
+	iounmap(ipu_data.reg_ipu);
+err_ioremap_ipu:
+err_noirq:
+	dev_err(&pdev->dev, "Failed to probe IPU: %d\n", ret);
+	return ret;
+}
+
+static int __exit ipu_remove(struct platform_device *pdev)
+{
+	struct ipu *ipu = platform_get_drvdata(pdev);
+
+	ipu_idmac_exit(ipu);
+	ipu_irq_detach_irq(ipu, pdev);
+	clk_disable(ipu->ipu_clk);
+	clk_put(ipu->ipu_clk);
+	iounmap(ipu->reg_ic);
+	iounmap(ipu->reg_ipu);
+	tasklet_kill(&ipu->tasklet);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+/*
+ * We need two MEM resources - with IPU-common and Image Converter registers,
+ * including PF_CONF and IDMAC_* registers, and two IRQs - function and error
+ */
+static struct platform_driver ipu_platform_driver = {
+	.driver = {
+		.name	= "ipu-core",
+		.owner	= THIS_MODULE,
+	},
+	.remove		= __exit_p(ipu_remove),
+};
+
+static int __init ipu_init(void)
+{
+	return platform_driver_probe(&ipu_platform_driver, ipu_probe);
+}
+subsys_initcall(ipu_init);
+
+MODULE_DESCRIPTION("IPU core driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Guennadi Liakhovetski <lg@denx.de>");
+MODULE_ALIAS("platform:ipu-core");
diff --git a/drivers/dma/ipu/ipu_intern.h b/drivers/dma/ipu/ipu_intern.h
new file mode 100644
index 00000000..545cf11a
--- /dev/null
+++ b/drivers/dma/ipu/ipu_intern.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _IPU_INTERN_H_
+#define _IPU_INTERN_H_
+
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+
+/* IPU Common registers */
+#define IPU_CONF		0x00
+#define IPU_CHA_BUF0_RDY	0x04
+#define IPU_CHA_BUF1_RDY	0x08
+#define IPU_CHA_DB_MODE_SEL	0x0C
+#define IPU_CHA_CUR_BUF		0x10
+#define IPU_FS_PROC_FLOW	0x14
+#define IPU_FS_DISP_FLOW	0x18
+#define IPU_TASKS_STAT		0x1C
+#define IPU_IMA_ADDR		0x20
+#define IPU_IMA_DATA		0x24
+#define IPU_INT_CTRL_1		0x28
+#define IPU_INT_CTRL_2		0x2C
+#define IPU_INT_CTRL_3		0x30
+#define IPU_INT_CTRL_4		0x34
+#define IPU_INT_CTRL_5		0x38
+#define IPU_INT_STAT_1		0x3C
+#define IPU_INT_STAT_2		0x40
+#define IPU_INT_STAT_3		0x44
+#define IPU_INT_STAT_4		0x48
+#define IPU_INT_STAT_5		0x4C
+#define IPU_BRK_CTRL_1		0x50
+#define IPU_BRK_CTRL_2		0x54
+#define IPU_BRK_STAT		0x58
+#define IPU_DIAGB_CTRL		0x5C
+
+/* IPU_CONF Register bits */
+#define IPU_CONF_CSI_EN		0x00000001
+#define IPU_CONF_IC_EN		0x00000002
+#define IPU_CONF_ROT_EN		0x00000004
+#define IPU_CONF_PF_EN		0x00000008
+#define IPU_CONF_SDC_EN		0x00000010
+#define IPU_CONF_ADC_EN		0x00000020
+#define IPU_CONF_DI_EN		0x00000040
+#define IPU_CONF_DU_EN		0x00000080
+#define IPU_CONF_PXL_ENDIAN	0x00000100
+
+/* Image Converter Registers */
+#define IC_CONF			0x88
+#define IC_PRP_ENC_RSC		0x8C
+#define IC_PRP_VF_RSC		0x90
+#define IC_PP_RSC		0x94
+#define IC_CMBP_1		0x98
+#define IC_CMBP_2		0x9C
+#define PF_CONF			0xA0
+#define IDMAC_CONF		0xA4
+#define IDMAC_CHA_EN		0xA8
+#define IDMAC_CHA_PRI		0xAC
+#define IDMAC_CHA_BUSY		0xB0
+
+/* Image Converter Register bits */
+#define IC_CONF_PRPENC_EN	0x00000001
+#define IC_CONF_PRPENC_CSC1	0x00000002
+#define IC_CONF_PRPENC_ROT_EN	0x00000004
+#define IC_CONF_PRPVF_EN	0x00000100
+#define IC_CONF_PRPVF_CSC1	0x00000200
+#define IC_CONF_PRPVF_CSC2	0x00000400
+#define IC_CONF_PRPVF_CMB	0x00000800
+#define IC_CONF_PRPVF_ROT_EN	0x00001000
+#define IC_CONF_PP_EN		0x00010000
+#define IC_CONF_PP_CSC1		0x00020000
+#define IC_CONF_PP_CSC2		0x00040000
+#define IC_CONF_PP_CMB		0x00080000
+#define IC_CONF_PP_ROT_EN	0x00100000
+#define IC_CONF_IC_GLB_LOC_A	0x10000000
+#define IC_CONF_KEY_COLOR_EN	0x20000000
+#define IC_CONF_RWS_EN		0x40000000
+#define IC_CONF_CSI_MEM_WR_EN	0x80000000
+
+#define IDMA_CHAN_INVALID	0x000000FF
+#define IDMA_IC_0		0x00000001
+#define IDMA_IC_1		0x00000002
+#define IDMA_IC_2		0x00000004
+#define IDMA_IC_3		0x00000008
+#define IDMA_IC_4		0x00000010
+#define IDMA_IC_5		0x00000020
+#define IDMA_IC_6		0x00000040
+#define IDMA_IC_7		0x00000080
+#define IDMA_IC_8		0x00000100
+#define IDMA_IC_9		0x00000200
+#define IDMA_IC_10		0x00000400
+#define IDMA_IC_11		0x00000800
+#define IDMA_IC_12		0x00001000
+#define IDMA_IC_13		0x00002000
+#define IDMA_SDC_BG		0x00004000
+#define IDMA_SDC_FG		0x00008000
+#define IDMA_SDC_MASK		0x00010000
+#define IDMA_SDC_PARTIAL	0x00020000
+#define IDMA_ADC_SYS1_WR	0x00040000
+#define IDMA_ADC_SYS2_WR	0x00080000
+#define IDMA_ADC_SYS1_CMD	0x00100000
+#define IDMA_ADC_SYS2_CMD	0x00200000
+#define IDMA_ADC_SYS1_RD	0x00400000
+#define IDMA_ADC_SYS2_RD	0x00800000
+#define IDMA_PF_QP		0x01000000
+#define IDMA_PF_BSP		0x02000000
+#define IDMA_PF_Y_IN		0x04000000
+#define IDMA_PF_U_IN		0x08000000
+#define IDMA_PF_V_IN		0x10000000
+#define IDMA_PF_Y_OUT		0x20000000
+#define IDMA_PF_U_OUT		0x40000000
+#define IDMA_PF_V_OUT		0x80000000
+
+#define TSTAT_PF_H264_PAUSE	0x00000001
+#define TSTAT_CSI2MEM_MASK	0x0000000C
+#define TSTAT_CSI2MEM_OFFSET	2
+#define TSTAT_VF_MASK		0x00000600
+#define TSTAT_VF_OFFSET		9
+#define TSTAT_VF_ROT_MASK	0x000C0000
+#define TSTAT_VF_ROT_OFFSET	18
+#define TSTAT_ENC_MASK		0x00000180
+#define TSTAT_ENC_OFFSET	7
+#define TSTAT_ENC_ROT_MASK	0x00030000
+#define TSTAT_ENC_ROT_OFFSET	16
+#define TSTAT_PP_MASK		0x00001800
+#define TSTAT_PP_OFFSET		11
+#define TSTAT_PP_ROT_MASK	0x00300000
+#define TSTAT_PP_ROT_OFFSET	20
+#define TSTAT_PF_MASK		0x00C00000
+#define TSTAT_PF_OFFSET		22
+#define TSTAT_ADCSYS1_MASK	0x03000000
+#define TSTAT_ADCSYS1_OFFSET	24
+#define TSTAT_ADCSYS2_MASK	0x0C000000
+#define TSTAT_ADCSYS2_OFFSET	26
+
+#define TASK_STAT_IDLE		0
+#define TASK_STAT_ACTIVE	1
+#define TASK_STAT_WAIT4READY	2
+
+struct idmac {
+	struct dma_device	dma;
+};
+
+struct ipu {
+	void __iomem		*reg_ipu;
+	void __iomem		*reg_ic;
+	unsigned int		irq_fn;		/* IPU Function IRQ to the CPU */
+	unsigned int		irq_err;	/* IPU Error IRQ to the CPU */
+	unsigned int		irq_base;	/* Beginning of the IPU IRQ range */
+	unsigned long		channel_init_mask;
+	spinlock_t		lock;
+	struct clk		*ipu_clk;
+	struct device		*dev;
+	struct idmac		idmac;
+	struct idmac_channel	channel[IPU_CHANNELS_NUM];
+	struct tasklet_struct	tasklet;
+};
+
+#define to_idmac(d) container_of(d, struct idmac, dma)
+
+extern int ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev);
+extern void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev);
+
+extern bool ipu_irq_status(uint32_t irq);
+extern int ipu_irq_map(unsigned int source);
+extern int ipu_irq_unmap(unsigned int source);
+
+#endif
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
new file mode 100644
index 00000000..a71f55e7
--- /dev/null
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include <mach/ipu.h>
+
+#include "ipu_intern.h"
+
+/*
+ * Register read / write - shall be inlined by the compiler
+ */
+static u32 ipu_read_reg(struct ipu *ipu, unsigned long reg)
+{
+	return __raw_readl(ipu->reg_ipu + reg);
+}
+
+static void ipu_write_reg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+	__raw_writel(value, ipu->reg_ipu + reg);
+}
+
+
+/*
+ * IPU IRQ chip driver
+ */
+
+#define IPU_IRQ_NR_FN_BANKS 3
+#define IPU_IRQ_NR_ERR_BANKS 2
+#define IPU_IRQ_NR_BANKS (IPU_IRQ_NR_FN_BANKS + IPU_IRQ_NR_ERR_BANKS)
+
+struct ipu_irq_bank {
+	unsigned int	control;
+	unsigned int	status;
+	spinlock_t	lock;
+	struct ipu	*ipu;
+};
+
+static struct ipu_irq_bank irq_bank[IPU_IRQ_NR_BANKS] = {
+	/* 3 groups of functional interrupts */
+	{
+		.control	= IPU_INT_CTRL_1,
+		.status		= IPU_INT_STAT_1,
+	}, {
+		.control	= IPU_INT_CTRL_2,
+		.status		= IPU_INT_STAT_2,
+	}, {
+		.control	= IPU_INT_CTRL_3,
+		.status		= IPU_INT_STAT_3,
+	},
+	/* 2 groups of error interrupts */
+	{
+		.control	= IPU_INT_CTRL_4,
+		.status		= IPU_INT_STAT_4,
+	}, {
+		.control	= IPU_INT_CTRL_5,
+		.status		= IPU_INT_STAT_5,
+	},
+};
+
+struct ipu_irq_map {
+	unsigned int		irq;
+	int			source;
+	struct ipu_irq_bank	*bank;
+	struct ipu		*ipu;
+};
+
+static struct ipu_irq_map irq_map[CONFIG_MX3_IPU_IRQS];
+/* Protects allocations from the above array of maps */
+static DEFINE_MUTEX(map_lock);
+/* Protects register accesses and individual mappings */
+static DEFINE_RAW_SPINLOCK(bank_lock);
+
+static struct ipu_irq_map *src2map(unsigned int src)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++)
+		if (irq_map[i].source == src)
+			return irq_map + i;
+
+	return NULL;
+}
+
+static void ipu_irq_unmask(struct irq_data *d)
+{
+	struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+	struct ipu_irq_bank *bank;
+	uint32_t reg;
+	unsigned long lock_flags;
+
+	raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+	bank = map->bank;
+	if (!bank) {
+		raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+		pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+		return;
+	}
+
+	reg = ipu_read_reg(bank->ipu, bank->control);
+	reg |= (1UL << (map->source & 31));
+	ipu_write_reg(bank->ipu, reg, bank->control);
+
+	raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+static void ipu_irq_mask(struct irq_data *d)
+{
+	struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+	struct ipu_irq_bank *bank;
+	uint32_t reg;
+	unsigned long lock_flags;
+
+	raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+	bank = map->bank;
+	if (!bank) {
+		raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+		pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+		return;
+	}
+
+	reg = ipu_read_reg(bank->ipu, bank->control);
+	reg &= ~(1UL << (map->source & 31));
+	ipu_write_reg(bank->ipu, reg, bank->control);
+
+	raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+static void ipu_irq_ack(struct irq_data *d)
+{
+	struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+	struct ipu_irq_bank *bank;
+	unsigned long lock_flags;
+
+	raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+	bank = map->bank;
+	if (!bank) {
+		raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+		pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+		return;
+	}
+
+	ipu_write_reg(bank->ipu, 1UL << (map->source & 31), bank->status);
+	raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+/**
+ * ipu_irq_status() - returns the current interrupt status of the specified IRQ.
+ * @irq:	interrupt line to get status for.
+ * @return:	true if the interrupt is pending/asserted or false if the
+ *		interrupt is not pending.
+ */
+bool ipu_irq_status(unsigned int irq)
+{
+	struct ipu_irq_map *map = irq_get_chip_data(irq);
+	struct ipu_irq_bank *bank;
+	unsigned long lock_flags;
+	bool ret;
+
+	raw_spin_lock_irqsave(&bank_lock, lock_flags);
+	bank = map->bank;
+	ret = bank && ipu_read_reg(bank->ipu, bank->status) &
+		(1UL << (map->source & 31));
+	raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+	return ret;
+}
+
+/**
+ * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * @source:	interrupt source bit position (see below)
+ * @return:	mapped IRQ number or negative error code
+ *
+ * The source parameter has to be explained further. On i.MX31 IPU has 137 IRQ
+ * sources, they are broken down in 5 32-bit registers, like 32, 32, 24, 32, 17.
+ * However, the source argument of this function is not the sequence number of
+ * the possible IRQ, but rather its bit position. So, first interrupt in fourth
+ * register has source number 96, and not 88. This makes calculations easier,
+ * and also provides forward compatibility with any future IPU implementations
+ * with any interrupt bit assignments.
+ */
+int ipu_irq_map(unsigned int source)
+{
+	int i, ret = -ENOMEM;
+	struct ipu_irq_map *map;
+
+	might_sleep();
+
+	mutex_lock(&map_lock);
+	map = src2map(source);
+	if (map) {
+		pr_err("IPU: Source %u already mapped to IRQ %u\n", source, map->irq);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+		if (irq_map[i].source < 0) {
+			unsigned long lock_flags;
+
+			raw_spin_lock_irqsave(&bank_lock, lock_flags);
+			irq_map[i].source = source;
+			irq_map[i].bank = irq_bank + source / 32;
+			raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+			ret = irq_map[i].irq;
+			pr_debug("IPU: mapped source %u to IRQ %u\n",
+				 source, ret);
+			break;
+		}
+	}
+out:
+	mutex_unlock(&map_lock);
+
+	if (ret < 0)
+		pr_err("IPU: couldn't map source %u: %d\n", source, ret);
+
+	return ret;
+}
+
+/**
+ * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * @source:	interrupt source bit position (see ipu_irq_map())
+ * @return:	0 or negative error code
+ */
+int ipu_irq_unmap(unsigned int source)
+{
+	int i, ret = -EINVAL;
+
+	might_sleep();
+
+	mutex_lock(&map_lock);
+	for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+		if (irq_map[i].source == source) {
+			unsigned long lock_flags;
+
+			pr_debug("IPU: unmapped source %u from IRQ %u\n",
+				 source, irq_map[i].irq);
+
+			raw_spin_lock_irqsave(&bank_lock, lock_flags);
+			irq_map[i].source = -EINVAL;
+			irq_map[i].bank = NULL;
+			raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&map_lock);
+
+	return ret;
+}
+
+/* Chained IRQ handler for IPU error interrupt */
+static void ipu_irq_err(unsigned int irq, struct irq_desc *desc)
+{
+	struct ipu *ipu = irq_get_handler_data(irq);
+	u32 status;
+	int i, line;
+
+	for (i = IPU_IRQ_NR_FN_BANKS; i < IPU_IRQ_NR_BANKS; i++) {
+		struct ipu_irq_bank *bank = irq_bank + i;
+
+		raw_spin_lock(&bank_lock);
+		status = ipu_read_reg(ipu, bank->status);
+		/*
+		 * Don't think we have to clear all interrupts here, they will
+		 * be acked by ->handle_irq() (handle_level_irq). However, we
+		 * might want to clear unhandled interrupts after the loop...
+		 */
+		status &= ipu_read_reg(ipu, bank->control);
+		raw_spin_unlock(&bank_lock);
+		while ((line = ffs(status))) {
+			struct ipu_irq_map *map;
+
+			line--;
+			status &= ~(1UL << line);
+
+			raw_spin_lock(&bank_lock);
+			map = src2map(32 * i + line);
+			if (map)
+				irq = map->irq;
+			raw_spin_unlock(&bank_lock);
+
+			if (!map) {
+				pr_err("IPU: Interrupt on unmapped source %u bank %d\n",
+				       line, i);
+				continue;
+			}
+			generic_handle_irq(irq);
+		}
+	}
+}
+
+/* Chained IRQ handler for IPU function interrupt */
+static void ipu_irq_fn(unsigned int irq, struct irq_desc *desc)
+{
+	struct ipu *ipu = irq_desc_get_handler_data(desc);
+	u32 status;
+	int i, line;
+
+	for (i = 0; i < IPU_IRQ_NR_FN_BANKS; i++) {
+		struct ipu_irq_bank *bank = irq_bank + i;
+
+		raw_spin_lock(&bank_lock);
+		status = ipu_read_reg(ipu, bank->status);
+		/* Not clearing all interrupts, see above */
+		status &= ipu_read_reg(ipu, bank->control);
+		raw_spin_unlock(&bank_lock);
+		while ((line = ffs(status))) {
+			struct ipu_irq_map *map;
+
+			line--;
+			status &= ~(1UL << line);
+
+			raw_spin_lock(&bank_lock);
+			map = src2map(32 * i + line);
+			if (map)
+				irq = map->irq;
+			raw_spin_unlock(&bank_lock);
+
+			if (!map) {
+				pr_err("IPU: Interrupt on unmapped source %u bank %d\n",
+				       line, i);
+				continue;
+			}
+			generic_handle_irq(irq);
+		}
+	}
+}
+
+static struct irq_chip ipu_irq_chip = {
+	.name		= "ipu_irq",
+	.irq_ack	= ipu_irq_ack,
+	.irq_mask	= ipu_irq_mask,
+	.irq_unmask	= ipu_irq_unmask,
+};
+
+/* Install the IRQ handler */
+int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev)
+{
+	struct ipu_platform_data *pdata = dev->dev.platform_data;
+	unsigned int irq, irq_base, i;
+
+	irq_base = pdata->irq_base;
+
+	for (i = 0; i < IPU_IRQ_NR_BANKS; i++)
+		irq_bank[i].ipu = ipu;
+
+	for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+		int ret;
+
+		irq = irq_base + i;
+		ret = irq_set_chip(irq, &ipu_irq_chip);
+		if (ret < 0)
+			return ret;
+		ret = irq_set_chip_data(irq, irq_map + i);
+		if (ret < 0)
+			return ret;
+		irq_map[i].ipu = ipu;
+		irq_map[i].irq = irq;
+		irq_map[i].source = -EINVAL;
+		irq_set_handler(irq, handle_level_irq);
+#ifdef CONFIG_ARM
+		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+#endif
+	}
+
+	irq_set_handler_data(ipu->irq_fn, ipu);
+	irq_set_chained_handler(ipu->irq_fn, ipu_irq_fn);
+
+	irq_set_handler_data(ipu->irq_err, ipu);
+	irq_set_chained_handler(ipu->irq_err, ipu_irq_err);
+
+	return 0;
+}
+
+void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev)
+{
+	struct ipu_platform_data *pdata = dev->dev.platform_data;
+	unsigned int irq, irq_base;
+
+	irq_base = pdata->irq_base;
+
+	irq_set_chained_handler(ipu->irq_fn, NULL);
+	irq_set_handler_data(ipu->irq_fn, NULL);
+
+	irq_set_chained_handler(ipu->irq_err, NULL);
+	irq_set_handler_data(ipu->irq_err, NULL);
+
+	for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) {
+#ifdef CONFIG_ARM
+		set_irq_flags(irq, 0);
+#endif
+		irq_set_chip(irq, NULL);
+		irq_set_chip_data(irq, NULL);
+	}
+}
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
new file mode 100644
index 00000000..2ab0a3d0
--- /dev/null
+++ b/drivers/dma/mpc512x_dma.c
@@ -0,0 +1,832 @@
+/*
+ * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
+ * Copyright (C) Semihalf 2009
+ * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ *
+ * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
+ * (defines, structures and comments) was taken from MPC5121 DMA driver
+ * written by Hongjun Chen <hong-jun.chen@freescale.com>.
+ *
+ * Approved as OSADL project by a majority of OSADL members and funded
+ * by OSADL membership fees in 2009;  for details see www.osadl.org.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This is initial version of MPC5121 DMA driver. Only memory to memory
+ * transfers are supported (tested using dmatest module).
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include <linux/random.h>
+
+#include "dmaengine.h"
+
+/* Number of DMA Transfer descriptors allocated per channel */
+#define MPC_DMA_DESCRIPTORS	64
+
+/* Macro definitions */
+#define MPC_DMA_CHANNELS	64
+#define MPC_DMA_TCD_OFFSET	0x1000
+
+/* Arbitration mode of group and channel */
+#define MPC_DMA_DMACR_EDCG	(1 << 31)
+#define MPC_DMA_DMACR_ERGA	(1 << 3)
+#define MPC_DMA_DMACR_ERCA	(1 << 2)
+
+/* Error codes */
+#define MPC_DMA_DMAES_VLD	(1 << 31)
+#define MPC_DMA_DMAES_GPE	(1 << 15)
+#define MPC_DMA_DMAES_CPE	(1 << 14)
+#define MPC_DMA_DMAES_ERRCHN(err) \
+				(((err) >> 8) & 0x3f)
+#define MPC_DMA_DMAES_SAE	(1 << 7)
+#define MPC_DMA_DMAES_SOE	(1 << 6)
+#define MPC_DMA_DMAES_DAE	(1 << 5)
+#define MPC_DMA_DMAES_DOE	(1 << 4)
+#define MPC_DMA_DMAES_NCE	(1 << 3)
+#define MPC_DMA_DMAES_SGE	(1 << 2)
+#define MPC_DMA_DMAES_SBE	(1 << 1)
+#define MPC_DMA_DMAES_DBE	(1 << 0)
+
+#define MPC_DMA_DMAGPOR_SNOOP_ENABLE	(1 << 6)
+
+#define MPC_DMA_TSIZE_1		0x00
+#define MPC_DMA_TSIZE_2		0x01
+#define MPC_DMA_TSIZE_4		0x02
+#define MPC_DMA_TSIZE_16	0x04
+#define MPC_DMA_TSIZE_32	0x05
+
+/* MPC5121 DMA engine registers */
+struct __attribute__ ((__packed__)) mpc_dma_regs {
+	/* 0x00 */
+	u32 dmacr;		/* DMA control register */
+	u32 dmaes;		/* DMA error status */
+	/* 0x08 */
+	u32 dmaerqh;		/* DMA enable request high(channels 63~32) */
+	u32 dmaerql;		/* DMA enable request low(channels 31~0) */
+	u32 dmaeeih;		/* DMA enable error interrupt high(ch63~32) */
+	u32 dmaeeil;		/* DMA enable error interrupt low(ch31~0) */
+	/* 0x18 */
+	u8 dmaserq;		/* DMA set enable request */
+	u8 dmacerq;		/* DMA clear enable request */
+	u8 dmaseei;		/* DMA set enable error interrupt */
+	u8 dmaceei;		/* DMA clear enable error interrupt */
+	/* 0x1c */
+	u8 dmacint;		/* DMA clear interrupt request */
+	u8 dmacerr;		/* DMA clear error */
+	u8 dmassrt;		/* DMA set start bit */
+	u8 dmacdne;		/* DMA clear DONE status bit */
+	/* 0x20 */
+	u32 dmainth;		/* DMA interrupt request high(ch63~32) */
+	u32 dmaintl;		/* DMA interrupt request low(ch31~0) */
+	u32 dmaerrh;		/* DMA error high(ch63~32) */
+	u32 dmaerrl;		/* DMA error low(ch31~0) */
+	/* 0x30 */
+	u32 dmahrsh;		/* DMA hw request status high(ch63~32) */
+	u32 dmahrsl;		/* DMA hardware request status low(ch31~0) */
+	union {
+		u32 dmaihsa;	/* DMA interrupt high select AXE(ch63~32) */
+		u32 dmagpor;	/* (General purpose register on MPC8308) */
+	};
+	u32 dmailsa;		/* DMA interrupt low select AXE(ch31~0) */
+	/* 0x40 ~ 0xff */
+	u32 reserve0[48];	/* Reserved */
+	/* 0x100 */
+	u8 dchpri[MPC_DMA_CHANNELS];
+	/* DMA channels(0~63) priority */
+};
+
+struct __attribute__ ((__packed__)) mpc_dma_tcd {
+	/* 0x00 */
+	u32 saddr;		/* Source address */
+
+	u32 smod:5;		/* Source address modulo */
+	u32 ssize:3;		/* Source data transfer size */
+	u32 dmod:5;		/* Destination address modulo */
+	u32 dsize:3;		/* Destination data transfer size */
+	u32 soff:16;		/* Signed source address offset */
+
+	/* 0x08 */
+	u32 nbytes;		/* Inner "minor" byte count */
+	u32 slast;		/* Last source address adjustment */
+	u32 daddr;		/* Destination address */
+
+	/* 0x14 */
+	u32 citer_elink:1;	/* Enable channel-to-channel linking on
+				 * minor loop complete
+				 */
+	u32 citer_linkch:6;	/* Link channel for minor loop complete */
+	u32 citer:9;		/* Current "major" iteration count */
+	u32 doff:16;		/* Signed destination address offset */
+
+	/* 0x18 */
+	u32 dlast_sga;		/* Last Destination address adjustment/scatter
+				 * gather address
+				 */
+
+	/* 0x1c */
+	u32 biter_elink:1;	/* Enable channel-to-channel linking on major
+				 * loop complete
+				 */
+	u32 biter_linkch:6;
+	u32 biter:9;		/* Beginning "major" iteration count */
+	u32 bwc:2;		/* Bandwidth control */
+	u32 major_linkch:6;	/* Link channel number */
+	u32 done:1;		/* Channel done */
+	u32 active:1;		/* Channel active */
+	u32 major_elink:1;	/* Enable channel-to-channel linking on major
+				 * loop complete
+				 */
+	u32 e_sg:1;		/* Enable scatter/gather processing */
+	u32 d_req:1;		/* Disable request */
+	u32 int_half:1;		/* Enable an interrupt when major counter is
+				 * half complete
+				 */
+	u32 int_maj:1;		/* Enable an interrupt when major iteration
+				 * count completes
+				 */
+	u32 start:1;		/* Channel start */
+};
+
+struct mpc_dma_desc {
+	struct dma_async_tx_descriptor	desc;
+	struct mpc_dma_tcd		*tcd;
+	dma_addr_t			tcd_paddr;
+	int				error;
+	struct list_head		node;
+};
+
+struct mpc_dma_chan {
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		queued;
+	struct list_head		active;
+	struct list_head		completed;
+	struct mpc_dma_tcd		*tcd;
+	dma_addr_t			tcd_paddr;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+};
+
+struct mpc_dma {
+	struct dma_device		dma;
+	struct tasklet_struct		tasklet;
+	struct mpc_dma_chan		channels[MPC_DMA_CHANNELS];
+	struct mpc_dma_regs __iomem	*regs;
+	struct mpc_dma_tcd __iomem	*tcd;
+	int				irq;
+	int				irq2;
+	uint				error_status;
+	int				is_mpc8308;
+
+	/* Lock for error_status field in this structure */
+	spinlock_t			error_status_lock;
+};
+
+#define DRV_NAME	"mpc512x_dma"
+
+/* Convert struct dma_chan to struct mpc_dma_chan */
+static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct mpc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct mpc_dma */
+static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c);
+	return container_of(mchan, struct mpc_dma, channels[c->chan_id]);
+}
+
+/*
+ * Execute all queued DMA descriptors.
+ *
+ * Following requirements must be met while calling mpc_dma_execute():
+ * 	a) mchan->lock is acquired,
+ * 	b) mchan->active list is empty,
+ * 	c) mchan->queued list contains at least one entry.
+ */
+static void mpc_dma_execute(struct mpc_dma_chan *mchan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan);
+	struct mpc_dma_desc *first = NULL;
+	struct mpc_dma_desc *prev = NULL;
+	struct mpc_dma_desc *mdesc;
+	int cid = mchan->chan.chan_id;
+
+	/* Move all queued descriptors to active list */
+	list_splice_tail_init(&mchan->queued, &mchan->active);
+
+	/* Chain descriptors into one transaction */
+	list_for_each_entry(mdesc, &mchan->active, node) {
+		if (!first)
+			first = mdesc;
+
+		if (!prev) {
+			prev = mdesc;
+			continue;
+		}
+
+		prev->tcd->dlast_sga = mdesc->tcd_paddr;
+		prev->tcd->e_sg = 1;
+		mdesc->tcd->start = 1;
+
+		prev = mdesc;
+	}
+
+	prev->tcd->int_maj = 1;
+
+	/* Send first descriptor in chain into hardware */
+	memcpy_toio(&mdma->tcd[cid], first->tcd, sizeof(struct mpc_dma_tcd));
+
+	if (first != prev)
+		mdma->tcd[cid].e_sg = 1;
+	out_8(&mdma->regs->dmassrt, cid);
+}
+
+/* Handle interrupt on one half of DMA controller (32 channels) */
+static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off)
+{
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma_desc *mdesc;
+	u32 status = is | es;
+	int ch;
+
+	while ((ch = fls(status) - 1) >= 0) {
+		status &= ~(1 << ch);
+		mchan = &mdma->channels[ch + off];
+
+		spin_lock(&mchan->lock);
+
+		out_8(&mdma->regs->dmacint, ch + off);
+		out_8(&mdma->regs->dmacerr, ch + off);
+
+		/* Check error status */
+		if (es & (1 << ch))
+			list_for_each_entry(mdesc, &mchan->active, node)
+				mdesc->error = -EIO;
+
+		/* Execute queued descriptors */
+		list_splice_tail_init(&mchan->active, &mchan->completed);
+		if (!list_empty(&mchan->queued))
+			mpc_dma_execute(mchan);
+
+		spin_unlock(&mchan->lock);
+	}
+}
+
+/* Interrupt handler */
+static irqreturn_t mpc_dma_irq(int irq, void *data)
+{
+	struct mpc_dma *mdma = data;
+	uint es;
+
+	/* Save error status register */
+	es = in_be32(&mdma->regs->dmaes);
+	spin_lock(&mdma->error_status_lock);
+	if ((es & MPC_DMA_DMAES_VLD) && mdma->error_status == 0)
+		mdma->error_status = es;
+	spin_unlock(&mdma->error_status_lock);
+
+	/* Handle interrupt on each channel */
+	if (mdma->dma.chancnt > 32) {
+		mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmainth),
+					in_be32(&mdma->regs->dmaerrh), 32);
+	}
+	mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmaintl),
+					in_be32(&mdma->regs->dmaerrl), 0);
+
+	/* Schedule tasklet */
+	tasklet_schedule(&mdma->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* process completed descriptors */
+static void mpc_dma_process_completed(struct mpc_dma *mdma)
+{
+	dma_cookie_t last_cookie = 0;
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma_desc *mdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long flags;
+	LIST_HEAD(list);
+	int i;
+
+	for (i = 0; i < mdma->dma.chancnt; i++) {
+		mchan = &mdma->channels[i];
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&mchan->lock, flags);
+		if (!list_empty(&mchan->completed))
+			list_splice_tail_init(&mchan->completed, &list);
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		if (list_empty(&list))
+			continue;
+
+		/* Execute callbacks and run dependencies */
+		list_for_each_entry(mdesc, &list, node) {
+			desc = &mdesc->desc;
+
+			if (desc->callback)
+				desc->callback(desc->callback_param);
+
+			last_cookie = desc->cookie;
+			dma_run_dependencies(desc);
+		}
+
+		/* Free descriptors */
+		spin_lock_irqsave(&mchan->lock, flags);
+		list_splice_tail_init(&list, &mchan->free);
+		mchan->chan.completed_cookie = last_cookie;
+		spin_unlock_irqrestore(&mchan->lock, flags);
+	}
+}
+
+/* DMA Tasklet */
+static void mpc_dma_tasklet(unsigned long data)
+{
+	struct mpc_dma *mdma = (void *)data;
+	unsigned long flags;
+	uint es;
+
+	spin_lock_irqsave(&mdma->error_status_lock, flags);
+	es = mdma->error_status;
+	mdma->error_status = 0;
+	spin_unlock_irqrestore(&mdma->error_status_lock, flags);
+
+	/* Print nice error report */
+	if (es) {
+		dev_err(mdma->dma.dev,
+			"Hardware reported following error(s) on channel %u:\n",
+						      MPC_DMA_DMAES_ERRCHN(es));
+
+		if (es & MPC_DMA_DMAES_GPE)
+			dev_err(mdma->dma.dev, "- Group Priority Error\n");
+		if (es & MPC_DMA_DMAES_CPE)
+			dev_err(mdma->dma.dev, "- Channel Priority Error\n");
+		if (es & MPC_DMA_DMAES_SAE)
+			dev_err(mdma->dma.dev, "- Source Address Error\n");
+		if (es & MPC_DMA_DMAES_SOE)
+			dev_err(mdma->dma.dev, "- Source Offset"
+						" Configuration Error\n");
+		if (es & MPC_DMA_DMAES_DAE)
+			dev_err(mdma->dma.dev, "- Destination Address"
+								" Error\n");
+		if (es & MPC_DMA_DMAES_DOE)
+			dev_err(mdma->dma.dev, "- Destination Offset"
+						" Configuration Error\n");
+		if (es & MPC_DMA_DMAES_NCE)
+			dev_err(mdma->dma.dev, "- NBytes/Citter"
+						" Configuration Error\n");
+		if (es & MPC_DMA_DMAES_SGE)
+			dev_err(mdma->dma.dev, "- Scatter/Gather"
+						" Configuration Error\n");
+		if (es & MPC_DMA_DMAES_SBE)
+			dev_err(mdma->dma.dev, "- Source Bus Error\n");
+		if (es & MPC_DMA_DMAES_DBE)
+			dev_err(mdma->dma.dev, "- Destination Bus Error\n");
+	}
+
+	mpc_dma_process_completed(mdma);
+}
+
+/* Submit descriptor to hardware */
+static dma_cookie_t mpc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(txd->chan);
+	struct mpc_dma_desc *mdesc;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	mdesc = container_of(txd, struct mpc_dma_desc, desc);
+
+	spin_lock_irqsave(&mchan->lock, flags);
+
+	/* Move descriptor to queue */
+	list_move_tail(&mdesc->node, &mchan->queued);
+
+	/* If channel is idle, execute all queued descriptors */
+	if (list_empty(&mchan->active))
+		mpc_dma_execute(mchan);
+
+	/* Update cookie */
+	cookie = dma_cookie_assign(txd);
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	return cookie;
+}
+
+/* Alloc channel resources */
+static int mpc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc;
+	struct mpc_dma_tcd *tcd;
+	dma_addr_t tcd_paddr;
+	unsigned long flags;
+	LIST_HEAD(descs);
+	int i;
+
+	/* Alloc DMA memory for Transfer Control Descriptors */
+	tcd = dma_alloc_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+							&tcd_paddr, GFP_KERNEL);
+	if (!tcd)
+		return -ENOMEM;
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) {
+		mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL);
+		if (!mdesc) {
+			dev_notice(mdma->dma.dev, "Memory allocation error. "
+					"Allocated only %u descriptors\n", i);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&mdesc->desc, chan);
+		mdesc->desc.flags = DMA_CTRL_ACK;
+		mdesc->desc.tx_submit = mpc_dma_tx_submit;
+
+		mdesc->tcd = &tcd[i];
+		mdesc->tcd_paddr = tcd_paddr + (i * sizeof(struct mpc_dma_tcd));
+
+		list_add_tail(&mdesc->node, &descs);
+	}
+
+	/* Return error only if no descriptors were allocated */
+	if (i == 0) {
+		dma_free_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+								tcd, tcd_paddr);
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&mchan->lock, flags);
+	mchan->tcd = tcd;
+	mchan->tcd_paddr = tcd_paddr;
+	list_splice_tail_init(&descs, &mchan->free);
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	/* Enable Error Interrupt */
+	out_8(&mdma->regs->dmaseei, chan->chan_id);
+
+	return 0;
+}
+
+/* Free channel resources */
+static void mpc_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc, *tmp;
+	struct mpc_dma_tcd *tcd;
+	dma_addr_t tcd_paddr;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&mchan->lock, flags);
+
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&mchan->prepared));
+	BUG_ON(!list_empty(&mchan->queued));
+	BUG_ON(!list_empty(&mchan->active));
+	BUG_ON(!list_empty(&mchan->completed));
+
+	/* Move data */
+	list_splice_tail_init(&mchan->free, &descs);
+	tcd = mchan->tcd;
+	tcd_paddr = mchan->tcd_paddr;
+
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	/* Free DMA memory used by descriptors */
+	dma_free_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+								tcd, tcd_paddr);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(mdesc, tmp, &descs, node)
+		kfree(mdesc);
+
+	/* Disable Error Interrupt */
+	out_8(&mdma->regs->dmaceei, chan->chan_id);
+}
+
+/* Send all pending descriptor to hardware */
+static void mpc_dma_issue_pending(struct dma_chan *chan)
+{
+	/*
+	 * We are posting descriptors to the hardware as soon as
+	 * they are ready, so this function does nothing.
+	 */
+}
+
+/* Check request completion status */
+static enum dma_status
+mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+	       struct dma_tx_state *txstate)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	enum dma_status ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mchan->lock, flags);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	return ret;
+}
+
+/* Prepare descriptor for memory to memory copy */
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+					size_t len, unsigned long flags)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc = NULL;
+	struct mpc_dma_tcd *tcd;
+	unsigned long iflags;
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	if (!list_empty(&mchan->free)) {
+		mdesc = list_first_entry(&mchan->free, struct mpc_dma_desc,
+									node);
+		list_del(&mdesc->node);
+	}
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	if (!mdesc) {
+		/* try to free completed descriptors */
+		mpc_dma_process_completed(mdma);
+		return NULL;
+	}
+
+	mdesc->error = 0;
+	tcd = mdesc->tcd;
+
+	/* Prepare Transfer Control Descriptor for this transaction */
+	memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+	if (IS_ALIGNED(src | dst | len, 32)) {
+		tcd->ssize = MPC_DMA_TSIZE_32;
+		tcd->dsize = MPC_DMA_TSIZE_32;
+		tcd->soff = 32;
+		tcd->doff = 32;
+	} else if (!mdma->is_mpc8308 && IS_ALIGNED(src | dst | len, 16)) {
+		/* MPC8308 doesn't support 16 byte transfers */
+		tcd->ssize = MPC_DMA_TSIZE_16;
+		tcd->dsize = MPC_DMA_TSIZE_16;
+		tcd->soff = 16;
+		tcd->doff = 16;
+	} else if (IS_ALIGNED(src | dst | len, 4)) {
+		tcd->ssize = MPC_DMA_TSIZE_4;
+		tcd->dsize = MPC_DMA_TSIZE_4;
+		tcd->soff = 4;
+		tcd->doff = 4;
+	} else if (IS_ALIGNED(src | dst | len, 2)) {
+		tcd->ssize = MPC_DMA_TSIZE_2;
+		tcd->dsize = MPC_DMA_TSIZE_2;
+		tcd->soff = 2;
+		tcd->doff = 2;
+	} else {
+		tcd->ssize = MPC_DMA_TSIZE_1;
+		tcd->dsize = MPC_DMA_TSIZE_1;
+		tcd->soff = 1;
+		tcd->doff = 1;
+	}
+
+	tcd->saddr = src;
+	tcd->daddr = dst;
+	tcd->nbytes = len;
+	tcd->biter = 1;
+	tcd->citer = 1;
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	list_add_tail(&mdesc->node, &mchan->prepared);
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	return &mdesc->desc;
+}
+
+static int __devinit mpc_dma_probe(struct platform_device *op)
+{
+	struct device_node *dn = op->dev.of_node;
+	struct device *dev = &op->dev;
+	struct dma_device *dma;
+	struct mpc_dma *mdma;
+	struct mpc_dma_chan *mchan;
+	struct resource res;
+	ulong regs_start, regs_size;
+	int retval, i;
+
+	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
+	if (!mdma) {
+		dev_err(dev, "Memory exhausted!\n");
+		return -ENOMEM;
+	}
+
+	mdma->irq = irq_of_parse_and_map(dn, 0);
+	if (mdma->irq == NO_IRQ) {
+		dev_err(dev, "Error mapping IRQ!\n");
+		return -EINVAL;
+	}
+
+	if (of_device_is_compatible(dn, "fsl,mpc8308-dma")) {
+		mdma->is_mpc8308 = 1;
+		mdma->irq2 = irq_of_parse_and_map(dn, 1);
+		if (mdma->irq2 == NO_IRQ) {
+			dev_err(dev, "Error mapping IRQ!\n");
+			return -EINVAL;
+		}
+	}
+
+	retval = of_address_to_resource(dn, 0, &res);
+	if (retval) {
+		dev_err(dev, "Error parsing memory region!\n");
+		return retval;
+	}
+
+	regs_start = res.start;
+	regs_size = resource_size(&res);
+
+	if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) {
+		dev_err(dev, "Error requesting memory region!\n");
+		return -EBUSY;
+	}
+
+	mdma->regs = devm_ioremap(dev, regs_start, regs_size);
+	if (!mdma->regs) {
+		dev_err(dev, "Error mapping memory region!\n");
+		return -ENOMEM;
+	}
+
+	mdma->tcd = (struct mpc_dma_tcd *)((u8 *)(mdma->regs)
+							+ MPC_DMA_TCD_OFFSET);
+
+	retval = devm_request_irq(dev, mdma->irq, &mpc_dma_irq, 0, DRV_NAME,
+									mdma);
+	if (retval) {
+		dev_err(dev, "Error requesting IRQ!\n");
+		return -EINVAL;
+	}
+
+	if (mdma->is_mpc8308) {
+		retval = devm_request_irq(dev, mdma->irq2, &mpc_dma_irq, 0,
+				DRV_NAME, mdma);
+		if (retval) {
+			dev_err(dev, "Error requesting IRQ2!\n");
+			return -EINVAL;
+		}
+	}
+
+	spin_lock_init(&mdma->error_status_lock);
+
+	dma = &mdma->dma;
+	dma->dev = dev;
+	if (!mdma->is_mpc8308)
+		dma->chancnt = MPC_DMA_CHANNELS;
+	else
+		dma->chancnt = 16; /* MPC8308 DMA has only 16 channels */
+	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
+	dma->device_issue_pending = mpc_dma_issue_pending;
+	dma->device_tx_status = mpc_dma_tx_status;
+	dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+
+	for (i = 0; i < dma->chancnt; i++) {
+		mchan = &mdma->channels[i];
+
+		mchan->chan.device = dma;
+		dma_cookie_init(&mchan->chan);
+
+		INIT_LIST_HEAD(&mchan->free);
+		INIT_LIST_HEAD(&mchan->prepared);
+		INIT_LIST_HEAD(&mchan->queued);
+		INIT_LIST_HEAD(&mchan->active);
+		INIT_LIST_HEAD(&mchan->completed);
+
+		spin_lock_init(&mchan->lock);
+		list_add_tail(&mchan->chan.device_node, &dma->channels);
+	}
+
+	tasklet_init(&mdma->tasklet, mpc_dma_tasklet, (unsigned long)mdma);
+
+	/*
+	 * Configure DMA Engine:
+	 * - Dynamic clock,
+	 * - Round-robin group arbitration,
+	 * - Round-robin channel arbitration.
+	 */
+	if (!mdma->is_mpc8308) {
+		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
+					MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA);
+
+		/* Disable hardware DMA requests */
+		out_be32(&mdma->regs->dmaerqh, 0);
+		out_be32(&mdma->regs->dmaerql, 0);
+
+		/* Disable error interrupts */
+		out_be32(&mdma->regs->dmaeeih, 0);
+		out_be32(&mdma->regs->dmaeeil, 0);
+
+		/* Clear interrupts status */
+		out_be32(&mdma->regs->dmainth, 0xFFFFFFFF);
+		out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF);
+		out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF);
+		out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF);
+
+		/* Route interrupts to IPIC */
+		out_be32(&mdma->regs->dmaihsa, 0);
+		out_be32(&mdma->regs->dmailsa, 0);
+	} else {
+		/* MPC8308 has 16 channels and lacks some registers */
+		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
+
+		/* enable snooping */
+		out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
+		/* Disable error interrupts */
+		out_be32(&mdma->regs->dmaeeil, 0);
+
+		/* Clear interrupts status */
+		out_be32(&mdma->regs->dmaintl, 0xFFFF);
+		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
+	}
+
+	/* Register DMA engine */
+	dev_set_drvdata(dev, mdma);
+	retval = dma_async_device_register(dma);
+	if (retval) {
+		devm_free_irq(dev, mdma->irq, mdma);
+		irq_dispose_mapping(mdma->irq);
+	}
+
+	return retval;
+}
+
+static int __devexit mpc_dma_remove(struct platform_device *op)
+{
+	struct device *dev = &op->dev;
+	struct mpc_dma *mdma = dev_get_drvdata(dev);
+
+	dma_async_device_unregister(&mdma->dma);
+	devm_free_irq(dev, mdma->irq, mdma);
+	irq_dispose_mapping(mdma->irq);
+
+	return 0;
+}
+
+static struct of_device_id mpc_dma_match[] = {
+	{ .compatible = "fsl,mpc5121-dma", },
+	{},
+};
+
+static struct platform_driver mpc_dma_driver = {
+	.probe		= mpc_dma_probe,
+	.remove		= __devexit_p(mpc_dma_remove),
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table	= mpc_dma_match,
+	},
+};
+
+module_platform_driver(mpc_dma_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Piotr Ziecik <kosmo@semihalf.com>");
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 00000000..fa5d55fe
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1356 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <plat/mv_xor.h>
+
+#include "dmaengine.h"
+#include "mv_xor.h"
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan)		\
+	container_of(chan, struct mv_xor_chan, common)
+
+#define to_mv_xor_device(dev)		\
+	container_of(dev, struct mv_xor_device, common)
+
+#define to_mv_xor_slot(tx)		\
+	container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+	hw_desc->status = (1 << 31);
+	hw_desc->phy_next_desc = 0;
+	hw_desc->desc_command = (1 << 31);
+}
+
+static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	return hw_desc->phy_dest_addr;
+}
+
+static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+				int src_idx)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+				   u32 byte_count)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+				  u32 next_desc_addr)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	BUG_ON(hw_desc->phy_next_desc);
+	hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->phy_next_desc = 0;
+}
+
+static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
+{
+	desc->value = val;
+}
+
+static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+				  dma_addr_t addr)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->phy_dest_addr = addr;
+}
+
+static int mv_chan_memset_slot_count(size_t len)
+{
+	return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+				 int index, dma_addr_t addr)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->phy_src_addr[index] = addr;
+	if (desc->type == DMA_XOR)
+		hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+	return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+					u32 next_desc_addr)
+{
+	__raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
+{
+	__raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
+{
+	__raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
+{
+	__raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+	__raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+	u32 val = __raw_readl(XOR_INTR_MASK(chan));
+	val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+	__raw_writel(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+	u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+	intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+	return intr_cause;
+}
+
+static int mv_is_err_intr(u32 intr_cause)
+{
+	if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+		return 1;
+
+	return 0;
+}
+
+static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+	u32 val = ~(1 << (chan->idx * 16));
+	dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+	__raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+	u32 val = 0xFFFF0000 >> (chan->idx * 16);
+	__raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static int mv_can_chain(struct mv_xor_desc_slot *desc)
+{
+	struct mv_xor_desc_slot *chain_old_tail = list_entry(
+		desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+	if (chain_old_tail->type != desc->type)
+		return 0;
+	if (desc->type == DMA_MEMSET)
+		return 0;
+
+	return 1;
+}
+
+static void mv_set_mode(struct mv_xor_chan *chan,
+			       enum dma_transaction_type type)
+{
+	u32 op_mode;
+	u32 config = __raw_readl(XOR_CONFIG(chan));
+
+	switch (type) {
+	case DMA_XOR:
+		op_mode = XOR_OPERATION_MODE_XOR;
+		break;
+	case DMA_MEMCPY:
+		op_mode = XOR_OPERATION_MODE_MEMCPY;
+		break;
+	case DMA_MEMSET:
+		op_mode = XOR_OPERATION_MODE_MEMSET;
+		break;
+	default:
+		dev_printk(KERN_ERR, chan->device->common.dev,
+			   "error: unsupported operation %d.\n",
+			   type);
+		BUG();
+		return;
+	}
+
+	config &= ~0x7;
+	config |= op_mode;
+	__raw_writel(config, XOR_CONFIG(chan));
+	chan->current_type = type;
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+	u32 activation;
+
+	dev_dbg(chan->device->common.dev, " activate chan.\n");
+	activation = __raw_readl(XOR_ACTIVATION(chan));
+	activation |= 0x1;
+	__raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+	u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+	state = (state >> 4) & 0x3;
+
+	return (state == 1) ? 1 : 0;
+}
+
+static int mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+	return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+			      struct mv_xor_desc_slot *slot)
+{
+	dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+		__func__, __LINE__, slot);
+
+	slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+				   struct mv_xor_desc_slot *sw_desc)
+{
+	dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+		__func__, __LINE__, sw_desc);
+	if (sw_desc->type != mv_chan->current_type)
+		mv_set_mode(mv_chan, sw_desc->type);
+
+	if (sw_desc->type == DMA_MEMSET) {
+		/* for memset requests we need to program the engine, no
+		 * descriptors used.
+		 */
+		struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+		mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+		mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+		mv_chan_set_value(mv_chan, sw_desc->value);
+	} else {
+		/* set the hardware chain */
+		mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+	}
+	mv_chan->pending += sw_desc->slot_cnt;
+	mv_xor_issue_pending(&mv_chan->common);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+	struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+	BUG_ON(desc->async_tx.cookie < 0);
+
+	if (desc->async_tx.cookie > 0) {
+		cookie = desc->async_tx.cookie;
+
+		/* call the callback (must not sleep or submit new
+		 * operations to this channel)
+		 */
+		if (desc->async_tx.callback)
+			desc->async_tx.callback(
+				desc->async_tx.callback_param);
+
+		/* unmap dma addresses
+		 * (unmap_single vs unmap_page?)
+		 */
+		if (desc->group_head && desc->unmap_len) {
+			struct mv_xor_desc_slot *unmap = desc->group_head;
+			struct device *dev =
+				&mv_chan->device->pdev->dev;
+			u32 len = unmap->unmap_len;
+			enum dma_ctrl_flags flags = desc->async_tx.flags;
+			u32 src_cnt;
+			dma_addr_t addr;
+			dma_addr_t dest;
+
+			src_cnt = unmap->unmap_src_cnt;
+			dest = mv_desc_get_dest_addr(unmap);
+			if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+				enum dma_data_direction dir;
+
+				if (src_cnt > 1) /* is xor ? */
+					dir = DMA_BIDIRECTIONAL;
+				else
+					dir = DMA_FROM_DEVICE;
+				dma_unmap_page(dev, dest, len, dir);
+			}
+
+			if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+				while (src_cnt--) {
+					addr = mv_desc_get_src_addr(unmap,
+								    src_cnt);
+					if (addr == dest)
+						continue;
+					dma_unmap_page(dev, addr, len,
+						       DMA_TO_DEVICE);
+				}
+			}
+			desc->group_head = NULL;
+		}
+	}
+
+	/* run dependent operations */
+	dma_run_dependencies(&desc->async_tx);
+
+	return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+	struct mv_xor_desc_slot *iter, *_iter;
+
+	dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+				 completed_node) {
+
+		if (async_tx_test_ack(&iter->async_tx)) {
+			list_del(&iter->completed_node);
+			mv_xor_free_slots(mv_chan, iter);
+		}
+	}
+	return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+	struct mv_xor_chan *mv_chan)
+{
+	dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+		__func__, __LINE__, desc, desc->async_tx.flags);
+	list_del(&desc->chain_node);
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx)) {
+		/* move this slot to the completed_slots */
+		list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+		return 0;
+	}
+
+	mv_xor_free_slots(mv_chan, desc);
+	return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+	struct mv_xor_desc_slot *iter, *_iter;
+	dma_cookie_t cookie = 0;
+	int busy = mv_chan_is_busy(mv_chan);
+	u32 current_desc = mv_chan_get_current_desc(mv_chan);
+	int seen_current = 0;
+
+	dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+	dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+	mv_xor_clean_completed_slots(mv_chan);
+
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+
+	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+					chain_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+
+		/* do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy
+		 */
+		if (iter->async_tx.phys == current_desc) {
+			seen_current = 1;
+			if (busy)
+				break;
+		}
+
+		cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+		if (mv_xor_clean_slot(iter, mv_chan))
+			break;
+	}
+
+	if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+		struct mv_xor_desc_slot *chain_head;
+		chain_head = list_entry(mv_chan->chain.next,
+					struct mv_xor_desc_slot,
+					chain_node);
+
+		mv_xor_start_new_chain(mv_chan, chain_head);
+	}
+
+	if (cookie > 0)
+		mv_chan->common.completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+	spin_lock_bh(&mv_chan->lock);
+	__mv_xor_slot_cleanup(mv_chan);
+	spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+	struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+	mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+		    int slots_per_op)
+{
+	struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+	LIST_HEAD(chain);
+	int slots_found, retry = 0;
+
+	/* start search from the last allocated descrtiptor
+	 * if a contiguous allocation can not be found start searching
+	 * from the beginning of the list
+	 */
+retry:
+	slots_found = 0;
+	if (retry == 0)
+		iter = mv_chan->last_used;
+	else
+		iter = list_entry(&mv_chan->all_slots,
+			struct mv_xor_desc_slot,
+			slot_node);
+
+	list_for_each_entry_safe_continue(
+		iter, _iter, &mv_chan->all_slots, slot_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+		if (iter->slots_per_op) {
+			/* give up after finding the first busy slot
+			 * on the second pass through the list
+			 */
+			if (retry)
+				break;
+
+			slots_found = 0;
+			continue;
+		}
+
+		/* start the allocation if the slot is correctly aligned */
+		if (!slots_found++)
+			alloc_start = iter;
+
+		if (slots_found == num_slots) {
+			struct mv_xor_desc_slot *alloc_tail = NULL;
+			struct mv_xor_desc_slot *last_used = NULL;
+			iter = alloc_start;
+			while (num_slots) {
+				int i;
+
+				/* pre-ack all but the last descriptor */
+				async_tx_ack(&iter->async_tx);
+
+				list_add_tail(&iter->chain_node, &chain);
+				alloc_tail = iter;
+				iter->async_tx.cookie = 0;
+				iter->slot_cnt = num_slots;
+				iter->xor_check_result = NULL;
+				for (i = 0; i < slots_per_op; i++) {
+					iter->slots_per_op = slots_per_op - i;
+					last_used = iter;
+					iter = list_entry(iter->slot_node.next,
+						struct mv_xor_desc_slot,
+						slot_node);
+				}
+				num_slots -= slots_per_op;
+			}
+			alloc_tail->group_head = alloc_start;
+			alloc_tail->async_tx.cookie = -EBUSY;
+			list_splice(&chain, &alloc_tail->tx_list);
+			mv_chan->last_used = last_used;
+			mv_desc_clear_next_desc(alloc_start);
+			mv_desc_clear_next_desc(alloc_tail);
+			return alloc_tail;
+		}
+	}
+	if (!retry++)
+		goto retry;
+
+	/* try to free some slots if the allocation fails */
+	tasklet_schedule(&mv_chan->irq_tasklet);
+
+	return NULL;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+	struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+	dma_cookie_t cookie;
+	int new_hw_chain = 1;
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s sw_desc %p: async_tx %p\n",
+		__func__, sw_desc, &sw_desc->async_tx);
+
+	grp_start = sw_desc->group_head;
+
+	spin_lock_bh(&mv_chan->lock);
+	cookie = dma_cookie_assign(tx);
+
+	if (list_empty(&mv_chan->chain))
+		list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
+	else {
+		new_hw_chain = 0;
+
+		old_chain_tail = list_entry(mv_chan->chain.prev,
+					    struct mv_xor_desc_slot,
+					    chain_node);
+		list_splice_init(&grp_start->tx_list,
+				 &old_chain_tail->chain_node);
+
+		if (!mv_can_chain(grp_start))
+			goto submit_done;
+
+		dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+			old_chain_tail->async_tx.phys);
+
+		/* fix up the hardware chain */
+		mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+		/* if the channel is not busy */
+		if (!mv_chan_is_busy(mv_chan)) {
+			u32 current_desc = mv_chan_get_current_desc(mv_chan);
+			/*
+			 * and the curren desc is the end of the chain before
+			 * the append, then we need to start the channel
+			 */
+			if (current_desc == old_chain_tail->async_tx.phys)
+				new_hw_chain = 1;
+		}
+	}
+
+	if (new_hw_chain)
+		mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+	spin_unlock_bh(&mv_chan->lock);
+
+	return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
+{
+	char *hw_desc;
+	int idx;
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *slot = NULL;
+	struct mv_xor_platform_data *plat_data =
+		mv_chan->device->pdev->dev.platform_data;
+	int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+	/* Allocate descriptor slots */
+	idx = mv_chan->slots_allocated;
+	while (idx < num_descs_in_pool) {
+		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			printk(KERN_INFO "MV XOR Channel only initialized"
+				" %d descriptor slots", idx);
+			break;
+		}
+		hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+		slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+		dma_async_tx_descriptor_init(&slot->async_tx, chan);
+		slot->async_tx.tx_submit = mv_xor_tx_submit;
+		INIT_LIST_HEAD(&slot->chain_node);
+		INIT_LIST_HEAD(&slot->slot_node);
+		INIT_LIST_HEAD(&slot->tx_list);
+		hw_desc = (char *) mv_chan->device->dma_desc_pool;
+		slot->async_tx.phys =
+			(dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+		slot->idx = idx++;
+
+		spin_lock_bh(&mv_chan->lock);
+		mv_chan->slots_allocated = idx;
+		list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+		spin_unlock_bh(&mv_chan->lock);
+	}
+
+	if (mv_chan->slots_allocated && !mv_chan->last_used)
+		mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+					struct mv_xor_desc_slot,
+					slot_node);
+
+	dev_dbg(mv_chan->device->common.dev,
+		"allocated %d descriptor slots last_used: %p\n",
+		mv_chan->slots_allocated, mv_chan->last_used);
+
+	return mv_chan->slots_allocated ? : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *sw_desc, *grp_start;
+	int slot_cnt;
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s dest: %x src %x len: %u flags: %ld\n",
+		__func__, dest, src, len, flags);
+	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+		return NULL;
+
+	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
+
+	spin_lock_bh(&mv_chan->lock);
+	slot_cnt = mv_chan_memcpy_slot_count(len);
+	sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+	if (sw_desc) {
+		sw_desc->type = DMA_MEMCPY;
+		sw_desc->async_tx.flags = flags;
+		grp_start = sw_desc->group_head;
+		mv_desc_init(grp_start, flags);
+		mv_desc_set_byte_count(grp_start, len);
+		mv_desc_set_dest_addr(sw_desc->group_head, dest);
+		mv_desc_set_src_addr(grp_start, 0, src);
+		sw_desc->unmap_src_cnt = 1;
+		sw_desc->unmap_len = len;
+	}
+	spin_unlock_bh(&mv_chan->lock);
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s sw_desc %p async_tx %p\n",
+		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+		       size_t len, unsigned long flags)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *sw_desc, *grp_start;
+	int slot_cnt;
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s dest: %x len: %u flags: %ld\n",
+		__func__, dest, len, flags);
+	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+		return NULL;
+
+	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
+
+	spin_lock_bh(&mv_chan->lock);
+	slot_cnt = mv_chan_memset_slot_count(len);
+	sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+	if (sw_desc) {
+		sw_desc->type = DMA_MEMSET;
+		sw_desc->async_tx.flags = flags;
+		grp_start = sw_desc->group_head;
+		mv_desc_init(grp_start, flags);
+		mv_desc_set_byte_count(grp_start, len);
+		mv_desc_set_dest_addr(sw_desc->group_head, dest);
+		mv_desc_set_block_fill_val(grp_start, value);
+		sw_desc->unmap_src_cnt = 1;
+		sw_desc->unmap_len = len;
+	}
+	spin_unlock_bh(&mv_chan->lock);
+	dev_dbg(mv_chan->device->common.dev,
+		"%s sw_desc %p async_tx %p \n",
+		__func__, sw_desc, &sw_desc->async_tx);
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+		    unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *sw_desc, *grp_start;
+	int slot_cnt;
+
+	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+		return NULL;
+
+	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s src_cnt: %d len: dest %x %u flags: %ld\n",
+		__func__, src_cnt, len, dest, flags);
+
+	spin_lock_bh(&mv_chan->lock);
+	slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+	sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+	if (sw_desc) {
+		sw_desc->type = DMA_XOR;
+		sw_desc->async_tx.flags = flags;
+		grp_start = sw_desc->group_head;
+		mv_desc_init(grp_start, flags);
+		/* the byte count field is the same as in memcpy desc*/
+		mv_desc_set_byte_count(grp_start, len);
+		mv_desc_set_dest_addr(sw_desc->group_head, dest);
+		sw_desc->unmap_src_cnt = src_cnt;
+		sw_desc->unmap_len = len;
+		while (src_cnt--)
+			mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+	}
+	spin_unlock_bh(&mv_chan->lock);
+	dev_dbg(mv_chan->device->common.dev,
+		"%s sw_desc %p async_tx %p \n",
+		__func__, sw_desc, &sw_desc->async_tx);
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	mv_xor_slot_cleanup(mv_chan);
+
+	spin_lock_bh(&mv_chan->lock);
+	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+					chain_node) {
+		in_use_descs++;
+		list_del(&iter->chain_node);
+	}
+	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+				 completed_node) {
+		in_use_descs++;
+		list_del(&iter->completed_node);
+	}
+	list_for_each_entry_safe_reverse(
+		iter, _iter, &mv_chan->all_slots, slot_node) {
+		list_del(&iter->slot_node);
+		kfree(iter);
+		mv_chan->slots_allocated--;
+	}
+	mv_chan->last_used = NULL;
+
+	dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+		__func__, mv_chan->slots_allocated);
+	spin_unlock_bh(&mv_chan->lock);
+
+	if (in_use_descs)
+		dev_err(mv_chan->device->common.dev,
+			"freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_status - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ * @txstate: XOR transactions state holder (or NULL)
+ */
+static enum dma_status mv_xor_status(struct dma_chan *chan,
+					  dma_cookie_t cookie,
+					  struct dma_tx_state *txstate)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS) {
+		mv_xor_clean_completed_slots(mv_chan);
+		return ret;
+	}
+	mv_xor_slot_cleanup(mv_chan);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+	u32 val;
+
+	val = __raw_readl(XOR_CONFIG(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "config       0x%08x.\n", val);
+
+	val = __raw_readl(XOR_ACTIVATION(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "activation   0x%08x.\n", val);
+
+	val = __raw_readl(XOR_INTR_CAUSE(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "intr cause   0x%08x.\n", val);
+
+	val = __raw_readl(XOR_INTR_MASK(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "intr mask    0x%08x.\n", val);
+
+	val = __raw_readl(XOR_ERROR_CAUSE(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "error cause  0x%08x.\n", val);
+
+	val = __raw_readl(XOR_ERROR_ADDR(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "error addr   0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+					 u32 intr_cause)
+{
+	if (intr_cause & (1 << 4)) {
+	     dev_dbg(chan->device->common.dev,
+		     "ignore this error\n");
+	     return;
+	}
+
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "error on chan %d. intr cause 0x%08x.\n",
+		   chan->idx, intr_cause);
+
+	mv_dump_xor_regs(chan);
+	BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+	struct mv_xor_chan *chan = data;
+	u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+	dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+	if (mv_is_err_intr(intr_cause))
+		mv_xor_err_interrupt_handler(chan, intr_cause);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	mv_xor_device_clear_eoc_cause(chan);
+
+	return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+	if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+		mv_chan->pending = 0;
+		mv_chan_activate(mv_chan);
+	}
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+	int i;
+	void *src, *dest;
+	dma_addr_t src_dma, dest_dma;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	struct mv_xor_chan *mv_chan;
+
+	src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+
+	dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+		((u8 *) src)[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	dest_dma = dma_map_single(dma_chan->device->dev, dest,
+				  MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+	src_dma = dma_map_single(dma_chan->device->dev, src,
+				 MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+	tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+				    MV_XOR_TEST_SIZE, 0);
+	cookie = mv_xor_tx_submit(tx);
+	mv_xor_issue_pending(dma_chan);
+	async_tx_ack(tx);
+	msleep(1);
+
+	if (mv_xor_status(dma_chan, cookie, NULL) !=
+	    DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	mv_chan = to_mv_xor_chan(dma_chan);
+	dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+				MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+	if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	mv_xor_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+	int i, src_idx;
+	struct page *dest;
+	struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+	dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+	dma_addr_t dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	int err = 0;
+	struct mv_xor_chan *mv_chan;
+
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx]) {
+			while (src_idx--)
+				__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest) {
+		while (src_idx--)
+			__free_page(xor_srcs[src_idx]);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+		(cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+				DMA_FROM_DEVICE);
+
+	for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+					   0, PAGE_SIZE, DMA_TO_DEVICE);
+
+	tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				 MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+	cookie = mv_xor_tx_submit(tx);
+	mv_xor_issue_pending(dma_chan);
+	async_tx_ack(tx);
+	msleep(8);
+
+	if (mv_xor_status(dma_chan, cookie, NULL) !=
+	    DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test xor timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	mv_chan = to_mv_xor_chan(dma_chan);
+	dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+				PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i] != cmp_word) {
+			dev_printk(KERN_ERR, dma_chan->device->dev,
+				   "Self-test xor failed compare, disabling."
+				   " index %d, data %x, expected %x\n", i,
+				   ptr[i], cmp_word);
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+free_resources:
+	mv_xor_free_chan_resources(dma_chan);
+out:
+	src_idx = MV_XOR_NUM_SRC_TEST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+	struct mv_xor_device *device = platform_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+	struct mv_xor_chan *mv_chan;
+	struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+	dma_async_device_unregister(&device->common);
+
+	dma_free_coherent(&dev->dev, plat_data->pool_size,
+			device->dma_desc_pool_virt, device->dma_desc_pool);
+
+	list_for_each_entry_safe(chan, _chan, &device->common.channels,
+				device_node) {
+		mv_chan = to_mv_xor_chan(chan);
+		list_del(&chan->device_node);
+	}
+
+	return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	int irq;
+	struct mv_xor_device *adev;
+	struct mv_xor_chan *mv_chan;
+	struct dma_device *dma_dev;
+	struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+	adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+
+	dma_dev = &adev->common;
+
+	/* allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+							  plat_data->pool_size,
+							  &adev->dma_desc_pool,
+							  GFP_KERNEL);
+	if (!adev->dma_desc_pool_virt)
+		return -ENOMEM;
+
+	adev->id = plat_data->hw_id;
+
+	/* discover transaction capabilites from the platform data */
+	dma_dev->cap_mask = plat_data->cap_mask;
+	adev->pdev = pdev;
+	platform_set_drvdata(pdev, adev);
+
+	adev->shared = platform_get_drvdata(plat_data->shared);
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+	dma_dev->device_tx_status = mv_xor_status;
+	dma_dev->device_issue_pending = mv_xor_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+	if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->max_xor = 8;
+		dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+	}
+
+	mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+	if (!mv_chan) {
+		ret = -ENOMEM;
+		goto err_free_dma;
+	}
+	mv_chan->device = adev;
+	mv_chan->idx = plat_data->hw_id;
+	mv_chan->mmr_base = adev->shared->xor_base;
+
+	if (!mv_chan->mmr_base) {
+		ret = -ENOMEM;
+		goto err_free_dma;
+	}
+	tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+		     mv_chan);
+
+	/* clear errors before enabling interrupts */
+	mv_xor_device_clear_err_status(mv_chan);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto err_free_dma;
+	}
+	ret = devm_request_irq(&pdev->dev, irq,
+			       mv_xor_interrupt_handler,
+			       0, dev_name(&pdev->dev), mv_chan);
+	if (ret)
+		goto err_free_dma;
+
+	mv_chan_unmask_interrupts(mv_chan);
+
+	mv_set_mode(mv_chan, DMA_MEMCPY);
+
+	spin_lock_init(&mv_chan->lock);
+	INIT_LIST_HEAD(&mv_chan->chain);
+	INIT_LIST_HEAD(&mv_chan->completed_slots);
+	INIT_LIST_HEAD(&mv_chan->all_slots);
+	mv_chan->common.device = dma_dev;
+	dma_cookie_init(&mv_chan->common);
+
+	list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+		ret = mv_xor_memcpy_self_test(adev);
+		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+		if (ret)
+			goto err_free_dma;
+	}
+
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		ret = mv_xor_xor_self_test(adev);
+		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+		if (ret)
+			goto err_free_dma;
+	}
+
+	dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+	  "( %s%s%s%s)\n",
+	  dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+	  dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
+	  dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+	  dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+	dma_async_device_register(dma_dev);
+	goto out;
+
+ err_free_dma:
+	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+			adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+	return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+			 const struct mbus_dram_target_info *dram)
+{
+	void __iomem *base = msp->xor_base;
+	u32 win_enable = 0;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		writel(0, base + WINDOW_BASE(i));
+		writel(0, base + WINDOW_SIZE(i));
+		if (i < 4)
+			writel(0, base + WINDOW_REMAP_HIGH(i));
+	}
+
+	for (i = 0; i < dram->num_cs; i++) {
+		const struct mbus_dram_window *cs = dram->cs + i;
+
+		writel((cs->base & 0xffff0000) |
+		       (cs->mbus_attr << 8) |
+		       dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+		writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+		win_enable |= (1 << i);
+		win_enable |= 3 << (16 + (2 * i));
+	}
+
+	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
+
+static struct platform_driver mv_xor_driver = {
+	.probe		= mv_xor_probe,
+	.remove		= __devexit_p(mv_xor_remove),
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= MV_XOR_NAME,
+	},
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+	const struct mbus_dram_target_info *dram;
+	struct mv_xor_shared_private *msp;
+	struct resource *res;
+
+	dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+	msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+	if (!msp)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+				     resource_size(res));
+	if (!msp->xor_base)
+		return -EBUSY;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res)
+		return -ENODEV;
+
+	msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+					  resource_size(res));
+	if (!msp->xor_high_base)
+		return -EBUSY;
+
+	platform_set_drvdata(pdev, msp);
+
+	/*
+	 * (Re-)program MBUS remapping windows if we are asked to.
+	 */
+	dram = mv_mbus_dram_info();
+	if (dram)
+		mv_xor_conf_mbus_windows(msp, dram);
+
+	return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+	.probe		= mv_xor_shared_probe,
+	.remove		= mv_xor_shared_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= MV_XOR_SHARED_NAME,
+	},
+};
+
+
+static int __init mv_xor_init(void)
+{
+	int rc;
+
+	rc = platform_driver_register(&mv_xor_shared_driver);
+	if (!rc) {
+		rc = platform_driver_register(&mv_xor_driver);
+		if (rc)
+			platform_driver_unregister(&mv_xor_shared_driver);
+	}
+	return rc;
+}
+module_init(mv_xor_init);
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+	platform_driver_unregister(&mv_xor_driver);
+	platform_driver_unregister(&mv_xor_shared_driver);
+	return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 00000000..654876b7
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE		64
+#define MV_XOR_THRESHOLD		1
+
+#define XOR_OPERATION_MODE_XOR		0
+#define XOR_OPERATION_MODE_MEMCPY	2
+#define XOR_OPERATION_MODE_MEMSET	4
+
+#define XOR_CURR_DESC(chan)	(chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan)	(chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan)	(chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan)	(chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan)	(chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan)	(chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan)	(chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan)	(chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan)	(chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan)	(chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan)	(chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan)	(chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan)	(chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE	0x3F5
+
+#define WINDOW_BASE(w)		(0x250 + ((w) << 2))
+#define WINDOW_SIZE(w)		(0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w)	(0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan)	(0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+	void __iomem	*xor_base;
+	void __iomem	*xor_high_base;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct mv_xor_device {
+	struct platform_device		*pdev;
+	int				id;
+	dma_addr_t			dma_desc_pool;
+	void				*dma_desc_pool_virt;
+	struct dma_device		common;
+	struct mv_xor_shared_private	*shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+	int			pending;
+	spinlock_t		lock; /* protects the descriptor slot pool */
+	void __iomem		*mmr_base;
+	unsigned int		idx;
+	enum dma_transaction_type	current_type;
+	struct list_head	chain;
+	struct list_head	completed_slots;
+	struct mv_xor_device	*device;
+	struct dma_chan		common;
+	struct mv_xor_desc_slot	*last_used;
+	struct list_head	all_slots;
+	int			slots_allocated;
+	struct tasklet_struct	irq_tasklet;
+#ifdef USE_TIMER
+	unsigned long		cleanup_time;
+	u32			current_on_last_cleanup;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @tx_list: list of slots that make up a multi-descriptor transaction
+ * @async_tx: support for the async_tx api
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct mv_xor_desc_slot {
+	struct list_head	slot_node;
+	struct list_head	chain_node;
+	struct list_head	completed_node;
+	enum dma_transaction_type	type;
+	void			*hw_desc;
+	struct mv_xor_desc_slot	*group_head;
+	u16			slot_cnt;
+	u16			slots_per_op;
+	u16			idx;
+	u16			unmap_src_cnt;
+	u32			value;
+	size_t			unmap_len;
+	struct list_head	tx_list;
+	struct dma_async_tx_descriptor	async_tx;
+	union {
+		u32		*xor_check_result;
+		u32		*crc32_result;
+	};
+#ifdef USE_TIMER
+	unsigned long		arrival_time;
+	struct timer_list	timeout;
+#endif
+};
+
+/* This structure describes XOR descriptor size 64bytes	*/
+struct mv_xor_desc {
+	u32 status;		/* descriptor execution status */
+	u32 crc32_result;	/* result of CRC-32 calculation */
+	u32 desc_command;	/* type of operation to be carried out */
+	u32 phy_next_desc;	/* next descriptor address pointer */
+	u32 byte_count;		/* size of src/dst blocks in bytes */
+	u32 phy_dest_addr;	/* destination block address */
+	u32 phy_src_addr[8];	/* source block addresses */
+	u32 reserved0;
+	u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc)		\
+	container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx)	\
+	((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#define MV_XOR_MIN_BYTE_COUNT	(128)
+#define XOR_MAX_BYTE_COUNT	((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT	XOR_MAX_BYTE_COUNT
+
+
+#endif
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
new file mode 100644
index 00000000..655d4ce6
--- /dev/null
+++ b/drivers/dma/mxs-dma.c
@@ -0,0 +1,715 @@
+/*
+ * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * Refer to drivers/dma/imx-sdma.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/fsl/mxs-dma.h>
+
+#include <asm/irq.h>
+#include <mach/mxs.h>
+#include <mach/common.h>
+
+#include "dmaengine.h"
+
+/*
+ * NOTE: The term "PIO" throughout the mxs-dma implementation means
+ * PIO mode of mxs apbh-dma and apbx-dma.  With this working mode,
+ * dma can program the controller registers of peripheral devices.
+ */
+
+#define MXS_DMA_APBH		0
+#define MXS_DMA_APBX		1
+#define dma_is_apbh()		(mxs_dma->dev_id == MXS_DMA_APBH)
+
+#define APBH_VERSION_LATEST	3
+#define apbh_is_old()		(mxs_dma->version < APBH_VERSION_LATEST)
+
+#define HW_APBHX_CTRL0				0x000
+#define BM_APBH_CTRL0_APB_BURST8_EN		(1 << 29)
+#define BM_APBH_CTRL0_APB_BURST_EN		(1 << 28)
+#define BP_APBH_CTRL0_RESET_CHANNEL		16
+#define HW_APBHX_CTRL1				0x010
+#define HW_APBHX_CTRL2				0x020
+#define HW_APBHX_CHANNEL_CTRL			0x030
+#define BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL	16
+#define HW_APBH_VERSION				(cpu_is_mx23() ? 0x3f0 : 0x800)
+#define HW_APBX_VERSION				0x800
+#define BP_APBHX_VERSION_MAJOR			24
+#define HW_APBHX_CHn_NXTCMDAR(n) \
+	(((dma_is_apbh() && apbh_is_old()) ? 0x050 : 0x110) + (n) * 0x70)
+#define HW_APBHX_CHn_SEMA(n) \
+	(((dma_is_apbh() && apbh_is_old()) ? 0x080 : 0x140) + (n) * 0x70)
+
+/*
+ * ccw bits definitions
+ *
+ * COMMAND:		0..1	(2)
+ * CHAIN:		2	(1)
+ * IRQ:			3	(1)
+ * NAND_LOCK:		4	(1) - not implemented
+ * NAND_WAIT4READY:	5	(1) - not implemented
+ * DEC_SEM:		6	(1)
+ * WAIT4END:		7	(1)
+ * HALT_ON_TERMINATE:	8	(1)
+ * TERMINATE_FLUSH:	9	(1)
+ * RESERVED:		10..11	(2)
+ * PIO_NUM:		12..15	(4)
+ */
+#define BP_CCW_COMMAND		0
+#define BM_CCW_COMMAND		(3 << 0)
+#define CCW_CHAIN		(1 << 2)
+#define CCW_IRQ			(1 << 3)
+#define CCW_DEC_SEM		(1 << 6)
+#define CCW_WAIT4END		(1 << 7)
+#define CCW_HALT_ON_TERM	(1 << 8)
+#define CCW_TERM_FLUSH		(1 << 9)
+#define BP_CCW_PIO_NUM		12
+#define BM_CCW_PIO_NUM		(0xf << 12)
+
+#define BF_CCW(value, field)	(((value) << BP_CCW_##field) & BM_CCW_##field)
+
+#define MXS_DMA_CMD_NO_XFER	0
+#define MXS_DMA_CMD_WRITE	1
+#define MXS_DMA_CMD_READ	2
+#define MXS_DMA_CMD_DMA_SENSE	3	/* not implemented */
+
+struct mxs_dma_ccw {
+	u32		next;
+	u16		bits;
+	u16		xfer_bytes;
+#define MAX_XFER_BYTES	0xff00
+	u32		bufaddr;
+#define MXS_PIO_WORDS	16
+	u32		pio_words[MXS_PIO_WORDS];
+};
+
+#define NUM_CCW	(int)(PAGE_SIZE / sizeof(struct mxs_dma_ccw))
+
+struct mxs_dma_chan {
+	struct mxs_dma_engine		*mxs_dma;
+	struct dma_chan			chan;
+	struct dma_async_tx_descriptor	desc;
+	struct tasklet_struct		tasklet;
+	int				chan_irq;
+	struct mxs_dma_ccw		*ccw;
+	dma_addr_t			ccw_phys;
+	int				desc_count;
+	enum dma_status			status;
+	unsigned int			flags;
+#define MXS_DMA_SG_LOOP			(1 << 0)
+};
+
+#define MXS_DMA_CHANNELS		16
+#define MXS_DMA_CHANNELS_MASK		0xffff
+
+struct mxs_dma_engine {
+	int				dev_id;
+	unsigned int			version;
+	void __iomem			*base;
+	struct clk			*clk;
+	struct dma_device		dma_device;
+	struct device_dma_parameters	dma_parms;
+	struct mxs_dma_chan		mxs_chans[MXS_DMA_CHANNELS];
+};
+
+static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	if (dma_is_apbh() && apbh_is_old())
+		writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL),
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+	else
+		writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL),
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_SET_ADDR);
+}
+
+static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* set cmd_addr up */
+	writel(mxs_chan->ccw_phys,
+		mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(chan_id));
+
+	/* write 1 to SEMA to kick off the channel */
+	writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(chan_id));
+}
+
+static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan)
+{
+	mxs_chan->status = DMA_SUCCESS;
+}
+
+static void mxs_dma_pause_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* freeze the channel */
+	if (dma_is_apbh() && apbh_is_old())
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+	else
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_SET_ADDR);
+
+	mxs_chan->status = DMA_PAUSED;
+}
+
+static void mxs_dma_resume_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* unfreeze the channel */
+	if (dma_is_apbh() && apbh_is_old())
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_CLR_ADDR);
+	else
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_CLR_ADDR);
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+}
+
+static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct mxs_dma_chan, chan);
+}
+
+static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	return dma_cookie_assign(tx);
+}
+
+static void mxs_dma_tasklet(unsigned long data)
+{
+	struct mxs_dma_chan *mxs_chan = (struct mxs_dma_chan *) data;
+
+	if (mxs_chan->desc.callback)
+		mxs_chan->desc.callback(mxs_chan->desc.callback_param);
+}
+
+static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
+{
+	struct mxs_dma_engine *mxs_dma = dev_id;
+	u32 stat1, stat2;
+
+	/* completion status */
+	stat1 = readl(mxs_dma->base + HW_APBHX_CTRL1);
+	stat1 &= MXS_DMA_CHANNELS_MASK;
+	writel(stat1, mxs_dma->base + HW_APBHX_CTRL1 + MXS_CLR_ADDR);
+
+	/* error status */
+	stat2 = readl(mxs_dma->base + HW_APBHX_CTRL2);
+	writel(stat2, mxs_dma->base + HW_APBHX_CTRL2 + MXS_CLR_ADDR);
+
+	/*
+	 * When both completion and error of termination bits set at the
+	 * same time, we do not take it as an error.  IOW, it only becomes
+	 * an error we need to handle here in case of either it's (1) a bus
+	 * error or (2) a termination error with no completion.
+	 */
+	stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */
+		(~(stat2 >> MXS_DMA_CHANNELS) & stat2 & ~stat1); /* (2) */
+
+	/* combine error and completion status for checking */
+	stat1 = (stat2 << MXS_DMA_CHANNELS) | stat1;
+	while (stat1) {
+		int channel = fls(stat1) - 1;
+		struct mxs_dma_chan *mxs_chan =
+			&mxs_dma->mxs_chans[channel % MXS_DMA_CHANNELS];
+
+		if (channel >= MXS_DMA_CHANNELS) {
+			dev_dbg(mxs_dma->dma_device.dev,
+				"%s: error in channel %d\n", __func__,
+				channel - MXS_DMA_CHANNELS);
+			mxs_chan->status = DMA_ERROR;
+			mxs_dma_reset_chan(mxs_chan);
+		} else {
+			if (mxs_chan->flags & MXS_DMA_SG_LOOP)
+				mxs_chan->status = DMA_IN_PROGRESS;
+			else
+				mxs_chan->status = DMA_SUCCESS;
+		}
+
+		stat1 &= ~(1 << channel);
+
+		if (mxs_chan->status == DMA_SUCCESS)
+			dma_cookie_complete(&mxs_chan->desc);
+
+		/* schedule tasklet on this channel */
+		tasklet_schedule(&mxs_chan->tasklet);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_data *data = chan->private;
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int ret;
+
+	if (!data)
+		return -EINVAL;
+
+	mxs_chan->chan_irq = data->chan_irq;
+
+	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+				&mxs_chan->ccw_phys, GFP_KERNEL);
+	if (!mxs_chan->ccw) {
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	memset(mxs_chan->ccw, 0, PAGE_SIZE);
+
+	if (mxs_chan->chan_irq != NO_IRQ) {
+		ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
+					0, "mxs-dma", mxs_dma);
+		if (ret)
+			goto err_irq;
+	}
+
+	ret = clk_prepare_enable(mxs_dma->clk);
+	if (ret)
+		goto err_clk;
+
+	mxs_dma_reset_chan(mxs_chan);
+
+	dma_async_tx_descriptor_init(&mxs_chan->desc, chan);
+	mxs_chan->desc.tx_submit = mxs_dma_tx_submit;
+
+	/* the descriptor is ready */
+	async_tx_ack(&mxs_chan->desc);
+
+	return 0;
+
+err_clk:
+	free_irq(mxs_chan->chan_irq, mxs_dma);
+err_irq:
+	dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+			mxs_chan->ccw, mxs_chan->ccw_phys);
+err_alloc:
+	return ret;
+}
+
+static void mxs_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+
+	mxs_dma_disable_chan(mxs_chan);
+
+	free_irq(mxs_chan->chan_irq, mxs_dma);
+
+	dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+			mxs_chan->ccw, mxs_chan->ccw_phys);
+
+	clk_disable_unprepare(mxs_dma->clk);
+}
+
+/*
+ * How to use the flags for ->device_prep_slave_sg() :
+ *    [1] If there is only one DMA command in the DMA chain, the code should be:
+ *            ......
+ *            ->device_prep_slave_sg(DMA_CTRL_ACK);
+ *            ......
+ *    [2] If there are two DMA commands in the DMA chain, the code should be
+ *            ......
+ *            ->device_prep_slave_sg(0);
+ *            ......
+ *            ->device_prep_slave_sg(DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ *            ......
+ *    [3] If there are more than two DMA commands in the DMA chain, the code
+ *        should be:
+ *            ......
+ *            ->device_prep_slave_sg(0);                                // First
+ *            ......
+ *            ->device_prep_slave_sg(DMA_PREP_INTERRUPT [| DMA_CTRL_ACK]);
+ *            ......
+ *            ->device_prep_slave_sg(DMA_PREP_INTERRUPT | DMA_CTRL_ACK); // Last
+ *            ......
+ */
+static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	struct mxs_dma_ccw *ccw;
+	struct scatterlist *sg;
+	int i, j;
+	u32 *pio;
+	bool append = flags & DMA_PREP_INTERRUPT;
+	int idx = append ? mxs_chan->desc_count : 0;
+
+	if (mxs_chan->status == DMA_IN_PROGRESS && !append)
+		return NULL;
+
+	if (sg_len + (append ? idx : 0) > NUM_CCW) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum number of sg exceeded: %d > %d\n",
+				sg_len, NUM_CCW);
+		goto err_out;
+	}
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+	mxs_chan->flags = 0;
+
+	/*
+	 * If the sg is prepared with append flag set, the sg
+	 * will be appended to the last prepared sg.
+	 */
+	if (append) {
+		BUG_ON(idx < 1);
+		ccw = &mxs_chan->ccw[idx - 1];
+		ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx;
+		ccw->bits |= CCW_CHAIN;
+		ccw->bits &= ~CCW_IRQ;
+		ccw->bits &= ~CCW_DEC_SEM;
+	} else {
+		idx = 0;
+	}
+
+	if (direction == DMA_TRANS_NONE) {
+		ccw = &mxs_chan->ccw[idx++];
+		pio = (u32 *) sgl;
+
+		for (j = 0; j < sg_len;)
+			ccw->pio_words[j++] = *pio++;
+
+		ccw->bits = 0;
+		ccw->bits |= CCW_IRQ;
+		ccw->bits |= CCW_DEC_SEM;
+		if (flags & DMA_CTRL_ACK)
+			ccw->bits |= CCW_WAIT4END;
+		ccw->bits |= CCW_HALT_ON_TERM;
+		ccw->bits |= CCW_TERM_FLUSH;
+		ccw->bits |= BF_CCW(sg_len, PIO_NUM);
+		ccw->bits |= BF_CCW(MXS_DMA_CMD_NO_XFER, COMMAND);
+	} else {
+		for_each_sg(sgl, sg, sg_len, i) {
+			if (sg->length > MAX_XFER_BYTES) {
+				dev_err(mxs_dma->dma_device.dev, "maximum bytes for sg entry exceeded: %d > %d\n",
+						sg->length, MAX_XFER_BYTES);
+				goto err_out;
+			}
+
+			ccw = &mxs_chan->ccw[idx++];
+
+			ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx;
+			ccw->bufaddr = sg->dma_address;
+			ccw->xfer_bytes = sg->length;
+
+			ccw->bits = 0;
+			ccw->bits |= CCW_CHAIN;
+			ccw->bits |= CCW_HALT_ON_TERM;
+			ccw->bits |= CCW_TERM_FLUSH;
+			ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
+					MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ,
+					COMMAND);
+
+			if (i + 1 == sg_len) {
+				ccw->bits &= ~CCW_CHAIN;
+				ccw->bits |= CCW_IRQ;
+				ccw->bits |= CCW_DEC_SEM;
+				if (flags & DMA_CTRL_ACK)
+					ccw->bits |= CCW_WAIT4END;
+			}
+		}
+	}
+	mxs_chan->desc_count = idx;
+
+	return &mxs_chan->desc;
+
+err_out:
+	mxs_chan->status = DMA_ERROR;
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int num_periods = buf_len / period_len;
+	int i = 0, buf = 0;
+
+	if (mxs_chan->status == DMA_IN_PROGRESS)
+		return NULL;
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+	mxs_chan->flags |= MXS_DMA_SG_LOOP;
+
+	if (num_periods > NUM_CCW) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum number of sg exceeded: %d > %d\n",
+				num_periods, NUM_CCW);
+		goto err_out;
+	}
+
+	if (period_len > MAX_XFER_BYTES) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum period size exceeded: %d > %d\n",
+				period_len, MAX_XFER_BYTES);
+		goto err_out;
+	}
+
+	while (buf < buf_len) {
+		struct mxs_dma_ccw *ccw = &mxs_chan->ccw[i];
+
+		if (i + 1 == num_periods)
+			ccw->next = mxs_chan->ccw_phys;
+		else
+			ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * (i + 1);
+
+		ccw->bufaddr = dma_addr;
+		ccw->xfer_bytes = period_len;
+
+		ccw->bits = 0;
+		ccw->bits |= CCW_CHAIN;
+		ccw->bits |= CCW_IRQ;
+		ccw->bits |= CCW_HALT_ON_TERM;
+		ccw->bits |= CCW_TERM_FLUSH;
+		ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
+				MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
+
+		dma_addr += period_len;
+		buf += period_len;
+
+		i++;
+	}
+	mxs_chan->desc_count = i;
+
+	return &mxs_chan->desc;
+
+err_out:
+	mxs_chan->status = DMA_ERROR;
+	return NULL;
+}
+
+static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	int ret = 0;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		mxs_dma_reset_chan(mxs_chan);
+		mxs_dma_disable_chan(mxs_chan);
+		break;
+	case DMA_PAUSE:
+		mxs_dma_pause_chan(mxs_chan);
+		break;
+	case DMA_RESUME:
+		mxs_dma_resume_chan(mxs_chan);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	dma_cookie_t last_used;
+
+	last_used = chan->cookie;
+	dma_set_tx_state(txstate, chan->completed_cookie, last_used, 0);
+
+	return mxs_chan->status;
+}
+
+static void mxs_dma_issue_pending(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+
+	mxs_dma_enable_chan(mxs_chan);
+}
+
+static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
+{
+	int ret;
+
+	ret = clk_prepare_enable(mxs_dma->clk);
+	if (ret)
+		return ret;
+
+	ret = mxs_reset_block(mxs_dma->base);
+	if (ret)
+		goto err_out;
+
+	/* only major version matters */
+	mxs_dma->version = readl(mxs_dma->base +
+				((mxs_dma->dev_id == MXS_DMA_APBX) ?
+				HW_APBX_VERSION : HW_APBH_VERSION)) >>
+				BP_APBHX_VERSION_MAJOR;
+
+	/* enable apbh burst */
+	if (dma_is_apbh()) {
+		writel(BM_APBH_CTRL0_APB_BURST_EN,
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+		writel(BM_APBH_CTRL0_APB_BURST8_EN,
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+	}
+
+	/* enable irq for all the channels */
+	writel(MXS_DMA_CHANNELS_MASK << MXS_DMA_CHANNELS,
+		mxs_dma->base + HW_APBHX_CTRL1 + MXS_SET_ADDR);
+
+err_out:
+	clk_disable_unprepare(mxs_dma->clk);
+	return ret;
+}
+
+static int __init mxs_dma_probe(struct platform_device *pdev)
+{
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(pdev);
+	struct mxs_dma_engine *mxs_dma;
+	struct resource *iores;
+	int ret, i;
+
+	mxs_dma = kzalloc(sizeof(*mxs_dma), GFP_KERNEL);
+	if (!mxs_dma)
+		return -ENOMEM;
+
+	mxs_dma->dev_id = id_entry->driver_data;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	if (!request_mem_region(iores->start, resource_size(iores),
+				pdev->name)) {
+		ret = -EBUSY;
+		goto err_request_region;
+	}
+
+	mxs_dma->base = ioremap(iores->start, resource_size(iores));
+	if (!mxs_dma->base) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	mxs_dma->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(mxs_dma->clk)) {
+		ret = PTR_ERR(mxs_dma->clk);
+		goto err_clk;
+	}
+
+	dma_cap_set(DMA_SLAVE, mxs_dma->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, mxs_dma->dma_device.cap_mask);
+
+	INIT_LIST_HEAD(&mxs_dma->dma_device.channels);
+
+	/* Initialize channel parameters */
+	for (i = 0; i < MXS_DMA_CHANNELS; i++) {
+		struct mxs_dma_chan *mxs_chan = &mxs_dma->mxs_chans[i];
+
+		mxs_chan->mxs_dma = mxs_dma;
+		mxs_chan->chan.device = &mxs_dma->dma_device;
+		dma_cookie_init(&mxs_chan->chan);
+
+		tasklet_init(&mxs_chan->tasklet, mxs_dma_tasklet,
+			     (unsigned long) mxs_chan);
+
+
+		/* Add the channel to mxs_chan list */
+		list_add_tail(&mxs_chan->chan.device_node,
+			&mxs_dma->dma_device.channels);
+	}
+
+	ret = mxs_dma_init(mxs_dma);
+	if (ret)
+		goto err_init;
+
+	mxs_dma->dma_device.dev = &pdev->dev;
+
+	/* mxs_dma gets 65535 bytes maximum sg size */
+	mxs_dma->dma_device.dev->dma_parms = &mxs_dma->dma_parms;
+	dma_set_max_seg_size(mxs_dma->dma_device.dev, MAX_XFER_BYTES);
+
+	mxs_dma->dma_device.device_alloc_chan_resources = mxs_dma_alloc_chan_resources;
+	mxs_dma->dma_device.device_free_chan_resources = mxs_dma_free_chan_resources;
+	mxs_dma->dma_device.device_tx_status = mxs_dma_tx_status;
+	mxs_dma->dma_device.device_prep_slave_sg = mxs_dma_prep_slave_sg;
+	mxs_dma->dma_device.device_prep_dma_cyclic = mxs_dma_prep_dma_cyclic;
+	mxs_dma->dma_device.device_control = mxs_dma_control;
+	mxs_dma->dma_device.device_issue_pending = mxs_dma_issue_pending;
+
+	ret = dma_async_device_register(&mxs_dma->dma_device);
+	if (ret) {
+		dev_err(mxs_dma->dma_device.dev, "unable to register\n");
+		goto err_init;
+	}
+
+	dev_info(mxs_dma->dma_device.dev, "initialized\n");
+
+	return 0;
+
+err_init:
+	clk_put(mxs_dma->clk);
+err_clk:
+	iounmap(mxs_dma->base);
+err_ioremap:
+	release_mem_region(iores->start, resource_size(iores));
+err_request_region:
+	kfree(mxs_dma);
+	return ret;
+}
+
+static struct platform_device_id mxs_dma_type[] = {
+	{
+		.name = "mxs-dma-apbh",
+		.driver_data = MXS_DMA_APBH,
+	}, {
+		.name = "mxs-dma-apbx",
+		.driver_data = MXS_DMA_APBX,
+	}, {
+		/* end of list */
+	}
+};
+
+static struct platform_driver mxs_dma_driver = {
+	.driver		= {
+		.name	= "mxs-dma",
+	},
+	.id_table	= mxs_dma_type,
+};
+
+static int __init mxs_dma_module_init(void)
+{
+	return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe);
+}
+subsys_initcall(mxs_dma_module_init);
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
new file mode 100644
index 00000000..65c0495a
--- /dev/null
+++ b/drivers/dma/pch_dma.c
@@ -0,0 +1,1048 @@
+/*
+ * Topcliff PCH DMA controller driver
+ * Copyright (c) 2010 Intel Corporation
+ * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pch_dma.h>
+
+#include "dmaengine.h"
+
+#define DRV_NAME "pch-dma"
+
+#define DMA_CTL0_DISABLE		0x0
+#define DMA_CTL0_SG			0x1
+#define DMA_CTL0_ONESHOT		0x2
+#define DMA_CTL0_MODE_MASK_BITS		0x3
+#define DMA_CTL0_DIR_SHIFT_BITS		2
+#define DMA_CTL0_BITS_PER_CH		4
+
+#define DMA_CTL2_START_SHIFT_BITS	8
+#define DMA_CTL2_IRQ_ENABLE_MASK	((1UL << DMA_CTL2_START_SHIFT_BITS) - 1)
+
+#define DMA_STATUS_IDLE			0x0
+#define DMA_STATUS_DESC_READ		0x1
+#define DMA_STATUS_WAIT			0x2
+#define DMA_STATUS_ACCESS		0x3
+#define DMA_STATUS_BITS_PER_CH		2
+#define DMA_STATUS_MASK_BITS		0x3
+#define DMA_STATUS_SHIFT_BITS		16
+#define DMA_STATUS_IRQ(x)		(0x1 << (x))
+#define DMA_STATUS0_ERR(x)		(0x1 << ((x) + 8))
+#define DMA_STATUS2_ERR(x)		(0x1 << (x))
+
+#define DMA_DESC_WIDTH_SHIFT_BITS	12
+#define DMA_DESC_WIDTH_1_BYTE		(0x3 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_WIDTH_2_BYTES		(0x2 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_WIDTH_4_BYTES		(0x0 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_MAX_COUNT_1_BYTE	0x3FF
+#define DMA_DESC_MAX_COUNT_2_BYTES	0x3FF
+#define DMA_DESC_MAX_COUNT_4_BYTES	0x7FF
+#define DMA_DESC_END_WITHOUT_IRQ	0x0
+#define DMA_DESC_END_WITH_IRQ		0x1
+#define DMA_DESC_FOLLOW_WITHOUT_IRQ	0x2
+#define DMA_DESC_FOLLOW_WITH_IRQ	0x3
+
+#define MAX_CHAN_NR			12
+
+#define DMA_MASK_CTL0_MODE	0x33333333
+#define DMA_MASK_CTL2_MODE	0x00003333
+
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+		 "initial descriptors per channel (default: 64)");
+
+struct pch_dma_desc_regs {
+	u32	dev_addr;
+	u32	mem_addr;
+	u32	size;
+	u32	next;
+};
+
+struct pch_dma_regs {
+	u32	dma_ctl0;
+	u32	dma_ctl1;
+	u32	dma_ctl2;
+	u32	dma_ctl3;
+	u32	dma_sts0;
+	u32	dma_sts1;
+	u32	dma_sts2;
+	u32	reserved3;
+	struct pch_dma_desc_regs desc[MAX_CHAN_NR];
+};
+
+struct pch_dma_desc {
+	struct pch_dma_desc_regs regs;
+	struct dma_async_tx_descriptor txd;
+	struct list_head	desc_node;
+	struct list_head	tx_list;
+};
+
+struct pch_dma_chan {
+	struct dma_chan		chan;
+	void __iomem *membase;
+	enum dma_transfer_direction dir;
+	struct tasklet_struct	tasklet;
+	unsigned long		err_status;
+
+	spinlock_t		lock;
+
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	unsigned int		descs_allocated;
+};
+
+#define PDC_DEV_ADDR	0x00
+#define PDC_MEM_ADDR	0x04
+#define PDC_SIZE	0x08
+#define PDC_NEXT	0x0C
+
+#define channel_readl(pdc, name) \
+	readl((pdc)->membase + PDC_##name)
+#define channel_writel(pdc, name, val) \
+	writel((val), (pdc)->membase + PDC_##name)
+
+struct pch_dma {
+	struct dma_device	dma;
+	void __iomem *membase;
+	struct pci_pool		*pool;
+	struct pch_dma_regs	regs;
+	struct pch_dma_desc_regs ch_regs[MAX_CHAN_NR];
+	struct pch_dma_chan	channels[MAX_CHAN_NR];
+};
+
+#define PCH_DMA_CTL0	0x00
+#define PCH_DMA_CTL1	0x04
+#define PCH_DMA_CTL2	0x08
+#define PCH_DMA_CTL3	0x0C
+#define PCH_DMA_STS0	0x10
+#define PCH_DMA_STS1	0x14
+#define PCH_DMA_STS2	0x18
+
+#define dma_readl(pd, name) \
+	readl((pd)->membase + PCH_DMA_##name)
+#define dma_writel(pd, name, val) \
+	writel((val), (pd)->membase + PCH_DMA_##name)
+
+static inline
+struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct pch_dma_desc, txd);
+}
+
+static inline struct pch_dma_chan *to_pd_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct pch_dma_chan, chan);
+}
+
+static inline struct pch_dma *to_pd(struct dma_device *ddev)
+{
+	return container_of(ddev, struct pch_dma, dma);
+}
+
+static inline struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct device *chan2parent(struct dma_chan *chan)
+{
+	return chan->dev->device.parent;
+}
+
+static inline
+struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan)
+{
+	return list_first_entry(&pd_chan->active_list,
+				struct pch_dma_desc, desc_node);
+}
+
+static inline
+struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan)
+{
+	return list_first_entry(&pd_chan->queue,
+				struct pch_dma_desc, desc_node);
+}
+
+static void pdc_enable_irq(struct dma_chan *chan, int enable)
+{
+	struct pch_dma *pd = to_pd(chan->device);
+	u32 val;
+	int pos;
+
+	if (chan->chan_id < 8)
+		pos = chan->chan_id;
+	else
+		pos = chan->chan_id + 8;
+
+	val = dma_readl(pd, CTL2);
+
+	if (enable)
+		val |= 0x1 << pos;
+	else
+		val &= ~(0x1 << pos);
+
+	dma_writel(pd, CTL2, val);
+
+	dev_dbg(chan2dev(chan), "pdc_enable_irq: chan %d -> %x\n",
+		chan->chan_id, val);
+}
+
+static void pdc_set_dir(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma *pd = to_pd(chan->device);
+	u32 val;
+	u32 mask_mode;
+	u32 mask_ctl;
+
+	if (chan->chan_id < 8) {
+		val = dma_readl(pd, CTL0);
+
+		mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+					(DMA_CTL0_BITS_PER_CH * chan->chan_id);
+		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+				       (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		val &= mask_mode;
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
+			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
+				       DMA_CTL0_DIR_SHIFT_BITS);
+		else
+			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
+					 DMA_CTL0_DIR_SHIFT_BITS));
+
+		val |= mask_ctl;
+		dma_writel(pd, CTL0, val);
+	} else {
+		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
+		val = dma_readl(pd, CTL3);
+
+		mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+						(DMA_CTL0_BITS_PER_CH * ch);
+		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+						 (DMA_CTL0_BITS_PER_CH * ch));
+		val &= mask_mode;
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
+			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
+				       DMA_CTL0_DIR_SHIFT_BITS);
+		else
+			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch +
+					 DMA_CTL0_DIR_SHIFT_BITS));
+		val |= mask_ctl;
+		dma_writel(pd, CTL3, val);
+	}
+
+	dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n",
+		chan->chan_id, val);
+}
+
+static void pdc_set_mode(struct dma_chan *chan, u32 mode)
+{
+	struct pch_dma *pd = to_pd(chan->device);
+	u32 val;
+	u32 mask_ctl;
+	u32 mask_dir;
+
+	if (chan->chan_id < 8) {
+		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+			   (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\
+				 DMA_CTL0_DIR_SHIFT_BITS);
+		val = dma_readl(pd, CTL0);
+		val &= mask_dir;
+		val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id);
+		val |= mask_ctl;
+		dma_writel(pd, CTL0, val);
+	} else {
+		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
+		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+						 (DMA_CTL0_BITS_PER_CH * ch));
+		mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\
+				 DMA_CTL0_DIR_SHIFT_BITS);
+		val = dma_readl(pd, CTL3);
+		val &= mask_dir;
+		val |= mode << (DMA_CTL0_BITS_PER_CH * ch);
+		val |= mask_ctl;
+		dma_writel(pd, CTL3, val);
+	}
+
+	dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n",
+		chan->chan_id, val);
+}
+
+static u32 pdc_get_status0(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma *pd = to_pd(pd_chan->chan.device);
+	u32 val;
+
+	val = dma_readl(pd, STS0);
+	return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS +
+			DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id));
+}
+
+static u32 pdc_get_status2(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma *pd = to_pd(pd_chan->chan.device);
+	u32 val;
+
+	val = dma_readl(pd, STS2);
+	return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS +
+			DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8)));
+}
+
+static bool pdc_is_idle(struct pch_dma_chan *pd_chan)
+{
+	u32 sts;
+
+	if (pd_chan->chan.chan_id < 8)
+		sts = pdc_get_status0(pd_chan);
+	else
+		sts = pdc_get_status2(pd_chan);
+
+
+	if (sts == DMA_STATUS_IDLE)
+		return true;
+	else
+		return false;
+}
+
+static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc)
+{
+	if (!pdc_is_idle(pd_chan)) {
+		dev_err(chan2dev(&pd_chan->chan),
+			"BUG: Attempt to start non-idle channel\n");
+		return;
+	}
+
+	dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> dev_addr: %x\n",
+		pd_chan->chan.chan_id, desc->regs.dev_addr);
+	dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> mem_addr: %x\n",
+		pd_chan->chan.chan_id, desc->regs.mem_addr);
+	dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> size: %x\n",
+		pd_chan->chan.chan_id, desc->regs.size);
+	dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> next: %x\n",
+		pd_chan->chan.chan_id, desc->regs.next);
+
+	if (list_empty(&desc->tx_list)) {
+		channel_writel(pd_chan, DEV_ADDR, desc->regs.dev_addr);
+		channel_writel(pd_chan, MEM_ADDR, desc->regs.mem_addr);
+		channel_writel(pd_chan, SIZE, desc->regs.size);
+		channel_writel(pd_chan, NEXT, desc->regs.next);
+		pdc_set_mode(&pd_chan->chan, DMA_CTL0_ONESHOT);
+	} else {
+		channel_writel(pd_chan, NEXT, desc->txd.phys);
+		pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG);
+	}
+}
+
+static void pdc_chain_complete(struct pch_dma_chan *pd_chan,
+			       struct pch_dma_desc *desc)
+{
+	struct dma_async_tx_descriptor *txd = &desc->txd;
+	dma_async_tx_callback callback = txd->callback;
+	void *param = txd->callback_param;
+
+	list_splice_init(&desc->tx_list, &pd_chan->free_list);
+	list_move(&desc->desc_node, &pd_chan->free_list);
+
+	if (callback)
+		callback(param);
+}
+
+static void pdc_complete_all(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma_desc *desc, *_d;
+	LIST_HEAD(list);
+
+	BUG_ON(!pdc_is_idle(pd_chan));
+
+	if (!list_empty(&pd_chan->queue))
+		pdc_dostart(pd_chan, pdc_first_queued(pd_chan));
+
+	list_splice_init(&pd_chan->active_list, &list);
+	list_splice_init(&pd_chan->queue, &pd_chan->active_list);
+
+	list_for_each_entry_safe(desc, _d, &list, desc_node)
+		pdc_chain_complete(pd_chan, desc);
+}
+
+static void pdc_handle_error(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma_desc *bad_desc;
+
+	bad_desc = pdc_first_active(pd_chan);
+	list_del(&bad_desc->desc_node);
+
+	list_splice_init(&pd_chan->queue, pd_chan->active_list.prev);
+
+	if (!list_empty(&pd_chan->active_list))
+		pdc_dostart(pd_chan, pdc_first_active(pd_chan));
+
+	dev_crit(chan2dev(&pd_chan->chan), "Bad descriptor submitted\n");
+	dev_crit(chan2dev(&pd_chan->chan), "descriptor cookie: %d\n",
+		 bad_desc->txd.cookie);
+
+	pdc_chain_complete(pd_chan, bad_desc);
+}
+
+static void pdc_advance_work(struct pch_dma_chan *pd_chan)
+{
+	if (list_empty(&pd_chan->active_list) ||
+		list_is_singular(&pd_chan->active_list)) {
+		pdc_complete_all(pd_chan);
+	} else {
+		pdc_chain_complete(pd_chan, pdc_first_active(pd_chan));
+		pdc_dostart(pd_chan, pdc_first_active(pd_chan));
+	}
+}
+
+static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct pch_dma_desc *desc = to_pd_desc(txd);
+	struct pch_dma_chan *pd_chan = to_pd_chan(txd->chan);
+	dma_cookie_t cookie;
+
+	spin_lock(&pd_chan->lock);
+	cookie = dma_cookie_assign(txd);
+
+	if (list_empty(&pd_chan->active_list)) {
+		list_add_tail(&desc->desc_node, &pd_chan->active_list);
+		pdc_dostart(pd_chan, desc);
+	} else {
+		list_add_tail(&desc->desc_node, &pd_chan->queue);
+	}
+
+	spin_unlock(&pd_chan->lock);
+	return 0;
+}
+
+static struct pch_dma_desc *pdc_alloc_desc(struct dma_chan *chan, gfp_t flags)
+{
+	struct pch_dma_desc *desc = NULL;
+	struct pch_dma *pd = to_pd(chan->device);
+	dma_addr_t addr;
+
+	desc = pci_pool_alloc(pd->pool, flags, &addr);
+	if (desc) {
+		memset(desc, 0, sizeof(struct pch_dma_desc));
+		INIT_LIST_HEAD(&desc->tx_list);
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		desc->txd.tx_submit = pd_tx_submit;
+		desc->txd.flags = DMA_CTRL_ACK;
+		desc->txd.phys = addr;
+	}
+
+	return desc;
+}
+
+static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma_desc *desc, *_d;
+	struct pch_dma_desc *ret = NULL;
+	int i = 0;
+
+	spin_lock(&pd_chan->lock);
+	list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) {
+		i++;
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+		dev_dbg(chan2dev(&pd_chan->chan), "desc %p not ACKed\n", desc);
+	}
+	spin_unlock(&pd_chan->lock);
+	dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i);
+
+	if (!ret) {
+		ret = pdc_alloc_desc(&pd_chan->chan, GFP_NOIO);
+		if (ret) {
+			spin_lock(&pd_chan->lock);
+			pd_chan->descs_allocated++;
+			spin_unlock(&pd_chan->lock);
+		} else {
+			dev_err(chan2dev(&pd_chan->chan),
+				"failed to alloc desc\n");
+		}
+	}
+
+	return ret;
+}
+
+static void pdc_desc_put(struct pch_dma_chan *pd_chan,
+			 struct pch_dma_desc *desc)
+{
+	if (desc) {
+		spin_lock(&pd_chan->lock);
+		list_splice_init(&desc->tx_list, &pd_chan->free_list);
+		list_add(&desc->desc_node, &pd_chan->free_list);
+		spin_unlock(&pd_chan->lock);
+	}
+}
+
+static int pd_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma_desc *desc;
+	LIST_HEAD(tmp_list);
+	int i;
+
+	if (!pdc_is_idle(pd_chan)) {
+		dev_dbg(chan2dev(chan), "DMA channel not idle ?\n");
+		return -EIO;
+	}
+
+	if (!list_empty(&pd_chan->free_list))
+		return pd_chan->descs_allocated;
+
+	for (i = 0; i < init_nr_desc_per_channel; i++) {
+		desc = pdc_alloc_desc(chan, GFP_KERNEL);
+
+		if (!desc) {
+			dev_warn(chan2dev(chan),
+				"Only allocated %d initial descriptors\n", i);
+			break;
+		}
+
+		list_add_tail(&desc->desc_node, &tmp_list);
+	}
+
+	spin_lock_irq(&pd_chan->lock);
+	list_splice(&tmp_list, &pd_chan->free_list);
+	pd_chan->descs_allocated = i;
+	dma_cookie_init(chan);
+	spin_unlock_irq(&pd_chan->lock);
+
+	pdc_enable_irq(chan, 1);
+
+	return pd_chan->descs_allocated;
+}
+
+static void pd_free_chan_resources(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma *pd = to_pd(chan->device);
+	struct pch_dma_desc *desc, *_d;
+	LIST_HEAD(tmp_list);
+
+	BUG_ON(!pdc_is_idle(pd_chan));
+	BUG_ON(!list_empty(&pd_chan->active_list));
+	BUG_ON(!list_empty(&pd_chan->queue));
+
+	spin_lock_irq(&pd_chan->lock);
+	list_splice_init(&pd_chan->free_list, &tmp_list);
+	pd_chan->descs_allocated = 0;
+	spin_unlock_irq(&pd_chan->lock);
+
+	list_for_each_entry_safe(desc, _d, &tmp_list, desc_node)
+		pci_pool_free(pd->pool, desc, desc->txd.phys);
+
+	pdc_enable_irq(chan, 0);
+}
+
+static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+				    struct dma_tx_state *txstate)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	enum dma_status ret;
+
+	spin_lock_irq(&pd_chan->lock);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	spin_unlock_irq(&pd_chan->lock);
+
+	return ret;
+}
+
+static void pd_issue_pending(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+
+	if (pdc_is_idle(pd_chan)) {
+		spin_lock(&pd_chan->lock);
+		pdc_advance_work(pd_chan);
+		spin_unlock(&pd_chan->lock);
+	}
+}
+
+static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
+			struct scatterlist *sgl, unsigned int sg_len,
+			enum dma_transfer_direction direction, unsigned long flags,
+			void *context)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma_slave *pd_slave = chan->private;
+	struct pch_dma_desc *first = NULL;
+	struct pch_dma_desc *prev = NULL;
+	struct pch_dma_desc *desc = NULL;
+	struct scatterlist *sg;
+	dma_addr_t reg;
+	int i;
+
+	if (unlikely(!sg_len)) {
+		dev_info(chan2dev(chan), "prep_slave_sg: length is zero!\n");
+		return NULL;
+	}
+
+	if (direction == DMA_DEV_TO_MEM)
+		reg = pd_slave->rx_reg;
+	else if (direction == DMA_MEM_TO_DEV)
+		reg = pd_slave->tx_reg;
+	else
+		return NULL;
+
+	pd_chan->dir = direction;
+	pdc_set_dir(chan);
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		desc = pdc_desc_get(pd_chan);
+
+		if (!desc)
+			goto err_desc_get;
+
+		desc->regs.dev_addr = reg;
+		desc->regs.mem_addr = sg_phys(sg);
+		desc->regs.size = sg_dma_len(sg);
+		desc->regs.next = DMA_DESC_FOLLOW_WITHOUT_IRQ;
+
+		switch (pd_slave->width) {
+		case PCH_DMA_WIDTH_1_BYTE:
+			if (desc->regs.size > DMA_DESC_MAX_COUNT_1_BYTE)
+				goto err_desc_get;
+			desc->regs.size |= DMA_DESC_WIDTH_1_BYTE;
+			break;
+		case PCH_DMA_WIDTH_2_BYTES:
+			if (desc->regs.size > DMA_DESC_MAX_COUNT_2_BYTES)
+				goto err_desc_get;
+			desc->regs.size |= DMA_DESC_WIDTH_2_BYTES;
+			break;
+		case PCH_DMA_WIDTH_4_BYTES:
+			if (desc->regs.size > DMA_DESC_MAX_COUNT_4_BYTES)
+				goto err_desc_get;
+			desc->regs.size |= DMA_DESC_WIDTH_4_BYTES;
+			break;
+		default:
+			goto err_desc_get;
+		}
+
+		if (!first) {
+			first = desc;
+		} else {
+			prev->regs.next |= desc->txd.phys;
+			list_add_tail(&desc->desc_node, &first->tx_list);
+		}
+
+		prev = desc;
+	}
+
+	if (flags & DMA_PREP_INTERRUPT)
+		desc->regs.next = DMA_DESC_END_WITH_IRQ;
+	else
+		desc->regs.next = DMA_DESC_END_WITHOUT_IRQ;
+
+	first->txd.cookie = -EBUSY;
+	desc->txd.flags = flags;
+
+	return &first->txd;
+
+err_desc_get:
+	dev_err(chan2dev(chan), "failed to get desc or wrong parameters\n");
+	pdc_desc_put(pd_chan, first);
+	return NULL;
+}
+
+static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			     unsigned long arg)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma_desc *desc, *_d;
+	LIST_HEAD(list);
+
+	if (cmd != DMA_TERMINATE_ALL)
+		return -ENXIO;
+
+	spin_lock_irq(&pd_chan->lock);
+
+	pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE);
+
+	list_splice_init(&pd_chan->active_list, &list);
+	list_splice_init(&pd_chan->queue, &list);
+
+	list_for_each_entry_safe(desc, _d, &list, desc_node)
+		pdc_chain_complete(pd_chan, desc);
+
+	spin_unlock_irq(&pd_chan->lock);
+
+	return 0;
+}
+
+static void pdc_tasklet(unsigned long data)
+{
+	struct pch_dma_chan *pd_chan = (struct pch_dma_chan *)data;
+	unsigned long flags;
+
+	if (!pdc_is_idle(pd_chan)) {
+		dev_err(chan2dev(&pd_chan->chan),
+			"BUG: handle non-idle channel in tasklet\n");
+		return;
+	}
+
+	spin_lock_irqsave(&pd_chan->lock, flags);
+	if (test_and_clear_bit(0, &pd_chan->err_status))
+		pdc_handle_error(pd_chan);
+	else
+		pdc_advance_work(pd_chan);
+	spin_unlock_irqrestore(&pd_chan->lock, flags);
+}
+
+static irqreturn_t pd_irq(int irq, void *devid)
+{
+	struct pch_dma *pd = (struct pch_dma *)devid;
+	struct pch_dma_chan *pd_chan;
+	u32 sts0;
+	u32 sts2;
+	int i;
+	int ret0 = IRQ_NONE;
+	int ret2 = IRQ_NONE;
+
+	sts0 = dma_readl(pd, STS0);
+	sts2 = dma_readl(pd, STS2);
+
+	dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0);
+
+	for (i = 0; i < pd->dma.chancnt; i++) {
+		pd_chan = &pd->channels[i];
+
+		if (i < 8) {
+			if (sts0 & DMA_STATUS_IRQ(i)) {
+				if (sts0 & DMA_STATUS0_ERR(i))
+					set_bit(0, &pd_chan->err_status);
+
+				tasklet_schedule(&pd_chan->tasklet);
+				ret0 = IRQ_HANDLED;
+			}
+		} else {
+			if (sts2 & DMA_STATUS_IRQ(i - 8)) {
+				if (sts2 & DMA_STATUS2_ERR(i))
+					set_bit(0, &pd_chan->err_status);
+
+				tasklet_schedule(&pd_chan->tasklet);
+				ret2 = IRQ_HANDLED;
+			}
+		}
+	}
+
+	/* clear interrupt bits in status register */
+	if (ret0)
+		dma_writel(pd, STS0, sts0);
+	if (ret2)
+		dma_writel(pd, STS2, sts2);
+
+	return ret0 | ret2;
+}
+
+#ifdef	CONFIG_PM
+static void pch_dma_save_regs(struct pch_dma *pd)
+{
+	struct pch_dma_chan *pd_chan;
+	struct dma_chan *chan, *_c;
+	int i = 0;
+
+	pd->regs.dma_ctl0 = dma_readl(pd, CTL0);
+	pd->regs.dma_ctl1 = dma_readl(pd, CTL1);
+	pd->regs.dma_ctl2 = dma_readl(pd, CTL2);
+	pd->regs.dma_ctl3 = dma_readl(pd, CTL3);
+
+	list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) {
+		pd_chan = to_pd_chan(chan);
+
+		pd->ch_regs[i].dev_addr = channel_readl(pd_chan, DEV_ADDR);
+		pd->ch_regs[i].mem_addr = channel_readl(pd_chan, MEM_ADDR);
+		pd->ch_regs[i].size = channel_readl(pd_chan, SIZE);
+		pd->ch_regs[i].next = channel_readl(pd_chan, NEXT);
+
+		i++;
+	}
+}
+
+static void pch_dma_restore_regs(struct pch_dma *pd)
+{
+	struct pch_dma_chan *pd_chan;
+	struct dma_chan *chan, *_c;
+	int i = 0;
+
+	dma_writel(pd, CTL0, pd->regs.dma_ctl0);
+	dma_writel(pd, CTL1, pd->regs.dma_ctl1);
+	dma_writel(pd, CTL2, pd->regs.dma_ctl2);
+	dma_writel(pd, CTL3, pd->regs.dma_ctl3);
+
+	list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) {
+		pd_chan = to_pd_chan(chan);
+
+		channel_writel(pd_chan, DEV_ADDR, pd->ch_regs[i].dev_addr);
+		channel_writel(pd_chan, MEM_ADDR, pd->ch_regs[i].mem_addr);
+		channel_writel(pd_chan, SIZE, pd->ch_regs[i].size);
+		channel_writel(pd_chan, NEXT, pd->ch_regs[i].next);
+
+		i++;
+	}
+}
+
+static int pch_dma_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct pch_dma *pd = pci_get_drvdata(pdev);
+
+	if (pd)
+		pch_dma_save_regs(pd);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+	return 0;
+}
+
+static int pch_dma_resume(struct pci_dev *pdev)
+{
+	struct pch_dma *pd = pci_get_drvdata(pdev);
+	int err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_dbg(&pdev->dev, "failed to enable device\n");
+		return err;
+	}
+
+	if (pd)
+		pch_dma_restore_regs(pd);
+
+	return 0;
+}
+#endif
+
+static int __devinit pch_dma_probe(struct pci_dev *pdev,
+				   const struct pci_device_id *id)
+{
+	struct pch_dma *pd;
+	struct pch_dma_regs *regs;
+	unsigned int nr_channels;
+	int err;
+	int i;
+
+	nr_channels = id->driver_data;
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return -ENOMEM;
+
+	pci_set_drvdata(pdev, pd);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot enable PCI device\n");
+		goto err_free_mem;
+	}
+
+	if (!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
+		dev_err(&pdev->dev, "Cannot find proper base address\n");
+		goto err_disable_pdev;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot obtain PCI resources\n");
+		goto err_disable_pdev;
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err) {
+		dev_err(&pdev->dev, "Cannot set proper DMA config\n");
+		goto err_free_res;
+	}
+
+	regs = pd->membase = pci_iomap(pdev, 1, 0);
+	if (!pd->membase) {
+		dev_err(&pdev->dev, "Cannot map MMIO registers\n");
+		err = -ENOMEM;
+		goto err_free_res;
+	}
+
+	pci_set_master(pdev);
+
+	err = request_irq(pdev->irq, pd_irq, IRQF_SHARED, DRV_NAME, pd);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to request IRQ\n");
+		goto err_iounmap;
+	}
+
+	pd->pool = pci_pool_create("pch_dma_desc_pool", pdev,
+				   sizeof(struct pch_dma_desc), 4, 0);
+	if (!pd->pool) {
+		dev_err(&pdev->dev, "Failed to alloc DMA descriptors\n");
+		err = -ENOMEM;
+		goto err_free_irq;
+	}
+
+	pd->dma.dev = &pdev->dev;
+
+	INIT_LIST_HEAD(&pd->dma.channels);
+
+	for (i = 0; i < nr_channels; i++) {
+		struct pch_dma_chan *pd_chan = &pd->channels[i];
+
+		pd_chan->chan.device = &pd->dma;
+		dma_cookie_init(&pd_chan->chan);
+
+		pd_chan->membase = &regs->desc[i];
+
+		spin_lock_init(&pd_chan->lock);
+
+		INIT_LIST_HEAD(&pd_chan->active_list);
+		INIT_LIST_HEAD(&pd_chan->queue);
+		INIT_LIST_HEAD(&pd_chan->free_list);
+
+		tasklet_init(&pd_chan->tasklet, pdc_tasklet,
+			     (unsigned long)pd_chan);
+		list_add_tail(&pd_chan->chan.device_node, &pd->dma.channels);
+	}
+
+	dma_cap_zero(pd->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, pd->dma.cap_mask);
+	dma_cap_set(DMA_SLAVE, pd->dma.cap_mask);
+
+	pd->dma.device_alloc_chan_resources = pd_alloc_chan_resources;
+	pd->dma.device_free_chan_resources = pd_free_chan_resources;
+	pd->dma.device_tx_status = pd_tx_status;
+	pd->dma.device_issue_pending = pd_issue_pending;
+	pd->dma.device_prep_slave_sg = pd_prep_slave_sg;
+	pd->dma.device_control = pd_device_control;
+
+	err = dma_async_device_register(&pd->dma);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to register DMA device\n");
+		goto err_free_pool;
+	}
+
+	return 0;
+
+err_free_pool:
+	pci_pool_destroy(pd->pool);
+err_free_irq:
+	free_irq(pdev->irq, pd);
+err_iounmap:
+	pci_iounmap(pdev, pd->membase);
+err_free_res:
+	pci_release_regions(pdev);
+err_disable_pdev:
+	pci_disable_device(pdev);
+err_free_mem:
+	return err;
+}
+
+static void __devexit pch_dma_remove(struct pci_dev *pdev)
+{
+	struct pch_dma *pd = pci_get_drvdata(pdev);
+	struct pch_dma_chan *pd_chan;
+	struct dma_chan *chan, *_c;
+
+	if (pd) {
+		dma_async_device_unregister(&pd->dma);
+
+		list_for_each_entry_safe(chan, _c, &pd->dma.channels,
+					 device_node) {
+			pd_chan = to_pd_chan(chan);
+
+			tasklet_disable(&pd_chan->tasklet);
+			tasklet_kill(&pd_chan->tasklet);
+		}
+
+		pci_pool_destroy(pd->pool);
+		free_irq(pdev->irq, pd);
+		pci_iounmap(pdev, pd->membase);
+		pci_release_regions(pdev);
+		pci_disable_device(pdev);
+		kfree(pd);
+	}
+}
+
+/* PCI Device ID of DMA device */
+#define PCI_VENDOR_ID_ROHM             0x10DB
+#define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH        0x8810
+#define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH        0x8815
+#define PCI_DEVICE_ID_ML7213_DMA1_8CH	0x8026
+#define PCI_DEVICE_ID_ML7213_DMA2_8CH	0x802B
+#define PCI_DEVICE_ID_ML7213_DMA3_4CH	0x8034
+#define PCI_DEVICE_ID_ML7213_DMA4_12CH	0x8032
+#define PCI_DEVICE_ID_ML7223_DMA1_4CH	0x800B
+#define PCI_DEVICE_ID_ML7223_DMA2_4CH	0x800E
+#define PCI_DEVICE_ID_ML7223_DMA3_4CH	0x8017
+#define PCI_DEVICE_ID_ML7223_DMA4_4CH	0x803B
+#define PCI_DEVICE_ID_ML7831_DMA1_8CH	0x8810
+#define PCI_DEVICE_ID_ML7831_DMA2_4CH	0x8815
+
+DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = {
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA1_4CH), 4}, /* UART */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA1_8CH), 8}, /* UART */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA2_4CH), 4}, /* SPI */
+	{ 0, },
+};
+
+static struct pci_driver pch_dma_driver = {
+	.name		= DRV_NAME,
+	.id_table	= pch_dma_id_table,
+	.probe		= pch_dma_probe,
+	.remove		= __devexit_p(pch_dma_remove),
+#ifdef CONFIG_PM
+	.suspend	= pch_dma_suspend,
+	.resume		= pch_dma_resume,
+#endif
+};
+
+static int __init pch_dma_init(void)
+{
+	return pci_register_driver(&pch_dma_driver);
+}
+
+static void __exit pch_dma_exit(void)
+{
+	pci_unregister_driver(&pch_dma_driver);
+}
+
+module_init(pch_dma_init);
+module_exit(pch_dma_exit);
+
+MODULE_DESCRIPTION("Intel EG20T PCH / LAPIS Semicon ML7213/ML7223/ML7831 IOH "
+		   "DMA controller driver");
+MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
new file mode 100644
index 00000000..8c44f17a
--- /dev/null
+++ b/drivers/dma/pl330.c
@@ -0,0 +1,3119 @@
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * Copyright (C) 2010 Samsung Electronics Co. Ltd.
+ *	Jaswinder Singh <jassi.brar@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/amba/bus.h>
+#include <linux/amba/pl330.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/of.h>
+
+#include "dmaengine.h"
+#define PL330_MAX_CHAN		8
+#define PL330_MAX_IRQS		32
+#define PL330_MAX_PERI		32
+
+enum pl330_srccachectrl {
+	SCCTRL0,	/* Noncacheable and nonbufferable */
+	SCCTRL1,	/* Bufferable only */
+	SCCTRL2,	/* Cacheable, but do not allocate */
+	SCCTRL3,	/* Cacheable and bufferable, but do not allocate */
+	SINVALID1,
+	SINVALID2,
+	SCCTRL6,	/* Cacheable write-through, allocate on reads only */
+	SCCTRL7,	/* Cacheable write-back, allocate on reads only */
+};
+
+enum pl330_dstcachectrl {
+	DCCTRL0,	/* Noncacheable and nonbufferable */
+	DCCTRL1,	/* Bufferable only */
+	DCCTRL2,	/* Cacheable, but do not allocate */
+	DCCTRL3,	/* Cacheable and bufferable, but do not allocate */
+	DINVALID1,	/* AWCACHE = 0x1000 */
+	DINVALID2,
+	DCCTRL6,	/* Cacheable write-through, allocate on writes only */
+	DCCTRL7,	/* Cacheable write-back, allocate on writes only */
+};
+
+enum pl330_byteswap {
+	SWAP_NO,
+	SWAP_2,
+	SWAP_4,
+	SWAP_8,
+	SWAP_16,
+};
+
+enum pl330_reqtype {
+	MEMTOMEM,
+	MEMTODEV,
+	DEVTOMEM,
+	DEVTODEV,
+};
+
+/* Register and Bit field Definitions */
+#define DS			0x0
+#define DS_ST_STOP		0x0
+#define DS_ST_EXEC		0x1
+#define DS_ST_CMISS		0x2
+#define DS_ST_UPDTPC		0x3
+#define DS_ST_WFE		0x4
+#define DS_ST_ATBRR		0x5
+#define DS_ST_QBUSY		0x6
+#define DS_ST_WFP		0x7
+#define DS_ST_KILL		0x8
+#define DS_ST_CMPLT		0x9
+#define DS_ST_FLTCMP		0xe
+#define DS_ST_FAULT		0xf
+
+#define DPC			0x4
+#define INTEN			0x20
+#define ES			0x24
+#define INTSTATUS		0x28
+#define INTCLR			0x2c
+#define FSM			0x30
+#define FSC			0x34
+#define FTM			0x38
+
+#define _FTC			0x40
+#define FTC(n)			(_FTC + (n)*0x4)
+
+#define _CS			0x100
+#define CS(n)			(_CS + (n)*0x8)
+#define CS_CNS			(1 << 21)
+
+#define _CPC			0x104
+#define CPC(n)			(_CPC + (n)*0x8)
+
+#define _SA			0x400
+#define SA(n)			(_SA + (n)*0x20)
+
+#define _DA			0x404
+#define DA(n)			(_DA + (n)*0x20)
+
+#define _CC			0x408
+#define CC(n)			(_CC + (n)*0x20)
+
+#define CC_SRCINC		(1 << 0)
+#define CC_DSTINC		(1 << 14)
+#define CC_SRCPRI		(1 << 8)
+#define CC_DSTPRI		(1 << 22)
+#define CC_SRCNS		(1 << 9)
+#define CC_DSTNS		(1 << 23)
+#define CC_SRCIA		(1 << 10)
+#define CC_DSTIA		(1 << 24)
+#define CC_SRCBRSTLEN_SHFT	4
+#define CC_DSTBRSTLEN_SHFT	18
+#define CC_SRCBRSTSIZE_SHFT	1
+#define CC_DSTBRSTSIZE_SHFT	15
+#define CC_SRCCCTRL_SHFT	11
+#define CC_SRCCCTRL_MASK	0x7
+#define CC_DSTCCTRL_SHFT	25
+#define CC_DRCCCTRL_MASK	0x7
+#define CC_SWAP_SHFT		28
+
+#define _LC0			0x40c
+#define LC0(n)			(_LC0 + (n)*0x20)
+
+#define _LC1			0x410
+#define LC1(n)			(_LC1 + (n)*0x20)
+
+#define DBGSTATUS		0xd00
+#define DBG_BUSY		(1 << 0)
+
+#define DBGCMD			0xd04
+#define DBGINST0		0xd08
+#define DBGINST1		0xd0c
+
+#define CR0			0xe00
+#define CR1			0xe04
+#define CR2			0xe08
+#define CR3			0xe0c
+#define CR4			0xe10
+#define CRD			0xe14
+
+#define PERIPH_ID		0xfe0
+#define PERIPH_REV_SHIFT	20
+#define PERIPH_REV_MASK		0xf
+#define PERIPH_REV_R0P0		0
+#define PERIPH_REV_R1P0		1
+#define PERIPH_REV_R1P1		2
+#define PCELL_ID		0xff0
+
+#define CR0_PERIPH_REQ_SET	(1 << 0)
+#define CR0_BOOT_EN_SET		(1 << 1)
+#define CR0_BOOT_MAN_NS		(1 << 2)
+#define CR0_NUM_CHANS_SHIFT	4
+#define CR0_NUM_CHANS_MASK	0x7
+#define CR0_NUM_PERIPH_SHIFT	12
+#define CR0_NUM_PERIPH_MASK	0x1f
+#define CR0_NUM_EVENTS_SHIFT	17
+#define CR0_NUM_EVENTS_MASK	0x1f
+
+#define CR1_ICACHE_LEN_SHIFT	0
+#define CR1_ICACHE_LEN_MASK	0x7
+#define CR1_NUM_ICACHELINES_SHIFT	4
+#define CR1_NUM_ICACHELINES_MASK	0xf
+
+#define CRD_DATA_WIDTH_SHIFT	0
+#define CRD_DATA_WIDTH_MASK	0x7
+#define CRD_WR_CAP_SHIFT	4
+#define CRD_WR_CAP_MASK		0x7
+#define CRD_WR_Q_DEP_SHIFT	8
+#define CRD_WR_Q_DEP_MASK	0xf
+#define CRD_RD_CAP_SHIFT	12
+#define CRD_RD_CAP_MASK		0x7
+#define CRD_RD_Q_DEP_SHIFT	16
+#define CRD_RD_Q_DEP_MASK	0xf
+#define CRD_DATA_BUFF_SHIFT	20
+#define CRD_DATA_BUFF_MASK	0x3ff
+
+#define PART			0x330
+#define DESIGNER		0x41
+#define REVISION		0x0
+#define INTEG_CFG		0x0
+#define PERIPH_ID_VAL		((PART << 0) | (DESIGNER << 12))
+
+#define PCELL_ID_VAL		0xb105f00d
+
+#define PL330_STATE_STOPPED		(1 << 0)
+#define PL330_STATE_EXECUTING		(1 << 1)
+#define PL330_STATE_WFE			(1 << 2)
+#define PL330_STATE_FAULTING		(1 << 3)
+#define PL330_STATE_COMPLETING		(1 << 4)
+#define PL330_STATE_WFP			(1 << 5)
+#define PL330_STATE_KILLING		(1 << 6)
+#define PL330_STATE_FAULT_COMPLETING	(1 << 7)
+#define PL330_STATE_CACHEMISS		(1 << 8)
+#define PL330_STATE_UPDTPC		(1 << 9)
+#define PL330_STATE_ATBARRIER		(1 << 10)
+#define PL330_STATE_QUEUEBUSY		(1 << 11)
+#define PL330_STATE_INVALID		(1 << 15)
+
+#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \
+				| PL330_STATE_WFE | PL330_STATE_FAULTING)
+
+#define CMD_DMAADDH		0x54
+#define CMD_DMAEND		0x00
+#define CMD_DMAFLUSHP		0x35
+#define CMD_DMAGO		0xa0
+#define CMD_DMALD		0x04
+#define CMD_DMALDP		0x25
+#define CMD_DMALP		0x20
+#define CMD_DMALPEND		0x28
+#define CMD_DMAKILL		0x01
+#define CMD_DMAMOV		0xbc
+#define CMD_DMANOP		0x18
+#define CMD_DMARMB		0x12
+#define CMD_DMASEV		0x34
+#define CMD_DMAST		0x08
+#define CMD_DMASTP		0x29
+#define CMD_DMASTZ		0x0c
+#define CMD_DMAWFE		0x36
+#define CMD_DMAWFP		0x30
+#define CMD_DMAWMB		0x13
+
+#define SZ_DMAADDH		3
+#define SZ_DMAEND		1
+#define SZ_DMAFLUSHP		2
+#define SZ_DMALD		1
+#define SZ_DMALDP		2
+#define SZ_DMALP		2
+#define SZ_DMALPEND		2
+#define SZ_DMAKILL		1
+#define SZ_DMAMOV		6
+#define SZ_DMANOP		1
+#define SZ_DMARMB		1
+#define SZ_DMASEV		2
+#define SZ_DMAST		1
+#define SZ_DMASTP		2
+#define SZ_DMASTZ		1
+#define SZ_DMAWFE		2
+#define SZ_DMAWFP		2
+#define SZ_DMAWMB		1
+#define SZ_DMAGO		6
+
+#define BRST_LEN(ccr)		((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1)
+#define BRST_SIZE(ccr)		(1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7))
+
+#define BYTE_TO_BURST(b, ccr)	((b) / BRST_SIZE(ccr) / BRST_LEN(ccr))
+#define BURST_TO_BYTE(c, ccr)	((c) * BRST_SIZE(ccr) * BRST_LEN(ccr))
+
+/*
+ * With 256 bytes, we can do more than 2.5MB and 5MB xfers per req
+ * at 1byte/burst for P<->M and M<->M respectively.
+ * For typical scenario, at 1word/burst, 10MB and 20MB xfers per req
+ * should be enough for P<->M and M<->M respectively.
+ */
+#define MCODE_BUFF_PER_REQ	256
+
+/* If the _pl330_req is available to the client */
+#define IS_FREE(req)	(*((u8 *)((req)->mc_cpu)) == CMD_DMAEND)
+
+/* Use this _only_ to wait on transient states */
+#define UNTIL(t, s)	while (!(_state(t) & (s))) cpu_relax();
+
+#ifdef PL330_DEBUG_MCGEN
+static unsigned cmd_line;
+#define PL330_DBGCMD_DUMP(off, x...)	do { \
+						printk("%x:", cmd_line); \
+						printk(x); \
+						cmd_line += off; \
+					} while (0)
+#define PL330_DBGMC_START(addr)		(cmd_line = addr)
+#else
+#define PL330_DBGCMD_DUMP(off, x...)	do {} while (0)
+#define PL330_DBGMC_START(addr)		do {} while (0)
+#endif
+
+/* The number of default descriptors */
+
+#define NR_DEFAULT_DESC	16
+
+/* Populated by the PL330 core driver for DMA API driver's info */
+struct pl330_config {
+	u32	periph_id;
+	u32	pcell_id;
+#define DMAC_MODE_NS	(1 << 0)
+	unsigned int	mode;
+	unsigned int	data_bus_width:10; /* In number of bits */
+	unsigned int	data_buf_dep:10;
+	unsigned int	num_chan:4;
+	unsigned int	num_peri:6;
+	u32		peri_ns;
+	unsigned int	num_events:6;
+	u32		irq_ns;
+};
+
+/* Handle to the DMAC provided to the PL330 core */
+struct pl330_info {
+	/* Owning device */
+	struct device *dev;
+	/* Size of MicroCode buffers for each channel. */
+	unsigned mcbufsz;
+	/* ioremap'ed address of PL330 registers. */
+	void __iomem	*base;
+	/* Client can freely use it. */
+	void	*client_data;
+	/* PL330 core data, Client must not touch it. */
+	void	*pl330_data;
+	/* Populated by the PL330 core driver during pl330_add */
+	struct pl330_config	pcfg;
+	/*
+	 * If the DMAC has some reset mechanism, then the
+	 * client may want to provide pointer to the method.
+	 */
+	void (*dmac_reset)(struct pl330_info *pi);
+};
+
+/**
+ * Request Configuration.
+ * The PL330 core does not modify this and uses the last
+ * working configuration if the request doesn't provide any.
+ *
+ * The Client may want to provide this info only for the
+ * first request and a request with new settings.
+ */
+struct pl330_reqcfg {
+	/* Address Incrementing */
+	unsigned dst_inc:1;
+	unsigned src_inc:1;
+
+	/*
+	 * For now, the SRC & DST protection levels
+	 * and burst size/length are assumed same.
+	 */
+	bool nonsecure;
+	bool privileged;
+	bool insnaccess;
+	unsigned brst_len:5;
+	unsigned brst_size:3; /* in power of 2 */
+
+	enum pl330_dstcachectrl dcctl;
+	enum pl330_srccachectrl scctl;
+	enum pl330_byteswap swap;
+	struct pl330_config *pcfg;
+};
+
+/*
+ * One cycle of DMAC operation.
+ * There may be more than one xfer in a request.
+ */
+struct pl330_xfer {
+	u32 src_addr;
+	u32 dst_addr;
+	/* Size to xfer */
+	u32 bytes;
+	/*
+	 * Pointer to next xfer in the list.
+	 * The last xfer in the req must point to NULL.
+	 */
+	struct pl330_xfer *next;
+};
+
+/* The xfer callbacks are made with one of these arguments. */
+enum pl330_op_err {
+	/* The all xfers in the request were success. */
+	PL330_ERR_NONE,
+	/* If req aborted due to global error. */
+	PL330_ERR_ABORT,
+	/* If req failed due to problem with Channel. */
+	PL330_ERR_FAIL,
+};
+
+/* A request defining Scatter-Gather List ending with NULL xfer. */
+struct pl330_req {
+	enum pl330_reqtype rqtype;
+	/* Index of peripheral for the xfer. */
+	unsigned peri:5;
+	/* Unique token for this xfer, set by the client. */
+	void *token;
+	/* Callback to be called after xfer. */
+	void (*xfer_cb)(void *token, enum pl330_op_err err);
+	/* If NULL, req will be done at last set parameters. */
+	struct pl330_reqcfg *cfg;
+	/* Pointer to first xfer in the request. */
+	struct pl330_xfer *x;
+};
+
+/*
+ * To know the status of the channel and DMAC, the client
+ * provides a pointer to this structure. The PL330 core
+ * fills it with current information.
+ */
+struct pl330_chanstatus {
+	/*
+	 * If the DMAC engine halted due to some error,
+	 * the client should remove-add DMAC.
+	 */
+	bool dmac_halted;
+	/*
+	 * If channel is halted due to some error,
+	 * the client should ABORT/FLUSH and START the channel.
+	 */
+	bool faulting;
+	/* Location of last load */
+	u32 src_addr;
+	/* Location of last store */
+	u32 dst_addr;
+	/*
+	 * Pointer to the currently active req, NULL if channel is
+	 * inactive, even though the requests may be present.
+	 */
+	struct pl330_req *top_req;
+	/* Pointer to req waiting second in the queue if any. */
+	struct pl330_req *wait_req;
+};
+
+enum pl330_chan_op {
+	/* Start the channel */
+	PL330_OP_START,
+	/* Abort the active xfer */
+	PL330_OP_ABORT,
+	/* Stop xfer and flush queue */
+	PL330_OP_FLUSH,
+};
+
+struct _xfer_spec {
+	u32 ccr;
+	struct pl330_req *r;
+	struct pl330_xfer *x;
+};
+
+enum dmamov_dst {
+	SAR = 0,
+	CCR,
+	DAR,
+};
+
+enum pl330_dst {
+	SRC = 0,
+	DST,
+};
+
+enum pl330_cond {
+	SINGLE,
+	BURST,
+	ALWAYS,
+};
+
+struct _pl330_req {
+	u32 mc_bus;
+	void *mc_cpu;
+	/* Number of bytes taken to setup MC for the req */
+	u32 mc_len;
+	struct pl330_req *r;
+	/* Hook to attach to DMAC's list of reqs with due callback */
+	struct list_head rqd;
+};
+
+/* ToBeDone for tasklet */
+struct _pl330_tbd {
+	bool reset_dmac;
+	bool reset_mngr;
+	u8 reset_chan;
+};
+
+/* A DMAC Thread */
+struct pl330_thread {
+	u8 id;
+	int ev;
+	/* If the channel is not yet acquired by any client */
+	bool free;
+	/* Parent DMAC */
+	struct pl330_dmac *dmac;
+	/* Only two at a time */
+	struct _pl330_req req[2];
+	/* Index of the last enqueued request */
+	unsigned lstenq;
+	/* Index of the last submitted request or -1 if the DMA is stopped */
+	int req_running;
+};
+
+enum pl330_dmac_state {
+	UNINIT,
+	INIT,
+	DYING,
+};
+
+/* A DMAC */
+struct pl330_dmac {
+	spinlock_t		lock;
+	/* Holds list of reqs with due callbacks */
+	struct list_head	req_done;
+	/* Pointer to platform specific stuff */
+	struct pl330_info	*pinfo;
+	/* Maximum possible events/irqs */
+	int			events[32];
+	/* BUS address of MicroCode buffer */
+	u32			mcode_bus;
+	/* CPU address of MicroCode buffer */
+	void			*mcode_cpu;
+	/* List of all Channel threads */
+	struct pl330_thread	*channels;
+	/* Pointer to the MANAGER thread */
+	struct pl330_thread	*manager;
+	/* To handle bad news in interrupt */
+	struct tasklet_struct	tasks;
+	struct _pl330_tbd	dmac_tbd;
+	/* State of DMAC operation */
+	enum pl330_dmac_state	state;
+};
+
+enum desc_status {
+	/* In the DMAC pool */
+	FREE,
+	/*
+	 * Allocted to some channel during prep_xxx
+	 * Also may be sitting on the work_list.
+	 */
+	PREP,
+	/*
+	 * Sitting on the work_list and already submitted
+	 * to the PL330 core. Not more than two descriptors
+	 * of a channel can be BUSY at any time.
+	 */
+	BUSY,
+	/*
+	 * Sitting on the channel work_list but xfer done
+	 * by PL330 core
+	 */
+	DONE,
+};
+
+struct dma_pl330_chan {
+	/* Schedule desc completion */
+	struct tasklet_struct task;
+
+	/* DMA-Engine Channel */
+	struct dma_chan chan;
+
+	/* List of to be xfered descriptors */
+	struct list_head work_list;
+
+	/* Pointer to the DMAC that manages this channel,
+	 * NULL if the channel is available to be acquired.
+	 * As the parent, this DMAC also provides descriptors
+	 * to the channel.
+	 */
+	struct dma_pl330_dmac *dmac;
+
+	/* To protect channel manipulation */
+	spinlock_t lock;
+
+	/* Token of a hardware channel thread of PL330 DMAC
+	 * NULL if the channel is available to be acquired.
+	 */
+	void *pl330_chid;
+
+	/* For D-to-M and M-to-D channels */
+	int burst_sz; /* the peripheral fifo width */
+	int burst_len; /* the number of burst */
+	dma_addr_t fifo_addr;
+
+	/* for cyclic capability */
+	bool cyclic;
+};
+
+struct dma_pl330_dmac {
+	struct pl330_info pif;
+
+	/* DMA-Engine Device */
+	struct dma_device ddma;
+
+	/* Pool of descriptors available for the DMAC's channels */
+	struct list_head desc_pool;
+	/* To protect desc_pool manipulation */
+	spinlock_t pool_lock;
+
+	/* Peripheral channels connected to this DMAC */
+	struct dma_pl330_chan *peripherals; /* keep at end */
+
+	struct clk *clk;
+};
+
+struct dma_pl330_desc {
+	/* To attach to a queue as child */
+	struct list_head node;
+
+	/* Descriptor for the DMA Engine API */
+	struct dma_async_tx_descriptor txd;
+
+	/* Xfer for PL330 core */
+	struct pl330_xfer px;
+
+	struct pl330_reqcfg rqcfg;
+	struct pl330_req req;
+
+	enum desc_status status;
+
+	/* The channel which currently holds this desc */
+	struct dma_pl330_chan *pchan;
+};
+
+static inline void _callback(struct pl330_req *r, enum pl330_op_err err)
+{
+	if (r && r->xfer_cb)
+		r->xfer_cb(r->token, err);
+}
+
+static inline bool _queue_empty(struct pl330_thread *thrd)
+{
+	return (IS_FREE(&thrd->req[0]) && IS_FREE(&thrd->req[1]))
+		? true : false;
+}
+
+static inline bool _queue_full(struct pl330_thread *thrd)
+{
+	return (IS_FREE(&thrd->req[0]) || IS_FREE(&thrd->req[1]))
+		? false : true;
+}
+
+static inline bool is_manager(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+
+	/* MANAGER is indexed at the end */
+	if (thrd->id == pl330->pinfo->pcfg.num_chan)
+		return true;
+	else
+		return false;
+}
+
+/* If manager of the thread is in Non-Secure mode */
+static inline bool _manager_ns(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+
+	return (pl330->pinfo->pcfg.mode & DMAC_MODE_NS) ? true : false;
+}
+
+static inline u32 get_id(struct pl330_info *pi, u32 off)
+{
+	void __iomem *regs = pi->base;
+	u32 id = 0;
+
+	id |= (readb(regs + off + 0x0) << 0);
+	id |= (readb(regs + off + 0x4) << 8);
+	id |= (readb(regs + off + 0x8) << 16);
+	id |= (readb(regs + off + 0xc) << 24);
+
+	return id;
+}
+
+static inline u32 get_revision(u32 periph_id)
+{
+	return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK;
+}
+
+static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[],
+		enum pl330_dst da, u16 val)
+{
+	if (dry_run)
+		return SZ_DMAADDH;
+
+	buf[0] = CMD_DMAADDH;
+	buf[0] |= (da << 1);
+	*((u16 *)&buf[1]) = val;
+
+	PL330_DBGCMD_DUMP(SZ_DMAADDH, "\tDMAADDH %s %u\n",
+		da == 1 ? "DA" : "SA", val);
+
+	return SZ_DMAADDH;
+}
+
+static inline u32 _emit_END(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAEND;
+
+	buf[0] = CMD_DMAEND;
+
+	PL330_DBGCMD_DUMP(SZ_DMAEND, "\tDMAEND\n");
+
+	return SZ_DMAEND;
+}
+
+static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri)
+{
+	if (dry_run)
+		return SZ_DMAFLUSHP;
+
+	buf[0] = CMD_DMAFLUSHP;
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMAFLUSHP, "\tDMAFLUSHP %u\n", peri >> 3);
+
+	return SZ_DMAFLUSHP;
+}
+
+static inline u32 _emit_LD(unsigned dry_run, u8 buf[],	enum pl330_cond cond)
+{
+	if (dry_run)
+		return SZ_DMALD;
+
+	buf[0] = CMD_DMALD;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	PL330_DBGCMD_DUMP(SZ_DMALD, "\tDMALD%c\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+	return SZ_DMALD;
+}
+
+static inline u32 _emit_LDP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMALDP;
+
+	buf[0] = CMD_DMALDP;
+
+	if (cond == BURST)
+		buf[0] |= (1 << 1);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMALDP, "\tDMALDP%c %u\n",
+		cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+	return SZ_DMALDP;
+}
+
+static inline u32 _emit_LP(unsigned dry_run, u8 buf[],
+		unsigned loop, u8 cnt)
+{
+	if (dry_run)
+		return SZ_DMALP;
+
+	buf[0] = CMD_DMALP;
+
+	if (loop)
+		buf[0] |= (1 << 1);
+
+	cnt--; /* DMAC increments by 1 internally */
+	buf[1] = cnt;
+
+	PL330_DBGCMD_DUMP(SZ_DMALP, "\tDMALP_%c %u\n", loop ? '1' : '0', cnt);
+
+	return SZ_DMALP;
+}
+
+struct _arg_LPEND {
+	enum pl330_cond cond;
+	bool forever;
+	unsigned loop;
+	u8 bjump;
+};
+
+static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[],
+		const struct _arg_LPEND *arg)
+{
+	enum pl330_cond cond = arg->cond;
+	bool forever = arg->forever;
+	unsigned loop = arg->loop;
+	u8 bjump = arg->bjump;
+
+	if (dry_run)
+		return SZ_DMALPEND;
+
+	buf[0] = CMD_DMALPEND;
+
+	if (loop)
+		buf[0] |= (1 << 2);
+
+	if (!forever)
+		buf[0] |= (1 << 4);
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	buf[1] = bjump;
+
+	PL330_DBGCMD_DUMP(SZ_DMALPEND, "\tDMALP%s%c_%c bjmpto_%x\n",
+			forever ? "FE" : "END",
+			cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'),
+			loop ? '1' : '0',
+			bjump);
+
+	return SZ_DMALPEND;
+}
+
+static inline u32 _emit_KILL(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAKILL;
+
+	buf[0] = CMD_DMAKILL;
+
+	return SZ_DMAKILL;
+}
+
+static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
+		enum dmamov_dst dst, u32 val)
+{
+	if (dry_run)
+		return SZ_DMAMOV;
+
+	buf[0] = CMD_DMAMOV;
+	buf[1] = dst;
+	*((u32 *)&buf[2]) = val;
+
+	PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n",
+		dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val);
+
+	return SZ_DMAMOV;
+}
+
+static inline u32 _emit_NOP(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMANOP;
+
+	buf[0] = CMD_DMANOP;
+
+	PL330_DBGCMD_DUMP(SZ_DMANOP, "\tDMANOP\n");
+
+	return SZ_DMANOP;
+}
+
+static inline u32 _emit_RMB(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMARMB;
+
+	buf[0] = CMD_DMARMB;
+
+	PL330_DBGCMD_DUMP(SZ_DMARMB, "\tDMARMB\n");
+
+	return SZ_DMARMB;
+}
+
+static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev)
+{
+	if (dry_run)
+		return SZ_DMASEV;
+
+	buf[0] = CMD_DMASEV;
+
+	ev &= 0x1f;
+	ev <<= 3;
+	buf[1] = ev;
+
+	PL330_DBGCMD_DUMP(SZ_DMASEV, "\tDMASEV %u\n", ev >> 3);
+
+	return SZ_DMASEV;
+}
+
+static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond)
+{
+	if (dry_run)
+		return SZ_DMAST;
+
+	buf[0] = CMD_DMAST;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	PL330_DBGCMD_DUMP(SZ_DMAST, "\tDMAST%c\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+	return SZ_DMAST;
+}
+
+static inline u32 _emit_STP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMASTP;
+
+	buf[0] = CMD_DMASTP;
+
+	if (cond == BURST)
+		buf[0] |= (1 << 1);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMASTP, "\tDMASTP%c %u\n",
+		cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+	return SZ_DMASTP;
+}
+
+static inline u32 _emit_STZ(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMASTZ;
+
+	buf[0] = CMD_DMASTZ;
+
+	PL330_DBGCMD_DUMP(SZ_DMASTZ, "\tDMASTZ\n");
+
+	return SZ_DMASTZ;
+}
+
+static inline u32 _emit_WFE(unsigned dry_run, u8 buf[], u8 ev,
+		unsigned invalidate)
+{
+	if (dry_run)
+		return SZ_DMAWFE;
+
+	buf[0] = CMD_DMAWFE;
+
+	ev &= 0x1f;
+	ev <<= 3;
+	buf[1] = ev;
+
+	if (invalidate)
+		buf[1] |= (1 << 1);
+
+	PL330_DBGCMD_DUMP(SZ_DMAWFE, "\tDMAWFE %u%s\n",
+		ev >> 3, invalidate ? ", I" : "");
+
+	return SZ_DMAWFE;
+}
+
+static inline u32 _emit_WFP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMAWFP;
+
+	buf[0] = CMD_DMAWFP;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (0 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (0 << 0);
+	else
+		buf[0] |= (0 << 1) | (1 << 0);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMAWFP, "\tDMAWFP%c %u\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'P'), peri >> 3);
+
+	return SZ_DMAWFP;
+}
+
+static inline u32 _emit_WMB(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAWMB;
+
+	buf[0] = CMD_DMAWMB;
+
+	PL330_DBGCMD_DUMP(SZ_DMAWMB, "\tDMAWMB\n");
+
+	return SZ_DMAWMB;
+}
+
+struct _arg_GO {
+	u8 chan;
+	u32 addr;
+	unsigned ns;
+};
+
+static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
+		const struct _arg_GO *arg)
+{
+	u8 chan = arg->chan;
+	u32 addr = arg->addr;
+	unsigned ns = arg->ns;
+
+	if (dry_run)
+		return SZ_DMAGO;
+
+	buf[0] = CMD_DMAGO;
+	buf[0] |= (ns << 1);
+
+	buf[1] = chan & 0x7;
+
+	*((u32 *)&buf[2]) = addr;
+
+	return SZ_DMAGO;
+}
+
+#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
+
+/* Returns Time-Out */
+static bool _until_dmac_idle(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	unsigned long loops = msecs_to_loops(5);
+
+	do {
+		/* Until Manager is Idle */
+		if (!(readl(regs + DBGSTATUS) & DBG_BUSY))
+			break;
+
+		cpu_relax();
+	} while (--loops);
+
+	if (!loops)
+		return true;
+
+	return false;
+}
+
+static inline void _execute_DBGINSN(struct pl330_thread *thrd,
+		u8 insn[], bool as_manager)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u32 val;
+
+	val = (insn[0] << 16) | (insn[1] << 24);
+	if (!as_manager) {
+		val |= (1 << 0);
+		val |= (thrd->id << 8); /* Channel Number */
+	}
+	writel(val, regs + DBGINST0);
+
+	val = *((u32 *)&insn[2]);
+	writel(val, regs + DBGINST1);
+
+	/* If timed out due to halted state-machine */
+	if (_until_dmac_idle(thrd)) {
+		dev_err(thrd->dmac->pinfo->dev, "DMAC halted!\n");
+		return;
+	}
+
+	/* Get going */
+	writel(0, regs + DBGCMD);
+}
+
+/*
+ * Mark a _pl330_req as free.
+ * We do it by writing DMAEND as the first instruction
+ * because no valid request is going to have DMAEND as
+ * its first instruction to execute.
+ */
+static void mark_free(struct pl330_thread *thrd, int idx)
+{
+	struct _pl330_req *req = &thrd->req[idx];
+
+	_emit_END(0, req->mc_cpu);
+	req->mc_len = 0;
+
+	thrd->req_running = -1;
+}
+
+static inline u32 _state(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u32 val;
+
+	if (is_manager(thrd))
+		val = readl(regs + DS) & 0xf;
+	else
+		val = readl(regs + CS(thrd->id)) & 0xf;
+
+	switch (val) {
+	case DS_ST_STOP:
+		return PL330_STATE_STOPPED;
+	case DS_ST_EXEC:
+		return PL330_STATE_EXECUTING;
+	case DS_ST_CMISS:
+		return PL330_STATE_CACHEMISS;
+	case DS_ST_UPDTPC:
+		return PL330_STATE_UPDTPC;
+	case DS_ST_WFE:
+		return PL330_STATE_WFE;
+	case DS_ST_FAULT:
+		return PL330_STATE_FAULTING;
+	case DS_ST_ATBRR:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_ATBARRIER;
+	case DS_ST_QBUSY:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_QUEUEBUSY;
+	case DS_ST_WFP:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_WFP;
+	case DS_ST_KILL:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_KILLING;
+	case DS_ST_CMPLT:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_COMPLETING;
+	case DS_ST_FLTCMP:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_FAULT_COMPLETING;
+	default:
+		return PL330_STATE_INVALID;
+	}
+}
+
+static void _stop(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+
+	if (_state(thrd) == PL330_STATE_FAULT_COMPLETING)
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+	/* Return if nothing needs to be done */
+	if (_state(thrd) == PL330_STATE_COMPLETING
+		  || _state(thrd) == PL330_STATE_KILLING
+		  || _state(thrd) == PL330_STATE_STOPPED)
+		return;
+
+	_emit_KILL(0, insn);
+
+	/* Stop generating interrupts for SEV */
+	writel(readl(regs + INTEN) & ~(1 << thrd->ev), regs + INTEN);
+
+	_execute_DBGINSN(thrd, insn, is_manager(thrd));
+}
+
+/* Start doing req 'idx' of thread 'thrd' */
+static bool _trigger(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	struct _pl330_req *req;
+	struct pl330_req *r;
+	struct _arg_GO go;
+	unsigned ns;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+	int idx;
+
+	/* Return if already ACTIVE */
+	if (_state(thrd) != PL330_STATE_STOPPED)
+		return true;
+
+	idx = 1 - thrd->lstenq;
+	if (!IS_FREE(&thrd->req[idx]))
+		req = &thrd->req[idx];
+	else {
+		idx = thrd->lstenq;
+		if (!IS_FREE(&thrd->req[idx]))
+			req = &thrd->req[idx];
+		else
+			req = NULL;
+	}
+
+	/* Return if no request */
+	if (!req || !req->r)
+		return true;
+
+	r = req->r;
+
+	if (r->cfg)
+		ns = r->cfg->nonsecure ? 1 : 0;
+	else if (readl(regs + CS(thrd->id)) & CS_CNS)
+		ns = 1;
+	else
+		ns = 0;
+
+	/* See 'Abort Sources' point-4 at Page 2-25 */
+	if (_manager_ns(thrd) && !ns)
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d Recipe for ABORT!\n",
+			__func__, __LINE__);
+
+	go.chan = thrd->id;
+	go.addr = req->mc_bus;
+	go.ns = ns;
+	_emit_GO(0, insn, &go);
+
+	/* Set to generate interrupts for SEV */
+	writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN);
+
+	/* Only manager can execute GO */
+	_execute_DBGINSN(thrd, insn, true);
+
+	thrd->req_running = idx;
+
+	return true;
+}
+
+static bool _start(struct pl330_thread *thrd)
+{
+	switch (_state(thrd)) {
+	case PL330_STATE_FAULT_COMPLETING:
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+		if (_state(thrd) == PL330_STATE_KILLING)
+			UNTIL(thrd, PL330_STATE_STOPPED)
+
+	case PL330_STATE_FAULTING:
+		_stop(thrd);
+
+	case PL330_STATE_KILLING:
+	case PL330_STATE_COMPLETING:
+		UNTIL(thrd, PL330_STATE_STOPPED)
+
+	case PL330_STATE_STOPPED:
+		return _trigger(thrd);
+
+	case PL330_STATE_WFP:
+	case PL330_STATE_QUEUEBUSY:
+	case PL330_STATE_ATBARRIER:
+	case PL330_STATE_UPDTPC:
+	case PL330_STATE_CACHEMISS:
+	case PL330_STATE_EXECUTING:
+		return true;
+
+	case PL330_STATE_WFE: /* For RESUME, nothing yet */
+	default:
+		return false;
+	}
+}
+
+static inline int _ldst_memtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+	struct pl330_config *pcfg = pxs->r->cfg->pcfg;
+
+	/* check lock-up free version */
+	if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) {
+		while (cyc--) {
+			off += _emit_LD(dry_run, &buf[off], ALWAYS);
+			off += _emit_ST(dry_run, &buf[off], ALWAYS);
+		}
+	} else {
+		while (cyc--) {
+			off += _emit_LD(dry_run, &buf[off], ALWAYS);
+			off += _emit_RMB(dry_run, &buf[off]);
+			off += _emit_ST(dry_run, &buf[off], ALWAYS);
+			off += _emit_WMB(dry_run, &buf[off]);
+		}
+	}
+
+	return off;
+}
+
+static inline int _ldst_devtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	while (cyc--) {
+		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_LDP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_ST(dry_run, &buf[off], ALWAYS);
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+	}
+
+	return off;
+}
+
+static inline int _ldst_memtodev(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	while (cyc--) {
+		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_LD(dry_run, &buf[off], ALWAYS);
+		off += _emit_STP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+	}
+
+	return off;
+}
+
+static int _bursts(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	switch (pxs->r->rqtype) {
+	case MEMTODEV:
+		off += _ldst_memtodev(dry_run, &buf[off], pxs, cyc);
+		break;
+	case DEVTOMEM:
+		off += _ldst_devtomem(dry_run, &buf[off], pxs, cyc);
+		break;
+	case MEMTOMEM:
+		off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc);
+		break;
+	default:
+		off += 0x40000000; /* Scare off the Client */
+		break;
+	}
+
+	return off;
+}
+
+/* Returns bytes consumed and updates bursts */
+static inline int _loop(unsigned dry_run, u8 buf[],
+		unsigned long *bursts, const struct _xfer_spec *pxs)
+{
+	int cyc, cycmax, szlp, szlpend, szbrst, off;
+	unsigned lcnt0, lcnt1, ljmp0, ljmp1;
+	struct _arg_LPEND lpend;
+
+	/* Max iterations possible in DMALP is 256 */
+	if (*bursts >= 256*256) {
+		lcnt1 = 256;
+		lcnt0 = 256;
+		cyc = *bursts / lcnt1 / lcnt0;
+	} else if (*bursts > 256) {
+		lcnt1 = 256;
+		lcnt0 = *bursts / lcnt1;
+		cyc = 1;
+	} else {
+		lcnt1 = *bursts;
+		lcnt0 = 0;
+		cyc = 1;
+	}
+
+	szlp = _emit_LP(1, buf, 0, 0);
+	szbrst = _bursts(1, buf, pxs, 1);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 0;
+	lpend.bjump = 0;
+	szlpend = _emit_LPEND(1, buf, &lpend);
+
+	if (lcnt0) {
+		szlp *= 2;
+		szlpend *= 2;
+	}
+
+	/*
+	 * Max bursts that we can unroll due to limit on the
+	 * size of backward jump that can be encoded in DMALPEND
+	 * which is 8-bits and hence 255
+	 */
+	cycmax = (255 - (szlp + szlpend)) / szbrst;
+
+	cyc = (cycmax < cyc) ? cycmax : cyc;
+
+	off = 0;
+
+	if (lcnt0) {
+		off += _emit_LP(dry_run, &buf[off], 0, lcnt0);
+		ljmp0 = off;
+	}
+
+	off += _emit_LP(dry_run, &buf[off], 1, lcnt1);
+	ljmp1 = off;
+
+	off += _bursts(dry_run, &buf[off], pxs, cyc);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 1;
+	lpend.bjump = off - ljmp1;
+	off += _emit_LPEND(dry_run, &buf[off], &lpend);
+
+	if (lcnt0) {
+		lpend.cond = ALWAYS;
+		lpend.forever = false;
+		lpend.loop = 0;
+		lpend.bjump = off - ljmp0;
+		off += _emit_LPEND(dry_run, &buf[off], &lpend);
+	}
+
+	*bursts = lcnt1 * cyc;
+	if (lcnt0)
+		*bursts *= lcnt0;
+
+	return off;
+}
+
+static inline int _setup_loops(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = pxs->x;
+	u32 ccr = pxs->ccr;
+	unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr);
+	int off = 0;
+
+	while (bursts) {
+		c = bursts;
+		off += _loop(dry_run, &buf[off], &c, pxs);
+		bursts -= c;
+	}
+
+	return off;
+}
+
+static inline int _setup_xfer(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = pxs->x;
+	int off = 0;
+
+	/* DMAMOV SAR, x->src_addr */
+	off += _emit_MOV(dry_run, &buf[off], SAR, x->src_addr);
+	/* DMAMOV DAR, x->dst_addr */
+	off += _emit_MOV(dry_run, &buf[off], DAR, x->dst_addr);
+
+	/* Setup Loop(s) */
+	off += _setup_loops(dry_run, &buf[off], pxs);
+
+	return off;
+}
+
+/*
+ * A req is a sequence of one or more xfer units.
+ * Returns the number of bytes taken to setup the MC for the req.
+ */
+static int _setup_req(unsigned dry_run, struct pl330_thread *thrd,
+		unsigned index, struct _xfer_spec *pxs)
+{
+	struct _pl330_req *req = &thrd->req[index];
+	struct pl330_xfer *x;
+	u8 *buf = req->mc_cpu;
+	int off = 0;
+
+	PL330_DBGMC_START(req->mc_bus);
+
+	/* DMAMOV CCR, ccr */
+	off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr);
+
+	x = pxs->r->x;
+	do {
+		/* Error if xfer length is not aligned at burst size */
+		if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr)))
+			return -EINVAL;
+
+		pxs->x = x;
+		off += _setup_xfer(dry_run, &buf[off], pxs);
+
+		x = x->next;
+	} while (x);
+
+	/* DMASEV peripheral/event */
+	off += _emit_SEV(dry_run, &buf[off], thrd->ev);
+	/* DMAEND */
+	off += _emit_END(dry_run, &buf[off]);
+
+	return off;
+}
+
+static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc)
+{
+	u32 ccr = 0;
+
+	if (rqc->src_inc)
+		ccr |= CC_SRCINC;
+
+	if (rqc->dst_inc)
+		ccr |= CC_DSTINC;
+
+	/* We set same protection levels for Src and DST for now */
+	if (rqc->privileged)
+		ccr |= CC_SRCPRI | CC_DSTPRI;
+	if (rqc->nonsecure)
+		ccr |= CC_SRCNS | CC_DSTNS;
+	if (rqc->insnaccess)
+		ccr |= CC_SRCIA | CC_DSTIA;
+
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);
+
+	ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT);
+	ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT);
+
+	ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT);
+	ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT);
+
+	ccr |= (rqc->swap << CC_SWAP_SHFT);
+
+	return ccr;
+}
+
+static inline bool _is_valid(u32 ccr)
+{
+	enum pl330_dstcachectrl dcctl;
+	enum pl330_srccachectrl scctl;
+
+	dcctl = (ccr >> CC_DSTCCTRL_SHFT) & CC_DRCCCTRL_MASK;
+	scctl = (ccr >> CC_SRCCCTRL_SHFT) & CC_SRCCCTRL_MASK;
+
+	if (dcctl == DINVALID1 || dcctl == DINVALID2
+			|| scctl == SINVALID1 || scctl == SINVALID2)
+		return false;
+	else
+		return true;
+}
+
+/*
+ * Submit a list of xfers after which the client wants notification.
+ * Client is not notified after each xfer unit, just once after all
+ * xfer units are done or some error occurs.
+ */
+static int pl330_submit_req(void *ch_id, struct pl330_req *r)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	struct pl330_info *pi;
+	struct _xfer_spec xs;
+	unsigned long flags;
+	void __iomem *regs;
+	unsigned idx;
+	u32 ccr;
+	int ret = 0;
+
+	/* No Req or Unacquired Channel or DMAC */
+	if (!r || !thrd || thrd->free)
+		return -EINVAL;
+
+	pl330 = thrd->dmac;
+	pi = pl330->pinfo;
+	regs = pi->base;
+
+	if (pl330->state == DYING
+		|| pl330->dmac_tbd.reset_chan & (1 << thrd->id)) {
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d\n",
+			__func__, __LINE__);
+		return -EAGAIN;
+	}
+
+	/* If request for non-existing peripheral */
+	if (r->rqtype != MEMTOMEM && r->peri >= pi->pcfg.num_peri) {
+		dev_info(thrd->dmac->pinfo->dev,
+				"%s:%d Invalid peripheral(%u)!\n",
+				__func__, __LINE__, r->peri);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	if (_queue_full(thrd)) {
+		ret = -EAGAIN;
+		goto xfer_exit;
+	}
+
+	/* Prefer Secure Channel */
+	if (!_manager_ns(thrd))
+		r->cfg->nonsecure = 0;
+	else
+		r->cfg->nonsecure = 1;
+
+	/* Use last settings, if not provided */
+	if (r->cfg)
+		ccr = _prepare_ccr(r->cfg);
+	else
+		ccr = readl(regs + CC(thrd->id));
+
+	/* If this req doesn't have valid xfer settings */
+	if (!_is_valid(ccr)) {
+		ret = -EINVAL;
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d Invalid CCR(%x)!\n",
+			__func__, __LINE__, ccr);
+		goto xfer_exit;
+	}
+
+	idx = IS_FREE(&thrd->req[0]) ? 0 : 1;
+
+	xs.ccr = ccr;
+	xs.r = r;
+
+	/* First dry run to check if req is acceptable */
+	ret = _setup_req(1, thrd, idx, &xs);
+	if (ret < 0)
+		goto xfer_exit;
+
+	if (ret > pi->mcbufsz / 2) {
+		dev_info(thrd->dmac->pinfo->dev,
+			"%s:%d Trying increasing mcbufsz\n",
+				__func__, __LINE__);
+		ret = -ENOMEM;
+		goto xfer_exit;
+	}
+
+	/* Hook the request */
+	thrd->lstenq = idx;
+	thrd->req[idx].mc_len = _setup_req(0, thrd, idx, &xs);
+	thrd->req[idx].r = r;
+
+	ret = 0;
+
+xfer_exit:
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return ret;
+}
+
+static void pl330_dotask(unsigned long data)
+{
+	struct pl330_dmac *pl330 = (struct pl330_dmac *) data;
+	struct pl330_info *pi = pl330->pinfo;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	/* The DMAC itself gone nuts */
+	if (pl330->dmac_tbd.reset_dmac) {
+		pl330->state = DYING;
+		/* Reset the manager too */
+		pl330->dmac_tbd.reset_mngr = true;
+		/* Clear the reset flag */
+		pl330->dmac_tbd.reset_dmac = false;
+	}
+
+	if (pl330->dmac_tbd.reset_mngr) {
+		_stop(pl330->manager);
+		/* Reset all channels */
+		pl330->dmac_tbd.reset_chan = (1 << pi->pcfg.num_chan) - 1;
+		/* Clear the reset flag */
+		pl330->dmac_tbd.reset_mngr = false;
+	}
+
+	for (i = 0; i < pi->pcfg.num_chan; i++) {
+
+		if (pl330->dmac_tbd.reset_chan & (1 << i)) {
+			struct pl330_thread *thrd = &pl330->channels[i];
+			void __iomem *regs = pi->base;
+			enum pl330_op_err err;
+
+			_stop(thrd);
+
+			if (readl(regs + FSC) & (1 << thrd->id))
+				err = PL330_ERR_FAIL;
+			else
+				err = PL330_ERR_ABORT;
+
+			spin_unlock_irqrestore(&pl330->lock, flags);
+
+			_callback(thrd->req[1 - thrd->lstenq].r, err);
+			_callback(thrd->req[thrd->lstenq].r, err);
+
+			spin_lock_irqsave(&pl330->lock, flags);
+
+			thrd->req[0].r = NULL;
+			thrd->req[1].r = NULL;
+			mark_free(thrd, 0);
+			mark_free(thrd, 1);
+
+			/* Clear the reset flag */
+			pl330->dmac_tbd.reset_chan &= ~(1 << i);
+		}
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return;
+}
+
+/* Returns 1 if state was updated, 0 otherwise */
+static int pl330_update(const struct pl330_info *pi)
+{
+	struct _pl330_req *rqdone;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	void __iomem *regs;
+	u32 val;
+	int id, ev, ret = 0;
+
+	if (!pi || !pi->pl330_data)
+		return 0;
+
+	regs = pi->base;
+	pl330 = pi->pl330_data;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	val = readl(regs + FSM) & 0x1;
+	if (val)
+		pl330->dmac_tbd.reset_mngr = true;
+	else
+		pl330->dmac_tbd.reset_mngr = false;
+
+	val = readl(regs + FSC) & ((1 << pi->pcfg.num_chan) - 1);
+	pl330->dmac_tbd.reset_chan |= val;
+	if (val) {
+		int i = 0;
+		while (i < pi->pcfg.num_chan) {
+			if (val & (1 << i)) {
+				dev_info(pi->dev,
+					"Reset Channel-%d\t CS-%x FTC-%x\n",
+						i, readl(regs + CS(i)),
+						readl(regs + FTC(i)));
+				_stop(&pl330->channels[i]);
+			}
+			i++;
+		}
+	}
+
+	/* Check which event happened i.e, thread notified */
+	val = readl(regs + ES);
+	if (pi->pcfg.num_events < 32
+			&& val & ~((1 << pi->pcfg.num_events) - 1)) {
+		pl330->dmac_tbd.reset_dmac = true;
+		dev_err(pi->dev, "%s:%d Unexpected!\n", __func__, __LINE__);
+		ret = 1;
+		goto updt_exit;
+	}
+
+	for (ev = 0; ev < pi->pcfg.num_events; ev++) {
+		if (val & (1 << ev)) { /* Event occurred */
+			struct pl330_thread *thrd;
+			u32 inten = readl(regs + INTEN);
+			int active;
+
+			/* Clear the event */
+			if (inten & (1 << ev))
+				writel(1 << ev, regs + INTCLR);
+
+			ret = 1;
+
+			id = pl330->events[ev];
+
+			thrd = &pl330->channels[id];
+
+			active = thrd->req_running;
+			if (active == -1) /* Aborted */
+				continue;
+
+			rqdone = &thrd->req[active];
+			mark_free(thrd, active);
+
+			/* Get going again ASAP */
+			_start(thrd);
+
+			/* For now, just make a list of callbacks to be done */
+			list_add_tail(&rqdone->rqd, &pl330->req_done);
+		}
+	}
+
+	/* Now that we are in no hurry, do the callbacks */
+	while (!list_empty(&pl330->req_done)) {
+		struct pl330_req *r;
+
+		rqdone = container_of(pl330->req_done.next,
+					struct _pl330_req, rqd);
+
+		list_del_init(&rqdone->rqd);
+
+		/* Detach the req */
+		r = rqdone->r;
+		rqdone->r = NULL;
+
+		spin_unlock_irqrestore(&pl330->lock, flags);
+		_callback(r, PL330_ERR_NONE);
+		spin_lock_irqsave(&pl330->lock, flags);
+	}
+
+updt_exit:
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	if (pl330->dmac_tbd.reset_dmac
+			|| pl330->dmac_tbd.reset_mngr
+			|| pl330->dmac_tbd.reset_chan) {
+		ret = 1;
+		tasklet_schedule(&pl330->tasks);
+	}
+
+	return ret;
+}
+
+static int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	int ret = 0, active;
+
+	if (!thrd || thrd->free || thrd->dmac->state == DYING)
+		return -EINVAL;
+
+	pl330 = thrd->dmac;
+	active = thrd->req_running;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	switch (op) {
+	case PL330_OP_FLUSH:
+		/* Make sure the channel is stopped */
+		_stop(thrd);
+
+		thrd->req[0].r = NULL;
+		thrd->req[1].r = NULL;
+		mark_free(thrd, 0);
+		mark_free(thrd, 1);
+		break;
+
+	case PL330_OP_ABORT:
+		/* Make sure the channel is stopped */
+		_stop(thrd);
+
+		/* ABORT is only for the active req */
+		if (active == -1)
+			break;
+
+		thrd->req[active].r = NULL;
+		mark_free(thrd, active);
+
+		/* Start the next */
+	case PL330_OP_START:
+		if ((active == -1) && !_start(thrd))
+			ret = -EIO;
+		break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+	return ret;
+}
+
+/* Reserve an event */
+static inline int _alloc_event(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+	int ev;
+
+	for (ev = 0; ev < pi->pcfg.num_events; ev++)
+		if (pl330->events[ev] == -1) {
+			pl330->events[ev] = thrd->id;
+			return ev;
+		}
+
+	return -1;
+}
+
+static bool _chan_ns(const struct pl330_info *pi, int i)
+{
+	return pi->pcfg.irq_ns & (1 << i);
+}
+
+/* Upon success, returns IdentityToken for the
+ * allocated channel, NULL otherwise.
+ */
+static void *pl330_request_channel(const struct pl330_info *pi)
+{
+	struct pl330_thread *thrd = NULL;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	int chans, i;
+
+	if (!pi || !pi->pl330_data)
+		return NULL;
+
+	pl330 = pi->pl330_data;
+
+	if (pl330->state == DYING)
+		return NULL;
+
+	chans = pi->pcfg.num_chan;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		if ((thrd->free) && (!_manager_ns(thrd) ||
+					_chan_ns(pi, i))) {
+			thrd->ev = _alloc_event(thrd);
+			if (thrd->ev >= 0) {
+				thrd->free = false;
+				thrd->lstenq = 1;
+				thrd->req[0].r = NULL;
+				mark_free(thrd, 0);
+				thrd->req[1].r = NULL;
+				mark_free(thrd, 1);
+				break;
+			}
+		}
+		thrd = NULL;
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return thrd;
+}
+
+/* Release an event */
+static inline void _free_event(struct pl330_thread *thrd, int ev)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+
+	/* If the event is valid and was held by the thread */
+	if (ev >= 0 && ev < pi->pcfg.num_events
+			&& pl330->events[ev] == thrd->id)
+		pl330->events[ev] = -1;
+}
+
+static void pl330_release_channel(void *ch_id)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+
+	if (!thrd || thrd->free)
+		return;
+
+	_stop(thrd);
+
+	_callback(thrd->req[1 - thrd->lstenq].r, PL330_ERR_ABORT);
+	_callback(thrd->req[thrd->lstenq].r, PL330_ERR_ABORT);
+
+	pl330 = thrd->dmac;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+	_free_event(thrd, thrd->ev);
+	thrd->free = true;
+	spin_unlock_irqrestore(&pl330->lock, flags);
+}
+
+/* Initialize the structure for PL330 configuration, that can be used
+ * by the client driver the make best use of the DMAC
+ */
+static void read_dmac_config(struct pl330_info *pi)
+{
+	void __iomem *regs = pi->base;
+	u32 val;
+
+	val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT;
+	val &= CRD_DATA_WIDTH_MASK;
+	pi->pcfg.data_bus_width = 8 * (1 << val);
+
+	val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT;
+	val &= CRD_DATA_BUFF_MASK;
+	pi->pcfg.data_buf_dep = val + 1;
+
+	val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT;
+	val &= CR0_NUM_CHANS_MASK;
+	val += 1;
+	pi->pcfg.num_chan = val;
+
+	val = readl(regs + CR0);
+	if (val & CR0_PERIPH_REQ_SET) {
+		val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK;
+		val += 1;
+		pi->pcfg.num_peri = val;
+		pi->pcfg.peri_ns = readl(regs + CR4);
+	} else {
+		pi->pcfg.num_peri = 0;
+	}
+
+	val = readl(regs + CR0);
+	if (val & CR0_BOOT_MAN_NS)
+		pi->pcfg.mode |= DMAC_MODE_NS;
+	else
+		pi->pcfg.mode &= ~DMAC_MODE_NS;
+
+	val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT;
+	val &= CR0_NUM_EVENTS_MASK;
+	val += 1;
+	pi->pcfg.num_events = val;
+
+	pi->pcfg.irq_ns = readl(regs + CR3);
+
+	pi->pcfg.periph_id = get_id(pi, PERIPH_ID);
+	pi->pcfg.pcell_id = get_id(pi, PCELL_ID);
+}
+
+static inline void _reset_thread(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+
+	thrd->req[0].mc_cpu = pl330->mcode_cpu
+				+ (thrd->id * pi->mcbufsz);
+	thrd->req[0].mc_bus = pl330->mcode_bus
+				+ (thrd->id * pi->mcbufsz);
+	thrd->req[0].r = NULL;
+	mark_free(thrd, 0);
+
+	thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
+				+ pi->mcbufsz / 2;
+	thrd->req[1].mc_bus = thrd->req[0].mc_bus
+				+ pi->mcbufsz / 2;
+	thrd->req[1].r = NULL;
+	mark_free(thrd, 1);
+}
+
+static int dmac_alloc_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Allocate 1 Manager and 'chans' Channel threads */
+	pl330->channels = kzalloc((1 + chans) * sizeof(*thrd),
+					GFP_KERNEL);
+	if (!pl330->channels)
+		return -ENOMEM;
+
+	/* Init Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		thrd->id = i;
+		thrd->dmac = pl330;
+		_reset_thread(thrd);
+		thrd->free = true;
+	}
+
+	/* MANAGER is indexed at the end */
+	thrd = &pl330->channels[chans];
+	thrd->id = chans;
+	thrd->dmac = pl330;
+	thrd->free = false;
+	pl330->manager = thrd;
+
+	return 0;
+}
+
+static int dmac_alloc_resources(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	int ret;
+
+	/*
+	 * Alloc MicroCode buffer for 'chans' Channel threads.
+	 * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN)
+	 */
+	pl330->mcode_cpu = dma_alloc_coherent(pi->dev,
+				chans * pi->mcbufsz,
+				&pl330->mcode_bus, GFP_KERNEL);
+	if (!pl330->mcode_cpu) {
+		dev_err(pi->dev, "%s:%d Can't allocate memory!\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	ret = dmac_alloc_threads(pl330);
+	if (ret) {
+		dev_err(pi->dev, "%s:%d Can't to create channels for DMAC!\n",
+			__func__, __LINE__);
+		dma_free_coherent(pi->dev,
+				chans * pi->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int pl330_add(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330;
+	void __iomem *regs;
+	int i, ret;
+
+	if (!pi || !pi->dev)
+		return -EINVAL;
+
+	/* If already added */
+	if (pi->pl330_data)
+		return -EINVAL;
+
+	/*
+	 * If the SoC can perform reset on the DMAC, then do it
+	 * before reading its configuration.
+	 */
+	if (pi->dmac_reset)
+		pi->dmac_reset(pi);
+
+	regs = pi->base;
+
+	/* Check if we can handle this DMAC */
+	if ((get_id(pi, PERIPH_ID) & 0xfffff) != PERIPH_ID_VAL
+	   || get_id(pi, PCELL_ID) != PCELL_ID_VAL) {
+		dev_err(pi->dev, "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n",
+			get_id(pi, PERIPH_ID), get_id(pi, PCELL_ID));
+		return -EINVAL;
+	}
+
+	/* Read the configuration of the DMAC */
+	read_dmac_config(pi);
+
+	if (pi->pcfg.num_events == 0) {
+		dev_err(pi->dev, "%s:%d Can't work without events!\n",
+			__func__, __LINE__);
+		return -EINVAL;
+	}
+
+	pl330 = kzalloc(sizeof(*pl330), GFP_KERNEL);
+	if (!pl330) {
+		dev_err(pi->dev, "%s:%d Can't allocate memory!\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	/* Assign the info structure and private data */
+	pl330->pinfo = pi;
+	pi->pl330_data = pl330;
+
+	spin_lock_init(&pl330->lock);
+
+	INIT_LIST_HEAD(&pl330->req_done);
+
+	/* Use default MC buffer size if not provided */
+	if (!pi->mcbufsz)
+		pi->mcbufsz = MCODE_BUFF_PER_REQ * 2;
+
+	/* Mark all events as free */
+	for (i = 0; i < pi->pcfg.num_events; i++)
+		pl330->events[i] = -1;
+
+	/* Allocate resources needed by the DMAC */
+	ret = dmac_alloc_resources(pl330);
+	if (ret) {
+		dev_err(pi->dev, "Unable to create channels for DMAC\n");
+		kfree(pl330);
+		return ret;
+	}
+
+	tasklet_init(&pl330->tasks, pl330_dotask, (unsigned long) pl330);
+
+	pl330->state = INIT;
+
+	return 0;
+}
+
+static int dmac_free_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Release Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		pl330_release_channel((void *)thrd);
+	}
+
+	/* Free memory */
+	kfree(pl330->channels);
+
+	return 0;
+}
+
+static void dmac_free_resources(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+
+	dmac_free_threads(pl330);
+
+	dma_free_coherent(pi->dev, chans * pi->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+}
+
+static void pl330_del(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330;
+
+	if (!pi || !pi->pl330_data)
+		return;
+
+	pl330 = pi->pl330_data;
+
+	pl330->state = UNINIT;
+
+	tasklet_kill(&pl330->tasks);
+
+	/* Free DMAC resources */
+	dmac_free_resources(pl330);
+
+	kfree(pl330);
+	pi->pl330_data = NULL;
+}
+
+/* forward declaration */
+static struct amba_driver pl330_driver;
+
+static inline struct dma_pl330_chan *
+to_pchan(struct dma_chan *ch)
+{
+	if (!ch)
+		return NULL;
+
+	return container_of(ch, struct dma_pl330_chan, chan);
+}
+
+static inline struct dma_pl330_desc *
+to_desc(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct dma_pl330_desc, txd);
+}
+
+static inline void free_desc_list(struct list_head *list)
+{
+	struct dma_pl330_dmac *pdmac;
+	struct dma_pl330_desc *desc;
+	struct dma_pl330_chan *pch = NULL;
+	unsigned long flags;
+
+	/* Finish off the work list */
+	list_for_each_entry(desc, list, node) {
+		dma_async_tx_callback callback;
+		void *param;
+
+		/* All desc in a list belong to same channel */
+		pch = desc->pchan;
+		callback = desc->txd.callback;
+		param = desc->txd.callback_param;
+
+		if (callback)
+			callback(param);
+
+		desc->pchan = NULL;
+	}
+
+	/* pch will be unset if list was empty */
+	if (!pch)
+		return;
+
+	pdmac = pch->dmac;
+
+	spin_lock_irqsave(&pdmac->pool_lock, flags);
+	list_splice_tail_init(list, &pdmac->desc_pool);
+	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+}
+
+static inline void handle_cyclic_desc_list(struct list_head *list)
+{
+	struct dma_pl330_desc *desc;
+	struct dma_pl330_chan *pch = NULL;
+	unsigned long flags;
+
+	list_for_each_entry(desc, list, node) {
+		dma_async_tx_callback callback;
+
+		/* Change status to reload it */
+		desc->status = PREP;
+		pch = desc->pchan;
+		callback = desc->txd.callback;
+		if (callback)
+			callback(desc->txd.callback_param);
+	}
+
+	/* pch will be unset if list was empty */
+	if (!pch)
+		return;
+
+	spin_lock_irqsave(&pch->lock, flags);
+	list_splice_tail_init(list, &pch->work_list);
+	spin_unlock_irqrestore(&pch->lock, flags);
+}
+
+static inline void fill_queue(struct dma_pl330_chan *pch)
+{
+	struct dma_pl330_desc *desc;
+	int ret;
+
+	list_for_each_entry(desc, &pch->work_list, node) {
+
+		/* If already submitted */
+		if (desc->status == BUSY)
+			break;
+
+		ret = pl330_submit_req(pch->pl330_chid,
+						&desc->req);
+		if (!ret) {
+			desc->status = BUSY;
+			break;
+		} else if (ret == -EAGAIN) {
+			/* QFull or DMAC Dying */
+			break;
+		} else {
+			/* Unacceptable request */
+			desc->status = DONE;
+			dev_err(pch->dmac->pif.dev, "%s:%d Bad Desc(%d)\n",
+					__func__, __LINE__, desc->txd.cookie);
+			tasklet_schedule(&pch->task);
+		}
+	}
+}
+
+static void pl330_tasklet(unsigned long data)
+{
+	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
+	struct dma_pl330_desc *desc, *_dt;
+	unsigned long flags;
+	LIST_HEAD(list);
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	/* Pick up ripe tomatoes */
+	list_for_each_entry_safe(desc, _dt, &pch->work_list, node)
+		if (desc->status == DONE) {
+			if (!pch->cyclic)
+				dma_cookie_complete(&desc->txd);
+			list_move_tail(&desc->node, &list);
+		}
+
+	/* Try to submit a req imm. next to the last completed cookie */
+	fill_queue(pch);
+
+	/* Make sure the PL330 Channel thread is active */
+	pl330_chan_ctrl(pch->pl330_chid, PL330_OP_START);
+
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	if (pch->cyclic)
+		handle_cyclic_desc_list(&list);
+	else
+		free_desc_list(&list);
+}
+
+static void dma_pl330_rqcb(void *token, enum pl330_op_err err)
+{
+	struct dma_pl330_desc *desc = token;
+	struct dma_pl330_chan *pch = desc->pchan;
+	unsigned long flags;
+
+	/* If desc aborted */
+	if (!pch)
+		return;
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	desc->status = DONE;
+
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	tasklet_schedule(&pch->task);
+}
+
+bool pl330_filter(struct dma_chan *chan, void *param)
+{
+	u8 *peri_id;
+
+	if (chan->device->dev->driver != &pl330_driver.drv)
+		return false;
+
+#ifdef CONFIG_OF
+	if (chan->device->dev->of_node) {
+		const __be32 *prop_value;
+		phandle phandle;
+		struct device_node *node;
+
+		prop_value = ((struct property *)param)->value;
+		phandle = be32_to_cpup(prop_value++);
+		node = of_find_node_by_phandle(phandle);
+		return ((chan->private == node) &&
+				(chan->chan_id == be32_to_cpup(prop_value)));
+	}
+#endif
+
+	peri_id = chan->private;
+	return *peri_id == (unsigned)param;
+}
+EXPORT_SYMBOL(pl330_filter);
+
+static int pl330_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct dma_pl330_dmac *pdmac = pch->dmac;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	dma_cookie_init(chan);
+	pch->cyclic = false;
+
+	pch->pl330_chid = pl330_request_channel(&pdmac->pif);
+	if (!pch->pl330_chid) {
+		spin_unlock_irqrestore(&pch->lock, flags);
+		return 0;
+	}
+
+	tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch);
+
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	return 1;
+}
+
+static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct dma_pl330_desc *desc, *_dt;
+	unsigned long flags;
+	struct dma_pl330_dmac *pdmac = pch->dmac;
+	struct dma_slave_config *slave_config;
+	LIST_HEAD(list);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		spin_lock_irqsave(&pch->lock, flags);
+
+		/* FLUSH the PL330 Channel thread */
+		pl330_chan_ctrl(pch->pl330_chid, PL330_OP_FLUSH);
+
+		/* Mark all desc done */
+		list_for_each_entry_safe(desc, _dt, &pch->work_list , node) {
+			desc->status = DONE;
+			list_move_tail(&desc->node, &list);
+		}
+
+		list_splice_tail_init(&list, &pdmac->desc_pool);
+		spin_unlock_irqrestore(&pch->lock, flags);
+		break;
+	case DMA_SLAVE_CONFIG:
+		slave_config = (struct dma_slave_config *)arg;
+
+		if (slave_config->direction == DMA_MEM_TO_DEV) {
+			if (slave_config->dst_addr)
+				pch->fifo_addr = slave_config->dst_addr;
+			if (slave_config->dst_addr_width)
+				pch->burst_sz = __ffs(slave_config->dst_addr_width);
+			if (slave_config->dst_maxburst)
+				pch->burst_len = slave_config->dst_maxburst;
+		} else if (slave_config->direction == DMA_DEV_TO_MEM) {
+			if (slave_config->src_addr)
+				pch->fifo_addr = slave_config->src_addr;
+			if (slave_config->src_addr_width)
+				pch->burst_sz = __ffs(slave_config->src_addr_width);
+			if (slave_config->src_maxburst)
+				pch->burst_len = slave_config->src_maxburst;
+		}
+		break;
+	default:
+		dev_err(pch->dmac->pif.dev, "Not supported command.\n");
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static void pl330_free_chan_resources(struct dma_chan *chan)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	tasklet_kill(&pch->task);
+
+	pl330_release_channel(pch->pl330_chid);
+	pch->pl330_chid = NULL;
+
+	if (pch->cyclic)
+		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
+
+	spin_unlock_irqrestore(&pch->lock, flags);
+}
+
+static enum dma_status
+pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		 struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void pl330_issue_pending(struct dma_chan *chan)
+{
+	pl330_tasklet((unsigned long) to_pchan(chan));
+}
+
+/*
+ * We returned the last one of the circular list of descriptor(s)
+ * from prep_xxx, so the argument to submit corresponds to the last
+ * descriptor of the list.
+ */
+static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_pl330_desc *desc, *last = to_desc(tx);
+	struct dma_pl330_chan *pch = to_pchan(tx->chan);
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	/* Assign cookies to all nodes */
+	while (!list_empty(&last->node)) {
+		desc = list_entry(last->node.next, struct dma_pl330_desc, node);
+
+		dma_cookie_assign(&desc->txd);
+
+		list_move_tail(&desc->node, &pch->work_list);
+	}
+
+	cookie = dma_cookie_assign(&last->txd);
+	list_add_tail(&last->node, &pch->work_list);
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	return cookie;
+}
+
+static inline void _init_desc(struct dma_pl330_desc *desc)
+{
+	desc->pchan = NULL;
+	desc->req.x = &desc->px;
+	desc->req.token = desc;
+	desc->rqcfg.swap = SWAP_NO;
+	desc->rqcfg.privileged = 0;
+	desc->rqcfg.insnaccess = 0;
+	desc->rqcfg.scctl = SCCTRL0;
+	desc->rqcfg.dcctl = DCCTRL0;
+	desc->req.cfg = &desc->rqcfg;
+	desc->req.xfer_cb = dma_pl330_rqcb;
+	desc->txd.tx_submit = pl330_tx_submit;
+
+	INIT_LIST_HEAD(&desc->node);
+}
+
+/* Returns the number of descriptors added to the DMAC pool */
+int add_desc(struct dma_pl330_dmac *pdmac, gfp_t flg, int count)
+{
+	struct dma_pl330_desc *desc;
+	unsigned long flags;
+	int i;
+
+	if (!pdmac)
+		return 0;
+
+	desc = kmalloc(count * sizeof(*desc), flg);
+	if (!desc)
+		return 0;
+
+	spin_lock_irqsave(&pdmac->pool_lock, flags);
+
+	for (i = 0; i < count; i++) {
+		_init_desc(&desc[i]);
+		list_add_tail(&desc[i].node, &pdmac->desc_pool);
+	}
+
+	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+
+	return count;
+}
+
+static struct dma_pl330_desc *
+pluck_desc(struct dma_pl330_dmac *pdmac)
+{
+	struct dma_pl330_desc *desc = NULL;
+	unsigned long flags;
+
+	if (!pdmac)
+		return NULL;
+
+	spin_lock_irqsave(&pdmac->pool_lock, flags);
+
+	if (!list_empty(&pdmac->desc_pool)) {
+		desc = list_entry(pdmac->desc_pool.next,
+				struct dma_pl330_desc, node);
+
+		list_del_init(&desc->node);
+
+		desc->status = PREP;
+		desc->txd.callback = NULL;
+	}
+
+	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+
+	return desc;
+}
+
+static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
+{
+	struct dma_pl330_dmac *pdmac = pch->dmac;
+	u8 *peri_id = pch->chan.private;
+	struct dma_pl330_desc *desc;
+
+	/* Pluck one desc from the pool of DMAC */
+	desc = pluck_desc(pdmac);
+
+	/* If the DMAC pool is empty, alloc new */
+	if (!desc) {
+		if (!add_desc(pdmac, GFP_ATOMIC, 1))
+			return NULL;
+
+		/* Try again */
+		desc = pluck_desc(pdmac);
+		if (!desc) {
+			dev_err(pch->dmac->pif.dev,
+				"%s:%d ALERT!\n", __func__, __LINE__);
+			return NULL;
+		}
+	}
+
+	/* Initialize the descriptor */
+	desc->pchan = pch;
+	desc->txd.cookie = 0;
+	async_tx_ack(&desc->txd);
+
+	desc->req.peri = peri_id ? pch->chan.chan_id : 0;
+	desc->rqcfg.pcfg = &pch->dmac->pif.pcfg;
+
+	dma_async_tx_descriptor_init(&desc->txd, &pch->chan);
+
+	return desc;
+}
+
+static inline void fill_px(struct pl330_xfer *px,
+		dma_addr_t dst, dma_addr_t src, size_t len)
+{
+	px->next = NULL;
+	px->bytes = len;
+	px->dst_addr = dst;
+	px->src_addr = src;
+}
+
+static struct dma_pl330_desc *
+__pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst,
+		dma_addr_t src, size_t len)
+{
+	struct dma_pl330_desc *desc = pl330_get_desc(pch);
+
+	if (!desc) {
+		dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n",
+			__func__, __LINE__);
+		return NULL;
+	}
+
+	/*
+	 * Ideally we should lookout for reqs bigger than
+	 * those that can be programmed with 256 bytes of
+	 * MC buffer, but considering a req size is seldom
+	 * going to be word-unaligned and more than 200MB,
+	 * we take it easy.
+	 * Also, should the limit is reached we'd rather
+	 * have the platform increase MC buffer size than
+	 * complicating this API driver.
+	 */
+	fill_px(&desc->px, dst, src, len);
+
+	return desc;
+}
+
+/* Call after fixing burst size */
+static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
+{
+	struct dma_pl330_chan *pch = desc->pchan;
+	struct pl330_info *pi = &pch->dmac->pif;
+	int burst_len;
+
+	burst_len = pi->pcfg.data_bus_width / 8;
+	burst_len *= pi->pcfg.data_buf_dep;
+	burst_len >>= desc->rqcfg.brst_size;
+
+	/* src/dst_burst_len can't be more than 16 */
+	if (burst_len > 16)
+		burst_len = 16;
+
+	while (burst_len > 1) {
+		if (!(len % (burst_len << desc->rqcfg.brst_size)))
+			break;
+		burst_len--;
+	}
+
+	return burst_len;
+}
+
+static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
+{
+	struct dma_pl330_desc *desc;
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	dma_addr_t dst;
+	dma_addr_t src;
+
+	desc = pl330_get_desc(pch);
+	if (!desc) {
+		dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n",
+			__func__, __LINE__);
+		return NULL;
+	}
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		desc->rqcfg.src_inc = 1;
+		desc->rqcfg.dst_inc = 0;
+		desc->req.rqtype = MEMTODEV;
+		src = dma_addr;
+		dst = pch->fifo_addr;
+		break;
+	case DMA_DEV_TO_MEM:
+		desc->rqcfg.src_inc = 0;
+		desc->rqcfg.dst_inc = 1;
+		desc->req.rqtype = DEVTOMEM;
+		src = pch->fifo_addr;
+		dst = dma_addr;
+		break;
+	default:
+		dev_err(pch->dmac->pif.dev, "%s:%d Invalid dma direction\n",
+		__func__, __LINE__);
+		return NULL;
+	}
+
+	desc->rqcfg.brst_size = pch->burst_sz;
+	desc->rqcfg.brst_len = 1;
+
+	pch->cyclic = true;
+
+	fill_px(&desc->px, dst, src, period_len);
+
+	return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
+		dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct dma_pl330_desc *desc;
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct pl330_info *pi;
+	int burst;
+
+	if (unlikely(!pch || !len))
+		return NULL;
+
+	pi = &pch->dmac->pif;
+
+	desc = __pl330_prep_dma_memcpy(pch, dst, src, len);
+	if (!desc)
+		return NULL;
+
+	desc->rqcfg.src_inc = 1;
+	desc->rqcfg.dst_inc = 1;
+	desc->req.rqtype = MEMTOMEM;
+
+	/* Select max possible burst size */
+	burst = pi->pcfg.data_bus_width / 8;
+
+	while (burst > 1) {
+		if (!(len % burst))
+			break;
+		burst /= 2;
+	}
+
+	desc->rqcfg.brst_size = 0;
+	while (burst != (1 << desc->rqcfg.brst_size))
+		desc->rqcfg.brst_size++;
+
+	desc->rqcfg.brst_len = get_burst_len(desc, len);
+
+	desc->txd.flags = flags;
+
+	return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flg, void *context)
+{
+	struct dma_pl330_desc *first, *desc = NULL;
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct scatterlist *sg;
+	unsigned long flags;
+	int i;
+	dma_addr_t addr;
+
+	if (unlikely(!pch || !sgl || !sg_len))
+		return NULL;
+
+	addr = pch->fifo_addr;
+
+	first = NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+
+		desc = pl330_get_desc(pch);
+		if (!desc) {
+			struct dma_pl330_dmac *pdmac = pch->dmac;
+
+			dev_err(pch->dmac->pif.dev,
+				"%s:%d Unable to fetch desc\n",
+				__func__, __LINE__);
+			if (!first)
+				return NULL;
+
+			spin_lock_irqsave(&pdmac->pool_lock, flags);
+
+			while (!list_empty(&first->node)) {
+				desc = list_entry(first->node.next,
+						struct dma_pl330_desc, node);
+				list_move_tail(&desc->node, &pdmac->desc_pool);
+			}
+
+			list_move_tail(&first->node, &pdmac->desc_pool);
+
+			spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+
+			return NULL;
+		}
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->node);
+
+		if (direction == DMA_MEM_TO_DEV) {
+			desc->rqcfg.src_inc = 1;
+			desc->rqcfg.dst_inc = 0;
+			desc->req.rqtype = MEMTODEV;
+			fill_px(&desc->px,
+				addr, sg_dma_address(sg), sg_dma_len(sg));
+		} else {
+			desc->rqcfg.src_inc = 0;
+			desc->rqcfg.dst_inc = 1;
+			desc->req.rqtype = DEVTOMEM;
+			fill_px(&desc->px,
+				sg_dma_address(sg), addr, sg_dma_len(sg));
+		}
+
+		desc->rqcfg.brst_size = pch->burst_sz;
+		desc->rqcfg.brst_len = 1;
+	}
+
+	/* Return the last desc in the chain */
+	desc->txd.flags = flg;
+	return &desc->txd;
+}
+
+static irqreturn_t pl330_irq_handler(int irq, void *data)
+{
+	if (pl330_update(data))
+		return IRQ_HANDLED;
+	else
+		return IRQ_NONE;
+}
+
+static int __devinit
+pl330_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	struct dma_pl330_platdata *pdat;
+	struct dma_pl330_dmac *pdmac;
+	struct dma_pl330_chan *pch;
+	struct pl330_info *pi;
+	struct dma_device *pd;
+	struct resource *res;
+	int i, ret, irq;
+	int num_chan;
+
+	pdat = adev->dev.platform_data;
+
+	/* Allocate a new DMAC and its Channels */
+	pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL);
+	if (!pdmac) {
+		dev_err(&adev->dev, "unable to allocate mem\n");
+		return -ENOMEM;
+	}
+
+	pi = &pdmac->pif;
+	pi->dev = &adev->dev;
+	pi->pl330_data = NULL;
+	pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
+
+	res = &adev->res;
+	request_mem_region(res->start, resource_size(res), "dma-pl330");
+
+	pi->base = ioremap(res->start, resource_size(res));
+	if (!pi->base) {
+		ret = -ENXIO;
+		goto probe_err1;
+	}
+
+	pdmac->clk = clk_get(&adev->dev, "dma");
+	if (IS_ERR(pdmac->clk)) {
+		dev_err(&adev->dev, "Cannot get operation clock.\n");
+		ret = -EINVAL;
+		goto probe_err2;
+	}
+
+	amba_set_drvdata(adev, pdmac);
+
+#ifndef CONFIG_PM_RUNTIME
+	/* enable dma clk */
+	clk_enable(pdmac->clk);
+#endif
+
+	irq = adev->irq[0];
+	ret = request_irq(irq, pl330_irq_handler, 0,
+			dev_name(&adev->dev), pi);
+	if (ret)
+		goto probe_err3;
+
+	ret = pl330_add(pi);
+	if (ret)
+		goto probe_err4;
+
+	INIT_LIST_HEAD(&pdmac->desc_pool);
+	spin_lock_init(&pdmac->pool_lock);
+
+	/* Create a descriptor pool of default size */
+	if (!add_desc(pdmac, GFP_KERNEL, NR_DEFAULT_DESC))
+		dev_warn(&adev->dev, "unable to allocate desc\n");
+
+	pd = &pdmac->ddma;
+	INIT_LIST_HEAD(&pd->channels);
+
+	/* Initialize channel parameters */
+	if (pdat)
+		num_chan = max_t(int, pdat->nr_valid_peri, pi->pcfg.num_chan);
+	else
+		num_chan = max_t(int, pi->pcfg.num_peri, pi->pcfg.num_chan);
+
+	pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL);
+
+	for (i = 0; i < num_chan; i++) {
+		pch = &pdmac->peripherals[i];
+		if (!adev->dev.of_node)
+			pch->chan.private = pdat ? &pdat->peri_id[i] : NULL;
+		else
+			pch->chan.private = adev->dev.of_node;
+
+		INIT_LIST_HEAD(&pch->work_list);
+		spin_lock_init(&pch->lock);
+		pch->pl330_chid = NULL;
+		pch->chan.device = pd;
+		pch->dmac = pdmac;
+
+		/* Add the channel to the DMAC list */
+		list_add_tail(&pch->chan.device_node, &pd->channels);
+	}
+
+	pd->dev = &adev->dev;
+	if (pdat) {
+		pd->cap_mask = pdat->cap_mask;
+	} else {
+		dma_cap_set(DMA_MEMCPY, pd->cap_mask);
+		if (pi->pcfg.num_peri) {
+			dma_cap_set(DMA_SLAVE, pd->cap_mask);
+			dma_cap_set(DMA_CYCLIC, pd->cap_mask);
+		}
+	}
+
+	pd->device_alloc_chan_resources = pl330_alloc_chan_resources;
+	pd->device_free_chan_resources = pl330_free_chan_resources;
+	pd->device_prep_dma_memcpy = pl330_prep_dma_memcpy;
+	pd->device_prep_dma_cyclic = pl330_prep_dma_cyclic;
+	pd->device_tx_status = pl330_tx_status;
+	pd->device_prep_slave_sg = pl330_prep_slave_sg;
+	pd->device_control = pl330_control;
+	pd->device_issue_pending = pl330_issue_pending;
+
+	ret = dma_async_device_register(pd);
+	if (ret) {
+		dev_err(&adev->dev, "unable to register DMAC\n");
+		goto probe_err5;
+	}
+
+	dev_info(&adev->dev,
+		"Loaded driver for PL330 DMAC-%d\n", adev->periphid);
+	dev_info(&adev->dev,
+		"\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n",
+		pi->pcfg.data_buf_dep,
+		pi->pcfg.data_bus_width / 8, pi->pcfg.num_chan,
+		pi->pcfg.num_peri, pi->pcfg.num_events);
+
+	return 0;
+
+probe_err5:
+	pl330_del(pi);
+probe_err4:
+	free_irq(irq, pi);
+probe_err3:
+#ifndef CONFIG_PM_RUNTIME
+	clk_disable(pdmac->clk);
+#endif
+	clk_put(pdmac->clk);
+probe_err2:
+	iounmap(pi->base);
+probe_err1:
+	release_mem_region(res->start, resource_size(res));
+	kfree(pdmac);
+
+	return ret;
+}
+
+static int __devexit pl330_remove(struct amba_device *adev)
+{
+	struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev);
+	struct dma_pl330_chan *pch, *_p;
+	struct pl330_info *pi;
+	struct resource *res;
+	int irq;
+
+	if (!pdmac)
+		return 0;
+
+	amba_set_drvdata(adev, NULL);
+
+	/* Idle the DMAC */
+	list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
+			chan.device_node) {
+
+		/* Remove the channel */
+		list_del(&pch->chan.device_node);
+
+		/* Flush the channel */
+		pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0);
+		pl330_free_chan_resources(&pch->chan);
+	}
+
+	pi = &pdmac->pif;
+
+	pl330_del(pi);
+
+	irq = adev->irq[0];
+	free_irq(irq, pi);
+
+	iounmap(pi->base);
+
+	res = &adev->res;
+	release_mem_region(res->start, resource_size(res));
+
+#ifndef CONFIG_PM_RUNTIME
+	clk_disable(pdmac->clk);
+#endif
+
+	kfree(pdmac);
+
+	return 0;
+}
+
+static struct amba_id pl330_ids[] = {
+	{
+		.id	= 0x00041330,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+MODULE_DEVICE_TABLE(amba, pl330_ids);
+
+#ifdef CONFIG_PM_RUNTIME
+static int pl330_runtime_suspend(struct device *dev)
+{
+	struct dma_pl330_dmac *pdmac = dev_get_drvdata(dev);
+
+	if (!pdmac) {
+		dev_err(dev, "failed to get dmac\n");
+		return -ENODEV;
+	}
+
+	clk_disable(pdmac->clk);
+
+	return 0;
+}
+
+static int pl330_runtime_resume(struct device *dev)
+{
+	struct dma_pl330_dmac *pdmac = dev_get_drvdata(dev);
+
+	if (!pdmac) {
+		dev_err(dev, "failed to get dmac\n");
+		return -ENODEV;
+	}
+
+	clk_enable(pdmac->clk);
+
+	return 0;
+}
+#else
+#define pl330_runtime_suspend	NULL
+#define pl330_runtime_resume	NULL
+#endif /* CONFIG_PM_RUNTIME */
+
+static const struct dev_pm_ops pl330_pm_ops = {
+	.runtime_suspend = pl330_runtime_suspend,
+	.runtime_resume = pl330_runtime_resume,
+};
+
+static struct amba_driver pl330_driver = {
+	.drv = {
+		.owner = THIS_MODULE,
+		.name = "dma-pl330",
+		.pm = &pl330_pm_ops,
+	},
+	.id_table = pl330_ids,
+	.probe = pl330_probe,
+	.remove = pl330_remove,
+};
+
+module_amba_driver(pl330_driver);
+
+MODULE_AUTHOR("Jaswinder Singh <jassi.brar@samsung.com>");
+MODULE_DESCRIPTION("API Driver for PL330 DMAC");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ppc4xx/Makefile b/drivers/dma/ppc4xx/Makefile
new file mode 100644
index 00000000..b3d259b3
--- /dev/null
+++ b/drivers/dma/ppc4xx/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += adma.o
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
new file mode 100644
index 00000000..ced98826
--- /dev/null
+++ b/drivers/dma/ppc4xx/adma.c
@@ -0,0 +1,4992 @@
+/*
+ * Copyright (C) 2006-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * Further porting to arch/powerpc by
+ * 	Anatolij Gustschin <agust@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the AMCC PPC440SPe Processors.
+ * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ * ADMA driver written by D.Williams.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include "adma.h"
+#include "../dmaengine.h"
+
+enum ppc_adma_init_code {
+	PPC_ADMA_INIT_OK = 0,
+	PPC_ADMA_INIT_MEMRES,
+	PPC_ADMA_INIT_MEMREG,
+	PPC_ADMA_INIT_ALLOC,
+	PPC_ADMA_INIT_COHERENT,
+	PPC_ADMA_INIT_CHANNEL,
+	PPC_ADMA_INIT_IRQ1,
+	PPC_ADMA_INIT_IRQ2,
+	PPC_ADMA_INIT_REGISTER
+};
+
+static char *ppc_adma_errors[] = {
+	[PPC_ADMA_INIT_OK] = "ok",
+	[PPC_ADMA_INIT_MEMRES] = "failed to get memory resource",
+	[PPC_ADMA_INIT_MEMREG] = "failed to request memory region",
+	[PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev "
+				"structure",
+	[PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for "
+				   "hardware descriptors",
+	[PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel",
+	[PPC_ADMA_INIT_IRQ1] = "failed to request first irq",
+	[PPC_ADMA_INIT_IRQ2] = "failed to request second irq",
+	[PPC_ADMA_INIT_REGISTER] = "failed to register dma async device",
+};
+
+static enum ppc_adma_init_code
+ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM];
+
+struct ppc_dma_chan_ref {
+	struct dma_chan *chan;
+	struct list_head node;
+};
+
+/* The list of channels exported by ppc440spe ADMA */
+struct list_head
+ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list);
+
+/* This flag is set when want to refetch the xor chain in the interrupt
+ * handler
+ */
+static u32 do_xor_refetch;
+
+/* Pointer to DMA0, DMA1 CP/CS FIFO */
+static void *ppc440spe_dma_fifo_buf;
+
+/* Pointers to last submitted to DMA0, DMA1 CDBs */
+static struct ppc440spe_adma_desc_slot *chan_last_sub[3];
+static struct ppc440spe_adma_desc_slot *chan_first_cdb[3];
+
+/* Pointer to last linked and submitted xor CB */
+static struct ppc440spe_adma_desc_slot *xor_last_linked;
+static struct ppc440spe_adma_desc_slot *xor_last_submit;
+
+/* This array is used in data-check operations for storing a pattern */
+static char ppc440spe_qword[16];
+
+static atomic_t ppc440spe_adma_err_irq_ref;
+static dcr_host_t ppc440spe_mq_dcr_host;
+static unsigned int ppc440spe_mq_dcr_len;
+
+/* Since RXOR operations use the common register (MQ0_CF2H) for setting-up
+ * the block size in transactions, then we do not allow to activate more than
+ * only one RXOR transactions simultaneously. So use this var to store
+ * the information about is RXOR currently active (PPC440SPE_RXOR_RUN bit is
+ * set) or not (PPC440SPE_RXOR_RUN is clear).
+ */
+static unsigned long ppc440spe_rxor_state;
+
+/* These are used in enable & check routines
+ */
+static u32 ppc440spe_r6_enabled;
+static struct ppc440spe_adma_chan *ppc440spe_r6_tchan;
+static struct completion ppc440spe_r6_test_comp;
+
+static int ppc440spe_adma_dma2rxor_prep_src(
+		struct ppc440spe_adma_desc_slot *desc,
+		struct ppc440spe_rxor *cursor, int index,
+		int src_cnt, u32 addr);
+static void ppc440spe_adma_dma2rxor_set_src(
+		struct ppc440spe_adma_desc_slot *desc,
+		int index, dma_addr_t addr);
+static void ppc440spe_adma_dma2rxor_set_mult(
+		struct ppc440spe_adma_desc_slot *desc,
+		int index, u8 mult);
+
+#ifdef ADMA_LL_DEBUG
+#define ADMA_LL_DBG(x) ({ if (1) x; 0; })
+#else
+#define ADMA_LL_DBG(x) ({ if (0) x; 0; })
+#endif
+
+static void print_cb(struct ppc440spe_adma_chan *chan, void *block)
+{
+	struct dma_cdb *cdb;
+	struct xor_cb *cb;
+	int i;
+
+	switch (chan->device->id) {
+	case 0:
+	case 1:
+		cdb = block;
+
+		pr_debug("CDB at %p [%d]:\n"
+			"\t attr 0x%02x opc 0x%02x cnt 0x%08x\n"
+			"\t sg1u 0x%08x sg1l 0x%08x\n"
+			"\t sg2u 0x%08x sg2l 0x%08x\n"
+			"\t sg3u 0x%08x sg3l 0x%08x\n",
+			cdb, chan->device->id,
+			cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt),
+			le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l),
+			le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l),
+			le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l)
+		);
+		break;
+	case 2:
+		cb = block;
+
+		pr_debug("CB at %p [%d]:\n"
+			"\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n"
+			"\t cbtah 0x%08x cbtal 0x%08x\n"
+			"\t cblah 0x%08x cblal 0x%08x\n",
+			cb, chan->device->id,
+			cb->cbc, cb->cbbc, cb->cbs,
+			cb->cbtah, cb->cbtal,
+			cb->cblah, cb->cblal);
+		for (i = 0; i < 16; i++) {
+			if (i && !cb->ops[i].h && !cb->ops[i].l)
+				continue;
+			pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n",
+				i, cb->ops[i].h, cb->ops[i].l);
+		}
+		break;
+	}
+}
+
+static void print_cb_list(struct ppc440spe_adma_chan *chan,
+			  struct ppc440spe_adma_desc_slot *iter)
+{
+	for (; iter; iter = iter->hw_next)
+		print_cb(chan, iter->hw_desc);
+}
+
+static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src,
+			     unsigned int src_cnt)
+{
+	int i;
+
+	pr_debug("\n%s(%d):\nsrc: ", __func__, id);
+	for (i = 0; i < src_cnt; i++)
+		pr_debug("\t0x%016llx ", src[i]);
+	pr_debug("dst:\n\t0x%016llx\n", dst);
+}
+
+static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
+			    unsigned int src_cnt)
+{
+	int i;
+
+	pr_debug("\n%s(%d):\nsrc: ", __func__, id);
+	for (i = 0; i < src_cnt; i++)
+		pr_debug("\t0x%016llx ", src[i]);
+	pr_debug("dst: ");
+	for (i = 0; i < 2; i++)
+		pr_debug("\t0x%016llx ", dst[i]);
+}
+
+static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src,
+				    unsigned int src_cnt,
+				    const unsigned char *scf)
+{
+	int i;
+
+	pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id);
+	if (scf) {
+		for (i = 0; i < src_cnt; i++)
+			pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]);
+	} else {
+		for (i = 0; i < src_cnt; i++)
+			pr_debug("\t0x%016llx(no) ", src[i]);
+	}
+
+	pr_debug("dst: ");
+	for (i = 0; i < 2; i++)
+		pr_debug("\t0x%016llx ", src[src_cnt + i]);
+}
+
+/******************************************************************************
+ * Command (Descriptor) Blocks low-level routines
+ ******************************************************************************/
+/**
+ * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT
+ * pseudo operation
+ */
+static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc,
+					  struct ppc440spe_adma_chan *chan)
+{
+	struct xor_cb *p;
+
+	switch (chan->device->id) {
+	case PPC440SPE_XOR_ID:
+		p = desc->hw_desc;
+		memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+		/* NOP with Command Block Complete Enable */
+		p->cbc = XOR_CBCR_CBCE_BIT;
+		break;
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+		/* NOP with interrupt */
+		set_bit(PPC440SPE_DESC_INT, &desc->flags);
+		break;
+	default:
+		printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id,
+				__func__);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR
+ * pseudo operation
+ */
+static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc)
+{
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = 0;
+	desc->dst_cnt = 1;
+}
+
+/**
+ * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation
+ */
+static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc,
+					 int src_cnt, unsigned long flags)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = 1;
+
+	hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt;
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Enable interrupt on completion */
+		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+/**
+ * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ
+ * operation in DMA2 controller
+ */
+static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc,
+		int dst_cnt, int src_cnt, unsigned long flags)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+	memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags));
+	desc->descs_per_op = 0;
+
+	hw_desc->cbc = XOR_CBCR_TGT_BIT;
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Enable interrupt on completion */
+		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+#define DMA_CTRL_FLAGS_LAST	DMA_PREP_FENCE
+#define DMA_PREP_ZERO_P		(DMA_CTRL_FLAGS_LAST << 1)
+#define DMA_PREP_ZERO_Q		(DMA_PREP_ZERO_P << 1)
+
+/**
+ * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation
+ * with DMA0/1
+ */
+static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc,
+				int dst_cnt, int src_cnt, unsigned long flags,
+				unsigned long op)
+{
+	struct dma_cdb *hw_desc;
+	struct ppc440spe_adma_desc_slot *iter;
+	u8 dopc;
+
+	/* Common initialization of a PQ descriptors chain */
+	set_bits(op, &desc->flags);
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+
+	/* WXOR MULTICAST if both P and Q are being computed
+	 * MV_SG1_SG2 if Q only
+	 */
+	dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ?
+		DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2;
+
+	list_for_each_entry(iter, &desc->group_list, chain_node) {
+		hw_desc = iter->hw_desc;
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+
+		if (likely(!list_is_last(&iter->chain_node,
+				&desc->group_list))) {
+			/* set 'next' pointer */
+			iter->hw_next = list_entry(iter->chain_node.next,
+				struct ppc440spe_adma_desc_slot, chain_node);
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		} else {
+			/* this is the last descriptor.
+			 * this slot will be pasted from ADMA level
+			 * each time it wants to configure parameters
+			 * of the transaction (src, dst, ...)
+			 */
+			iter->hw_next = NULL;
+			if (flags & DMA_PREP_INTERRUPT)
+				set_bit(PPC440SPE_DESC_INT, &iter->flags);
+			else
+				clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		}
+	}
+
+	/* Set OPS depending on WXOR/RXOR type of operation */
+	if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) {
+		/* This is a WXOR only chain:
+		 * - first descriptors are for zeroing destinations
+		 *   if PPC440SPE_ZERO_P/Q set;
+		 * - descriptors remained are for GF-XOR operations.
+		 */
+		iter = list_first_entry(&desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+
+		if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		}
+
+		if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		}
+
+		list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = dopc;
+		}
+	} else {
+		/* This is either RXOR-only or mixed RXOR/WXOR */
+
+		/* The first 1 or 2 slots in chain are always RXOR,
+		 * if need to calculate P & Q, then there are two
+		 * RXOR slots; if only P or only Q, then there is one
+		 */
+		iter = list_first_entry(&desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+
+		if (desc->dst_cnt == DMA_DEST_MAX_NUM) {
+			iter = list_first_entry(&iter->chain_node,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		}
+
+		/* The remaining descs (if any) are WXORs */
+		if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) {
+			iter = list_first_entry(&iter->chain_node,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			list_for_each_entry_from(iter, &desc->group_list,
+						chain_node) {
+				hw_desc = iter->hw_desc;
+				hw_desc->opc = dopc;
+			}
+		}
+	}
+}
+
+/**
+ * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor
+ * for PQ_ZERO_SUM operation
+ */
+static void ppc440spe_desc_init_dma01pqzero_sum(
+				struct ppc440spe_adma_desc_slot *desc,
+				int dst_cnt, int src_cnt)
+{
+	struct dma_cdb *hw_desc;
+	struct ppc440spe_adma_desc_slot *iter;
+	int i = 0;
+	u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST :
+				   DMA_CDB_OPC_MV_SG1_SG2;
+	/*
+	 * Initialize starting from 2nd or 3rd descriptor dependent
+	 * on dst_cnt. First one or two slots are for cloning P
+	 * and/or Q to chan->pdest and/or chan->qdest as we have
+	 * to preserve original P/Q.
+	 */
+	iter = list_first_entry(&desc->group_list,
+				struct ppc440spe_adma_desc_slot, chain_node);
+	iter = list_entry(iter->chain_node.next,
+			  struct ppc440spe_adma_desc_slot, chain_node);
+
+	if (dst_cnt > 1) {
+		iter = list_entry(iter->chain_node.next,
+				  struct ppc440spe_adma_desc_slot, chain_node);
+	}
+	/* initialize each source descriptor in chain */
+	list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+		hw_desc = iter->hw_desc;
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->src_cnt = 0;
+		iter->dst_cnt = 0;
+
+		/* This is a ZERO_SUM operation:
+		 * - <src_cnt> descriptors starting from 2nd or 3rd
+		 *   descriptor are for GF-XOR operations;
+		 * - remaining <dst_cnt> descriptors are for checking the result
+		 */
+		if (i++ < src_cnt)
+			/* MV_SG1_SG2 if only Q is being verified
+			 * MULTICAST if both P and Q are being verified
+			 */
+			hw_desc->opc = dopc;
+		else
+			/* DMA_CDB_OPC_DCHECK128 operation */
+			hw_desc->opc = DMA_CDB_OPC_DCHECK128;
+
+		if (likely(!list_is_last(&iter->chain_node,
+					 &desc->group_list))) {
+			/* set 'next' pointer */
+			iter->hw_next = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+		} else {
+			/* this is the last descriptor.
+			 * this slot will be pasted from ADMA level
+			 * each time it wants to configure parameters
+			 * of the transaction (src, dst, ...)
+			 */
+			iter->hw_next = NULL;
+			/* always enable interrupt generation since we get
+			 * the status of pqzero from the handler
+			 */
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		}
+	}
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+}
+
+/**
+ * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation
+ */
+static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc,
+					unsigned long flags)
+{
+	struct dma_cdb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+	desc->hw_next = NULL;
+	desc->src_cnt = 1;
+	desc->dst_cnt = 1;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		set_bit(PPC440SPE_DESC_INT, &desc->flags);
+	else
+		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
+
+	hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+}
+
+/**
+ * ppc440spe_desc_init_memset - initialize the descriptor for MEMSET operation
+ */
+static void ppc440spe_desc_init_memset(struct ppc440spe_adma_desc_slot *desc,
+					int value, unsigned long flags)
+{
+	struct dma_cdb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+	desc->hw_next = NULL;
+	desc->src_cnt = 1;
+	desc->dst_cnt = 1;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		set_bit(PPC440SPE_DESC_INT, &desc->flags);
+	else
+		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
+
+	hw_desc->sg1u = hw_desc->sg1l = cpu_to_le32((u32)value);
+	hw_desc->sg3u = hw_desc->sg3l = cpu_to_le32((u32)value);
+	hw_desc->opc = DMA_CDB_OPC_DFILL128;
+}
+
+/**
+ * ppc440spe_desc_set_src_addr - set source address into the descriptor
+ */
+static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc,
+					struct ppc440spe_adma_chan *chan,
+					int src_idx, dma_addr_t addrh,
+					dma_addr_t addrl)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	phys_addr_t addr64, tmplow, tmphi;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (!addrh) {
+			addr64 = addrl;
+			tmphi = (addr64 >> 32);
+			tmplow = (addr64 & 0xFFFFFFFF);
+		} else {
+			tmphi = addrh;
+			tmplow = addrl;
+		}
+		dma_hw_desc = desc->hw_desc;
+		dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow);
+		dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->ops[src_idx].l = addrl;
+		xor_hw_desc->ops[src_idx].h |= addrh;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_src_mult - set source address mult into the descriptor
+ */
+static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc,
+			struct ppc440spe_adma_chan *chan, u32 mult_index,
+			int sg_index, unsigned char mult_value)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	u32 *psgu;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+
+		switch (sg_index) {
+		/* for RXOR operations set multiplier
+		 * into source cued address
+		 */
+		case DMA_CDB_SG_SRC:
+			psgu = &dma_hw_desc->sg1u;
+			break;
+		/* for WXOR operations set multiplier
+		 * into destination cued address(es)
+		 */
+		case DMA_CDB_SG_DST1:
+			psgu = &dma_hw_desc->sg2u;
+			break;
+		case DMA_CDB_SG_DST2:
+			psgu = &dma_hw_desc->sg3u;
+			break;
+		default:
+			BUG();
+		}
+
+		*psgu |= cpu_to_le32(mult_value << mult_index);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * ppc440spe_desc_set_dest_addr - set destination address into the descriptor
+ */
+static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan,
+				dma_addr_t addrh, dma_addr_t addrl,
+				u32 dst_idx)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	phys_addr_t addr64, tmphi, tmplow;
+	u32 *psgu, *psgl;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (!addrh) {
+			addr64 = addrl;
+			tmphi = (addr64 >> 32);
+			tmplow = (addr64 & 0xFFFFFFFF);
+		} else {
+			tmphi = addrh;
+			tmplow = addrl;
+		}
+		dma_hw_desc = desc->hw_desc;
+
+		psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u;
+		psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l;
+
+		*psgl = cpu_to_le32((u32)tmplow);
+		*psgu |= cpu_to_le32((u32)tmphi);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->cbtal = addrl;
+		xor_hw_desc->cbtah |= addrh;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_byte_count - set number of data bytes involved
+ * into the operation
+ */
+static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan,
+				u32 byte_count)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+		dma_hw_desc->cnt = cpu_to_le32(byte_count);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->cbbc = byte_count;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_rxor_block_size - set RXOR block size
+ */
+static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count)
+{
+	/* assume that byte_count is aligned on the 512-boundary;
+	 * thus write it directly to the register (bits 23:31 are
+	 * reserved there).
+	 */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
+}
+
+/**
+ * ppc440spe_desc_set_dcheck - set CHECK pattern
+ */
+static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan, u8 *qword)
+{
+	struct dma_cdb *dma_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+		iowrite32(qword[0], &dma_hw_desc->sg3l);
+		iowrite32(qword[4], &dma_hw_desc->sg3u);
+		iowrite32(qword[8], &dma_hw_desc->sg2l);
+		iowrite32(qword[12], &dma_hw_desc->sg2u);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * ppc440spe_xor_set_link - set link address in xor CB
+ */
+static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc,
+				struct ppc440spe_adma_desc_slot *next_desc)
+{
+	struct xor_cb *xor_hw_desc = prev_desc->hw_desc;
+
+	if (unlikely(!next_desc || !(next_desc->phys))) {
+		printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n",
+			__func__, next_desc,
+			next_desc ? next_desc->phys : 0);
+		BUG();
+	}
+
+	xor_hw_desc->cbs = 0;
+	xor_hw_desc->cblal = next_desc->phys;
+	xor_hw_desc->cblah = 0;
+	xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT;
+}
+
+/**
+ * ppc440spe_desc_set_link - set the address of descriptor following this
+ * descriptor in chain
+ */
+static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
+				struct ppc440spe_adma_desc_slot *prev_desc,
+				struct ppc440spe_adma_desc_slot *next_desc)
+{
+	unsigned long flags;
+	struct ppc440spe_adma_desc_slot *tail = next_desc;
+
+	if (unlikely(!prev_desc || !next_desc ||
+		(prev_desc->hw_next && prev_desc->hw_next != next_desc))) {
+		/* If previous next is overwritten something is wrong.
+		 * though we may refetch from append to initiate list
+		 * processing; in this case - it's ok.
+		 */
+		printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; "
+			"prev->hw_next=0x%p\n", __func__, prev_desc,
+			next_desc, prev_desc ? prev_desc->hw_next : 0);
+		BUG();
+	}
+
+	local_irq_save(flags);
+
+	/* do s/w chaining both for DMA and XOR descriptors */
+	prev_desc->hw_next = next_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		break;
+	case PPC440SPE_XOR_ID:
+		/* bind descriptor to the chain */
+		while (tail->hw_next)
+			tail = tail->hw_next;
+		xor_last_linked = tail;
+
+		if (prev_desc == xor_last_submit)
+			/* do not link to the last submitted CB */
+			break;
+		ppc440spe_xor_set_link(prev_desc, next_desc);
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
+ */
+static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan, int src_idx)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+		/* May have 0, 1, 2, or 3 sources */
+		switch (dma_hw_desc->opc) {
+		case DMA_CDB_OPC_NO_OP:
+		case DMA_CDB_OPC_DFILL128:
+			return 0;
+		case DMA_CDB_OPC_DCHECK128:
+			if (unlikely(src_idx)) {
+				printk(KERN_ERR "%s: try to get %d source for"
+				    " DCHECK128\n", __func__, src_idx);
+				BUG();
+			}
+			return le32_to_cpu(dma_hw_desc->sg1l);
+		case DMA_CDB_OPC_MULTICAST:
+		case DMA_CDB_OPC_MV_SG1_SG2:
+			if (unlikely(src_idx > 2)) {
+				printk(KERN_ERR "%s: try to get %d source from"
+				    " DMA descr\n", __func__, src_idx);
+				BUG();
+			}
+			if (src_idx) {
+				if (le32_to_cpu(dma_hw_desc->sg1u) &
+				    DMA_CUED_XOR_WIN_MSK) {
+					u8 region;
+
+					if (src_idx == 1)
+						return le32_to_cpu(
+						    dma_hw_desc->sg1l) +
+							desc->unmap_len;
+
+					region = (le32_to_cpu(
+					    dma_hw_desc->sg1u)) >>
+						DMA_CUED_REGION_OFF;
+
+					region &= DMA_CUED_REGION_MSK;
+					switch (region) {
+					case DMA_RXOR123:
+						return le32_to_cpu(
+						    dma_hw_desc->sg1l) +
+							(desc->unmap_len << 1);
+					case DMA_RXOR124:
+						return le32_to_cpu(
+						    dma_hw_desc->sg1l) +
+							(desc->unmap_len * 3);
+					case DMA_RXOR125:
+						return le32_to_cpu(
+						    dma_hw_desc->sg1l) +
+							(desc->unmap_len << 2);
+					default:
+						printk(KERN_ERR
+						    "%s: try to"
+						    " get src3 for region %02x"
+						    "PPC440SPE_DESC_RXOR12?\n",
+						    __func__, region);
+						BUG();
+					}
+				} else {
+					printk(KERN_ERR
+						"%s: try to get %d"
+						" source for non-cued descr\n",
+						__func__, src_idx);
+					BUG();
+				}
+			}
+			return le32_to_cpu(dma_hw_desc->sg1l);
+		default:
+			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+				__func__, dma_hw_desc->opc);
+			BUG();
+		}
+		return le32_to_cpu(dma_hw_desc->sg1l);
+	case PPC440SPE_XOR_ID:
+		/* May have up to 16 sources */
+		xor_hw_desc = desc->hw_desc;
+		return xor_hw_desc->ops[src_idx].l;
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_desc_get_dest_addr - extract the destination address from the
+ * descriptor
+ */
+static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan, int idx)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+
+		if (likely(!idx))
+			return le32_to_cpu(dma_hw_desc->sg2l);
+		return le32_to_cpu(dma_hw_desc->sg3l);
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		return xor_hw_desc->cbtal;
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_desc_get_src_num - extract the number of source addresses from
+ * the descriptor
+ */
+static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+
+		switch (dma_hw_desc->opc) {
+		case DMA_CDB_OPC_NO_OP:
+		case DMA_CDB_OPC_DFILL128:
+			return 0;
+		case DMA_CDB_OPC_DCHECK128:
+			return 1;
+		case DMA_CDB_OPC_MV_SG1_SG2:
+		case DMA_CDB_OPC_MULTICAST:
+			/*
+			 * Only for RXOR operations we have more than
+			 * one source
+			 */
+			if (le32_to_cpu(dma_hw_desc->sg1u) &
+			    DMA_CUED_XOR_WIN_MSK) {
+				/* RXOR op, there are 2 or 3 sources */
+				if (((le32_to_cpu(dma_hw_desc->sg1u) >>
+				    DMA_CUED_REGION_OFF) &
+				      DMA_CUED_REGION_MSK) == DMA_RXOR12) {
+					/* RXOR 1-2 */
+					return 2;
+				} else {
+					/* RXOR 1-2-3/1-2-4/1-2-5 */
+					return 3;
+				}
+			}
+			return 1;
+		default:
+			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+				__func__, dma_hw_desc->opc);
+			BUG();
+		}
+	case PPC440SPE_XOR_ID:
+		/* up to 16 sources */
+		xor_hw_desc = desc->hw_desc;
+		return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_desc_get_dst_num - get the number of destination addresses in
+ * this descriptor
+ */
+static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan)
+{
+	struct dma_cdb *dma_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* May be 1 or 2 destinations */
+		dma_hw_desc = desc->hw_desc;
+		switch (dma_hw_desc->opc) {
+		case DMA_CDB_OPC_NO_OP:
+		case DMA_CDB_OPC_DCHECK128:
+			return 0;
+		case DMA_CDB_OPC_MV_SG1_SG2:
+		case DMA_CDB_OPC_DFILL128:
+			return 1;
+		case DMA_CDB_OPC_MULTICAST:
+			if (desc->dst_cnt == 2)
+				return 2;
+			else
+				return 1;
+		default:
+			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
+				__func__, dma_hw_desc->opc);
+			BUG();
+		}
+	case PPC440SPE_XOR_ID:
+		/* Always only 1 destination */
+		return 1;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_desc_get_link - get the address of the descriptor that
+ * follows this one
+ */
+static inline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc,
+					struct ppc440spe_adma_chan *chan)
+{
+	if (!desc->hw_next)
+		return 0;
+
+	return desc->hw_next->phys;
+}
+
+/**
+ * ppc440spe_desc_is_aligned - check alignment
+ */
+static inline int ppc440spe_desc_is_aligned(
+	struct ppc440spe_adma_desc_slot *desc, int num_slots)
+{
+	return (desc->idx & (num_slots - 1)) ? 0 : 1;
+}
+
+/**
+ * ppc440spe_chan_xor_slot_count - get the number of slots necessary for
+ * XOR operation
+ */
+static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt,
+			int *slots_per_op)
+{
+	int slot_cnt;
+
+	/* each XOR descriptor provides up to 16 source operands */
+	slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS;
+
+	if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT))
+		return slot_cnt;
+
+	printk(KERN_ERR "%s: len %d > max %d !!\n",
+		__func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+	BUG();
+	return slot_cnt;
+}
+
+/**
+ * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for
+ * DMA2 PQ operation
+ */
+static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs,
+		int src_cnt, size_t len)
+{
+	signed long long order = 0;
+	int state = 0;
+	int addr_count = 0;
+	int i;
+	for (i = 1; i < src_cnt; i++) {
+		dma_addr_t cur_addr = srcs[i];
+		dma_addr_t old_addr = srcs[i-1];
+		switch (state) {
+		case 0:
+			if (cur_addr == old_addr + len) {
+				/* direct RXOR */
+				order = 1;
+				state = 1;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else if (old_addr == cur_addr + len) {
+				/* reverse RXOR */
+				order = -1;
+				state = 1;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else {
+				state = 3;
+			}
+			break;
+		case 1:
+			if (i == src_cnt-2 || (order == -1
+				&& cur_addr != old_addr - len)) {
+				order = 0;
+				state = 0;
+				addr_count++;
+			} else if (cur_addr == old_addr + len*order) {
+				state = 2;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else if (cur_addr == old_addr + 2*len) {
+				state = 2;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else if (cur_addr == old_addr + 3*len) {
+				state = 2;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else {
+				order = 0;
+				state = 0;
+				addr_count++;
+			}
+			break;
+		case 2:
+			order = 0;
+			state = 0;
+			addr_count++;
+				break;
+		}
+		if (state == 3)
+			break;
+	}
+	if (src_cnt <= 1 || (state != 1 && state != 2)) {
+		pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n",
+			__func__, src_cnt, state, addr_count, order);
+		for (i = 0; i < src_cnt; i++)
+			pr_err("\t[%d] 0x%llx \n", i, srcs[i]);
+		BUG();
+	}
+
+	return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS;
+}
+
+
+/******************************************************************************
+ * ADMA channel low-level routines
+ ******************************************************************************/
+
+static u32
+ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan);
+static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan);
+
+/**
+ * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine
+ */
+static void ppc440spe_adma_device_clear_eot_status(
+					struct ppc440spe_adma_chan *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+	u8 *p = chan->device->dma_desc_pool_virt;
+	struct dma_cdb *cdb;
+	u32 rv, i;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* read FIFO to ack */
+		dma_reg = chan->device->dma_reg;
+		while ((rv = ioread32(&dma_reg->csfpl))) {
+			i = rv & DMA_CDB_ADDR_MSK;
+			cdb = (struct dma_cdb *)&p[i -
+			    (u32)chan->device->dma_desc_pool];
+
+			/* Clear opcode to ack. This is necessary for
+			 * ZeroSum operations only
+			 */
+			cdb->opc = 0;
+
+			if (test_bit(PPC440SPE_RXOR_RUN,
+			    &ppc440spe_rxor_state)) {
+				/* probably this is a completed RXOR op,
+				 * get pointer to CDB using the fact that
+				 * physical and virtual addresses of CDB
+				 * in pools have the same offsets
+				 */
+				if (le32_to_cpu(cdb->sg1u) &
+				    DMA_CUED_XOR_BASE) {
+					/* this is a RXOR */
+					clear_bit(PPC440SPE_RXOR_RUN,
+						  &ppc440spe_rxor_state);
+				}
+			}
+
+			if (rv & DMA_CDB_STATUS_MSK) {
+				/* ZeroSum check failed
+				 */
+				struct ppc440spe_adma_desc_slot *iter;
+				dma_addr_t phys = rv & ~DMA_CDB_MSK;
+
+				/*
+				 * Update the status of corresponding
+				 * descriptor.
+				 */
+				list_for_each_entry(iter, &chan->chain,
+				    chain_node) {
+					if (iter->phys == phys)
+						break;
+				}
+				/*
+				 * if cannot find the corresponding
+				 * slot it's a bug
+				 */
+				BUG_ON(&iter->chain_node == &chan->chain);
+
+				if (iter->xor_check_result) {
+					if (test_bit(PPC440SPE_DESC_PCHECK,
+						     &iter->flags)) {
+						*iter->xor_check_result |=
+							SUM_CHECK_P_RESULT;
+					} else
+					if (test_bit(PPC440SPE_DESC_QCHECK,
+						     &iter->flags)) {
+						*iter->xor_check_result |=
+							SUM_CHECK_Q_RESULT;
+					} else
+						BUG();
+				}
+			}
+		}
+
+		rv = ioread32(&dma_reg->dsts);
+		if (rv) {
+			pr_err("DMA%d err status: 0x%x\n",
+			       chan->device->id, rv);
+			/* write back to clear */
+			iowrite32(rv, &dma_reg->dsts);
+		}
+		break;
+	case PPC440SPE_XOR_ID:
+		/* reset status bits to ack */
+		xor_reg = chan->device->xor_reg;
+		rv = ioread32be(&xor_reg->sr);
+		iowrite32be(rv, &xor_reg->sr);
+
+		if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) {
+			if (rv & XOR_IE_RPTIE_BIT) {
+				/* Read PLB Timeout Error.
+				 * Try to resubmit the CB
+				 */
+				u32 val = ioread32be(&xor_reg->ccbalr);
+
+				iowrite32be(val, &xor_reg->cblalr);
+
+				val = ioread32be(&xor_reg->crsr);
+				iowrite32be(val | XOR_CRSR_XAE_BIT,
+					    &xor_reg->crsr);
+			} else
+				pr_err("XOR ERR 0x%x status\n", rv);
+			break;
+		}
+
+		/*  if the XORcore is idle, but there are unprocessed CBs
+		 * then refetch the s/w chain here
+		 */
+		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) &&
+		    do_xor_refetch)
+			ppc440spe_chan_append(chan);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_chan_is_busy - get the channel status
+ */
+static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+	int busy = 0;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_reg = chan->device->dma_reg;
+		/*  if command FIFO's head and tail pointers are equal and
+		 * status tail is the same as command, then channel is free
+		 */
+		if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) ||
+		    ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp))
+			busy = 1;
+		break;
+	case PPC440SPE_XOR_ID:
+		/* use the special status bit for the XORcore
+		 */
+		xor_reg = chan->device->xor_reg;
+		busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0;
+		break;
+	}
+
+	return busy;
+}
+
+/**
+ * ppc440spe_chan_set_first_xor_descriptor -  init XORcore chain
+ */
+static void ppc440spe_chan_set_first_xor_descriptor(
+				struct ppc440spe_adma_chan *chan,
+				struct ppc440spe_adma_desc_slot *next_desc)
+{
+	struct xor_regs *xor_reg = chan->device->xor_reg;
+
+	if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)
+		printk(KERN_INFO "%s: Warn: XORcore is running "
+			"when try to set the first CDB!\n",
+			__func__);
+
+	xor_last_submit = xor_last_linked = next_desc;
+
+	iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr);
+
+	iowrite32be(next_desc->phys, &xor_reg->cblalr);
+	iowrite32be(0, &xor_reg->cblahr);
+	iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT,
+		    &xor_reg->cbcr);
+
+	chan->hw_chain_inited = 1;
+}
+
+/**
+ * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO.
+ * called with irqs disabled
+ */
+static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
+		struct ppc440spe_adma_desc_slot *desc)
+{
+	u32 pcdb;
+	struct dma_regs *dma_reg = chan->device->dma_reg;
+
+	pcdb = desc->phys;
+	if (!test_bit(PPC440SPE_DESC_INT, &desc->flags))
+		pcdb |= DMA_CDB_NO_INT;
+
+	chan_last_sub[chan->device->id] = desc;
+
+	ADMA_LL_DBG(print_cb(chan, desc->hw_desc));
+
+	iowrite32(pcdb, &dma_reg->cpfpl);
+}
+
+/**
+ * ppc440spe_chan_append - update the h/w chain in the channel
+ */
+static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan)
+{
+	struct xor_regs *xor_reg;
+	struct ppc440spe_adma_desc_slot *iter;
+	struct xor_cb *xcb;
+	u32 cur_desc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		cur_desc = ppc440spe_chan_get_current_descriptor(chan);
+
+		if (likely(cur_desc)) {
+			iter = chan_last_sub[chan->device->id];
+			BUG_ON(!iter);
+		} else {
+			/* first peer */
+			iter = chan_first_cdb[chan->device->id];
+			BUG_ON(!iter);
+			ppc440spe_dma_put_desc(chan, iter);
+			chan->hw_chain_inited = 1;
+		}
+
+		/* is there something new to append */
+		if (!iter->hw_next)
+			break;
+
+		/* flush descriptors from the s/w queue to fifo */
+		list_for_each_entry_continue(iter, &chan->chain, chain_node) {
+			ppc440spe_dma_put_desc(chan, iter);
+			if (!iter->hw_next)
+				break;
+		}
+		break;
+	case PPC440SPE_XOR_ID:
+		/* update h/w links and refetch */
+		if (!xor_last_submit->hw_next)
+			break;
+
+		xor_reg = chan->device->xor_reg;
+		/* the last linked CDB has to generate an interrupt
+		 * that we'd be able to append the next lists to h/w
+		 * regardless of the XOR engine state at the moment of
+		 * appending of these next lists
+		 */
+		xcb = xor_last_linked->hw_desc;
+		xcb->cbc |= XOR_CBCR_CBCE_BIT;
+
+		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) {
+			/* XORcore is idle. Refetch now */
+			do_xor_refetch = 0;
+			ppc440spe_xor_set_link(xor_last_submit,
+				xor_last_submit->hw_next);
+
+			ADMA_LL_DBG(print_cb_list(chan,
+				xor_last_submit->hw_next));
+
+			xor_last_submit = xor_last_linked;
+			iowrite32be(ioread32be(&xor_reg->crsr) |
+				    XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT,
+				    &xor_reg->crsr);
+		} else {
+			/* XORcore is running. Refetch later in the handler */
+			do_xor_refetch = 1;
+		}
+
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor
+ */
+static u32
+ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+
+	if (unlikely(!chan->hw_chain_inited))
+		/* h/w descriptor chain is not initialized yet */
+		return 0;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_reg = chan->device->dma_reg;
+		return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK);
+	case PPC440SPE_XOR_ID:
+		xor_reg = chan->device->xor_reg;
+		return ioread32be(&xor_reg->ccbalr);
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_chan_run - enable the channel
+ */
+static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan)
+{
+	struct xor_regs *xor_reg;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* DMAs are always enabled, do nothing */
+		break;
+	case PPC440SPE_XOR_ID:
+		/* drain write buffer */
+		xor_reg = chan->device->xor_reg;
+
+		/* fetch descriptor pointed to in <link> */
+		iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT,
+			    &xor_reg->crsr);
+		break;
+	}
+}
+
+/******************************************************************************
+ * ADMA device level
+ ******************************************************************************/
+
+static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan);
+static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan);
+
+static dma_cookie_t
+ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx);
+
+static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx,
+				    dma_addr_t addr, int index);
+static void
+ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx,
+				  dma_addr_t addr, int index);
+
+static void
+ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx,
+			   dma_addr_t *paddr, unsigned long flags);
+static void
+ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx,
+			  dma_addr_t addr, int index);
+static void
+ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx,
+			       unsigned char mult, int index, int dst_pos);
+static void
+ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx,
+				   dma_addr_t paddr, dma_addr_t qaddr);
+
+static struct page *ppc440spe_rxor_srcs[32];
+
+/**
+ * ppc440spe_can_rxor - check if the operands may be processed with RXOR
+ */
+static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len)
+{
+	int i, order = 0, state = 0;
+	int idx = 0;
+
+	if (unlikely(!(src_cnt > 1)))
+		return 0;
+
+	BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs));
+
+	/* Skip holes in the source list before checking */
+	for (i = 0; i < src_cnt; i++) {
+		if (!srcs[i])
+			continue;
+		ppc440spe_rxor_srcs[idx++] = srcs[i];
+	}
+	src_cnt = idx;
+
+	for (i = 1; i < src_cnt; i++) {
+		char *cur_addr = page_address(ppc440spe_rxor_srcs[i]);
+		char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]);
+
+		switch (state) {
+		case 0:
+			if (cur_addr == old_addr + len) {
+				/* direct RXOR */
+				order = 1;
+				state = 1;
+			} else if (old_addr == cur_addr + len) {
+				/* reverse RXOR */
+				order = -1;
+				state = 1;
+			} else
+				goto out;
+			break;
+		case 1:
+			if ((i == src_cnt - 2) ||
+			    (order == -1 && cur_addr != old_addr - len)) {
+				order = 0;
+				state = 0;
+			} else if ((cur_addr == old_addr + len * order) ||
+				   (cur_addr == old_addr + 2 * len) ||
+				   (cur_addr == old_addr + 3 * len)) {
+				state = 2;
+			} else {
+				order = 0;
+				state = 0;
+			}
+			break;
+		case 2:
+			order = 0;
+			state = 0;
+			break;
+		}
+	}
+
+out:
+	if (state == 1 || state == 2)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * ppc440spe_adma_device_estimate - estimate the efficiency of processing
+ *	the operation given on this channel. It's assumed that 'chan' is
+ *	capable to process 'cap' type of operation.
+ * @chan: channel to use
+ * @cap: type of transaction
+ * @dst_lst: array of destination pointers
+ * @dst_cnt: number of destination operands
+ * @src_lst: array of source pointers
+ * @src_cnt: number of source operands
+ * @src_sz: size of each source operand
+ */
+static int ppc440spe_adma_estimate(struct dma_chan *chan,
+	enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt,
+	struct page **src_lst, int src_cnt, size_t src_sz)
+{
+	int ef = 1;
+
+	if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
+		/* If RAID-6 capabilities were not activated don't try
+		 * to use them
+		 */
+		if (unlikely(!ppc440spe_r6_enabled))
+			return -1;
+	}
+	/*  In the current implementation of ppc440spe ADMA driver it
+	 * makes sense to pick out only pq case, because it may be
+	 * processed:
+	 * (1) either using Biskup method on DMA2;
+	 * (2) or on DMA0/1.
+	 *  Thus we give a favour to (1) if the sources are suitable;
+	 * else let it be processed on one of the DMA0/1 engines.
+	 *  In the sum_product case where destination is also the
+	 * source process it on DMA0/1 only.
+	 */
+	if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) {
+
+		if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
+			ef = 0; /* sum_product case, process on DMA0/1 */
+		else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz))
+			ef = 3; /* override (DMA0/1 + idle) */
+		else
+			ef = 0; /* can't process on DMA2 if !rxor */
+	}
+
+	/* channel idleness increases the priority */
+	if (likely(ef) &&
+	    !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan)))
+		ef++;
+
+	return ef;
+}
+
+struct dma_chan *
+ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap,
+	struct page **dst_lst, int dst_cnt, struct page **src_lst,
+	int src_cnt, size_t src_sz)
+{
+	struct dma_chan *best_chan = NULL;
+	struct ppc_dma_chan_ref *ref;
+	int best_rank = -1;
+
+	if (unlikely(!src_sz))
+		return NULL;
+	if (src_sz > PAGE_SIZE) {
+		/*
+		 * should a user of the api ever pass > PAGE_SIZE requests
+		 * we sort out cases where temporary page-sized buffers
+		 * are used.
+		 */
+		switch (cap) {
+		case DMA_PQ:
+			if (src_cnt == 1 && dst_lst[1] == src_lst[0])
+				return NULL;
+			if (src_cnt == 2 && dst_lst[1] == src_lst[1])
+				return NULL;
+			break;
+		case DMA_PQ_VAL:
+		case DMA_XOR_VAL:
+			return NULL;
+		default:
+			break;
+		}
+	}
+
+	list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) {
+		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
+			int rank;
+
+			rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst,
+					dst_cnt, src_lst, src_cnt, src_sz);
+			if (rank > best_rank) {
+				best_rank = rank;
+				best_chan = ref->chan;
+			}
+		}
+	}
+
+	return best_chan;
+}
+EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel);
+
+/**
+ * ppc440spe_get_group_entry - get group entry with index idx
+ * @tdesc: is the last allocated slot in the group.
+ */
+static struct ppc440spe_adma_desc_slot *
+ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx)
+{
+	struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
+	int i = 0;
+
+	if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) {
+		printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
+			__func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt);
+		BUG();
+	}
+
+	list_for_each_entry(iter, &tdesc->group_list, chain_node) {
+		if (i++ == entry_idx)
+			break;
+	}
+	return iter;
+}
+
+/**
+ * ppc440spe_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &ppc440spe_chan->lock while calling this function
+ */
+static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
+				      struct ppc440spe_adma_chan *chan)
+{
+	int stride = slot->slots_per_op;
+
+	while (stride--) {
+		slot->slots_per_op = 0;
+		slot = list_entry(slot->slot_node.next,
+				struct ppc440spe_adma_desc_slot,
+				slot_node);
+	}
+}
+
+static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan,
+				 struct ppc440spe_adma_desc_slot *desc)
+{
+	u32 src_cnt, dst_cnt;
+	dma_addr_t addr;
+
+	/*
+	 * get the number of sources & destination
+	 * included in this descriptor and unmap
+	 * them all
+	 */
+	src_cnt = ppc440spe_desc_get_src_num(desc, chan);
+	dst_cnt = ppc440spe_desc_get_dst_num(desc, chan);
+
+	/* unmap destinations */
+	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		while (dst_cnt--) {
+			addr = ppc440spe_desc_get_dest_addr(
+				desc, chan, dst_cnt);
+			dma_unmap_page(chan->device->dev,
+					addr, desc->unmap_len,
+					DMA_FROM_DEVICE);
+		}
+	}
+
+	/* unmap sources */
+	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		while (src_cnt--) {
+			addr = ppc440spe_desc_get_src_addr(
+				desc, chan, src_cnt);
+			dma_unmap_page(chan->device->dev,
+					addr, desc->unmap_len,
+					DMA_TO_DEVICE);
+		}
+	}
+}
+
+/**
+ * ppc440spe_adma_run_tx_complete_actions - call functions to be called
+ * upon completion
+ */
+static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
+		struct ppc440spe_adma_desc_slot *desc,
+		struct ppc440spe_adma_chan *chan,
+		dma_cookie_t cookie)
+{
+	int i;
+
+	BUG_ON(desc->async_tx.cookie < 0);
+	if (desc->async_tx.cookie > 0) {
+		cookie = desc->async_tx.cookie;
+		desc->async_tx.cookie = 0;
+
+		/* call the callback (must not sleep or submit new
+		 * operations to this channel)
+		 */
+		if (desc->async_tx.callback)
+			desc->async_tx.callback(
+				desc->async_tx.callback_param);
+
+		/* unmap dma addresses
+		 * (unmap_single vs unmap_page?)
+		 *
+		 * actually, ppc's dma_unmap_page() functions are empty, so
+		 * the following code is just for the sake of completeness
+		 */
+		if (chan && chan->needs_unmap && desc->group_head &&
+		     desc->unmap_len) {
+			struct ppc440spe_adma_desc_slot *unmap =
+							desc->group_head;
+			/* assume 1 slot per op always */
+			u32 slot_count = unmap->slot_cnt;
+
+			/* Run through the group list and unmap addresses */
+			for (i = 0; i < slot_count; i++) {
+				BUG_ON(!unmap);
+				ppc440spe_adma_unmap(chan, unmap);
+				unmap = unmap->hw_next;
+			}
+		}
+	}
+
+	/* run dependent operations */
+	dma_run_dependencies(&desc->async_tx);
+
+	return cookie;
+}
+
+/**
+ * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set)
+ */
+static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc,
+		struct ppc440spe_adma_chan *chan)
+{
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx))
+		return 0;
+
+	/* leave the last descriptor in the chain
+	 * so we can append to it
+	 */
+	if (list_is_last(&desc->chain_node, &chan->chain) ||
+	    desc->phys == ppc440spe_chan_get_current_descriptor(chan))
+		return 1;
+
+	if (chan->device->id != PPC440SPE_XOR_ID) {
+		/* our DMA interrupt handler clears opc field of
+		 * each processed descriptor. For all types of
+		 * operations except for ZeroSum we do not actually
+		 * need ack from the interrupt handler. ZeroSum is a
+		 * special case since the result of this operation
+		 * is available from the handler only, so if we see
+		 * such type of descriptor (which is unprocessed yet)
+		 * then leave it in chain.
+		 */
+		struct dma_cdb *cdb = desc->hw_desc;
+		if (cdb->opc == DMA_CDB_OPC_DCHECK128)
+			return 1;
+	}
+
+	dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n",
+		desc->phys, desc->idx, desc->slots_per_op);
+
+	list_del(&desc->chain_node);
+	ppc440spe_adma_free_slots(desc, chan);
+	return 0;
+}
+
+/**
+ * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine
+ *	which runs through the channel CDBs list until reach the descriptor
+ *	currently processed. When routine determines that all CDBs of group
+ *	are completed then corresponding callbacks (if any) are called and slots
+ *	are freed.
+ */
+static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
+{
+	struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL;
+	dma_cookie_t cookie = 0;
+	u32 current_desc = ppc440spe_chan_get_current_descriptor(chan);
+	int busy = ppc440spe_chan_is_busy(chan);
+	int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+	dev_dbg(chan->device->common.dev, "ppc440spe adma%d: %s\n",
+		chan->device->id, __func__);
+
+	if (!current_desc) {
+		/*  There were no transactions yet, so
+		 * nothing to clean
+		 */
+		return;
+	}
+
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+	list_for_each_entry_safe(iter, _iter, &chan->chain,
+					chain_node) {
+		dev_dbg(chan->device->common.dev, "\tcookie: %d slot: %d "
+		    "busy: %d this_desc: %#llx next_desc: %#x "
+		    "cur: %#x ack: %d\n",
+		    iter->async_tx.cookie, iter->idx, busy, iter->phys,
+		    ppc440spe_desc_get_link(iter, chan), current_desc,
+		    async_tx_test_ack(&iter->async_tx));
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+
+		/* do not advance past the current descriptor loaded into the
+		 * hardware channel,subsequent descriptors are either in process
+		 * or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy, or if it appears that the current descriptor
+		 * needs to be re-read (i.e. has been appended to)
+		 */
+		if (iter->phys == current_desc) {
+			BUG_ON(seen_current++);
+			if (busy || ppc440spe_desc_get_link(iter, chan)) {
+				/* not all descriptors of the group have
+				 * been completed; exit.
+				 */
+				break;
+			}
+		}
+
+		/* detect the start of a group transaction */
+		if (!slot_cnt && !slots_per_op) {
+			slot_cnt = iter->slot_cnt;
+			slots_per_op = iter->slots_per_op;
+			if (slot_cnt <= slots_per_op) {
+				slot_cnt = 0;
+				slots_per_op = 0;
+			}
+		}
+
+		if (slot_cnt) {
+			if (!group_start)
+				group_start = iter;
+			slot_cnt -= slots_per_op;
+		}
+
+		/* all the members of a group are complete */
+		if (slots_per_op != 0 && slot_cnt == 0) {
+			struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter;
+			int end_of_chain = 0;
+
+			/* clean up the group */
+			slot_cnt = group_start->slot_cnt;
+			grp_iter = group_start;
+			list_for_each_entry_safe_from(grp_iter, _grp_iter,
+				&chan->chain, chain_node) {
+
+				cookie = ppc440spe_adma_run_tx_complete_actions(
+					grp_iter, chan, cookie);
+
+				slot_cnt -= slots_per_op;
+				end_of_chain = ppc440spe_adma_clean_slot(
+				    grp_iter, chan);
+				if (end_of_chain && slot_cnt) {
+					/* Should wait for ZeroSum completion */
+					if (cookie > 0)
+						chan->common.completed_cookie = cookie;
+					return;
+				}
+
+				if (slot_cnt == 0 || end_of_chain)
+					break;
+			}
+
+			/* the group should be complete at this point */
+			BUG_ON(slot_cnt);
+
+			slots_per_op = 0;
+			group_start = NULL;
+			if (end_of_chain)
+				break;
+			else
+				continue;
+		} else if (slots_per_op) /* wait for group completion */
+			continue;
+
+		cookie = ppc440spe_adma_run_tx_complete_actions(iter, chan,
+		    cookie);
+
+		if (ppc440spe_adma_clean_slot(iter, chan))
+			break;
+	}
+
+	BUG_ON(!seen_current);
+
+	if (cookie > 0) {
+		chan->common.completed_cookie = cookie;
+		pr_debug("\tcompleted cookie %d\n", cookie);
+	}
+
+}
+
+/**
+ * ppc440spe_adma_tasklet - clean up watch-dog initiator
+ */
+static void ppc440spe_adma_tasklet(unsigned long data)
+{
+	struct ppc440spe_adma_chan *chan = (struct ppc440spe_adma_chan *) data;
+
+	spin_lock_nested(&chan->lock, SINGLE_DEPTH_NESTING);
+	__ppc440spe_adma_slot_cleanup(chan);
+	spin_unlock(&chan->lock);
+}
+
+/**
+ * ppc440spe_adma_slot_cleanup - clean up scheduled initiator
+ */
+static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
+{
+	spin_lock_bh(&chan->lock);
+	__ppc440spe_adma_slot_cleanup(chan);
+	spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * ppc440spe_adma_alloc_slots - allocate free slots (if any)
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots(
+		struct ppc440spe_adma_chan *chan, int num_slots,
+		int slots_per_op)
+{
+	struct ppc440spe_adma_desc_slot *iter = NULL, *_iter;
+	struct ppc440spe_adma_desc_slot *alloc_start = NULL;
+	struct list_head chain = LIST_HEAD_INIT(chain);
+	int slots_found, retry = 0;
+
+
+	BUG_ON(!num_slots || !slots_per_op);
+	/* start search from the last allocated descrtiptor
+	 * if a contiguous allocation can not be found start searching
+	 * from the beginning of the list
+	 */
+retry:
+	slots_found = 0;
+	if (retry == 0)
+		iter = chan->last_used;
+	else
+		iter = list_entry(&chan->all_slots,
+				  struct ppc440spe_adma_desc_slot,
+				  slot_node);
+	list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots,
+	    slot_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+		if (iter->slots_per_op) {
+			slots_found = 0;
+			continue;
+		}
+
+		/* start the allocation if the slot is correctly aligned */
+		if (!slots_found++)
+			alloc_start = iter;
+
+		if (slots_found == num_slots) {
+			struct ppc440spe_adma_desc_slot *alloc_tail = NULL;
+			struct ppc440spe_adma_desc_slot *last_used = NULL;
+
+			iter = alloc_start;
+			while (num_slots) {
+				int i;
+				/* pre-ack all but the last descriptor */
+				if (num_slots != slots_per_op)
+					async_tx_ack(&iter->async_tx);
+
+				list_add_tail(&iter->chain_node, &chain);
+				alloc_tail = iter;
+				iter->async_tx.cookie = 0;
+				iter->hw_next = NULL;
+				iter->flags = 0;
+				iter->slot_cnt = num_slots;
+				iter->xor_check_result = NULL;
+				for (i = 0; i < slots_per_op; i++) {
+					iter->slots_per_op = slots_per_op - i;
+					last_used = iter;
+					iter = list_entry(iter->slot_node.next,
+						struct ppc440spe_adma_desc_slot,
+						slot_node);
+				}
+				num_slots -= slots_per_op;
+			}
+			alloc_tail->group_head = alloc_start;
+			alloc_tail->async_tx.cookie = -EBUSY;
+			list_splice(&chain, &alloc_tail->group_list);
+			chan->last_used = last_used;
+			return alloc_tail;
+		}
+	}
+	if (!retry++)
+		goto retry;
+
+	/* try to free some slots if the allocation fails */
+	tasklet_schedule(&chan->irq_tasklet);
+	return NULL;
+}
+
+/**
+ * ppc440spe_adma_alloc_chan_resources -  allocate pools for CDB slots
+ */
+static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *slot = NULL;
+	char *hw_desc;
+	int i, db_sz;
+	int init;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+	init = ppc440spe_chan->slots_allocated ? 0 : 1;
+	chan->chan_id = ppc440spe_chan->device->id;
+
+	/* Allocate descriptor slots */
+	i = ppc440spe_chan->slots_allocated;
+	if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID)
+		db_sz = sizeof(struct dma_cdb);
+	else
+		db_sz = sizeof(struct xor_cb);
+
+	for (; i < (ppc440spe_chan->device->pool_size / db_sz); i++) {
+		slot = kzalloc(sizeof(struct ppc440spe_adma_desc_slot),
+			       GFP_KERNEL);
+		if (!slot) {
+			printk(KERN_INFO "SPE ADMA Channel only initialized"
+				" %d descriptor slots", i--);
+			break;
+		}
+
+		hw_desc = (char *) ppc440spe_chan->device->dma_desc_pool_virt;
+		slot->hw_desc = (void *) &hw_desc[i * db_sz];
+		dma_async_tx_descriptor_init(&slot->async_tx, chan);
+		slot->async_tx.tx_submit = ppc440spe_adma_tx_submit;
+		INIT_LIST_HEAD(&slot->chain_node);
+		INIT_LIST_HEAD(&slot->slot_node);
+		INIT_LIST_HEAD(&slot->group_list);
+		slot->phys = ppc440spe_chan->device->dma_desc_pool + i * db_sz;
+		slot->idx = i;
+
+		spin_lock_bh(&ppc440spe_chan->lock);
+		ppc440spe_chan->slots_allocated++;
+		list_add_tail(&slot->slot_node, &ppc440spe_chan->all_slots);
+		spin_unlock_bh(&ppc440spe_chan->lock);
+	}
+
+	if (i && !ppc440spe_chan->last_used) {
+		ppc440spe_chan->last_used =
+			list_entry(ppc440spe_chan->all_slots.next,
+				struct ppc440spe_adma_desc_slot,
+				slot_node);
+	}
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: allocated %d descriptor slots\n",
+		ppc440spe_chan->device->id, i);
+
+	/* initialize the channel and the chain with a null operation */
+	if (init) {
+		switch (ppc440spe_chan->device->id) {
+		case PPC440SPE_DMA0_ID:
+		case PPC440SPE_DMA1_ID:
+			ppc440spe_chan->hw_chain_inited = 0;
+			/* Use WXOR for self-testing */
+			if (!ppc440spe_r6_tchan)
+				ppc440spe_r6_tchan = ppc440spe_chan;
+			break;
+		case PPC440SPE_XOR_ID:
+			ppc440spe_chan_start_null_xor(ppc440spe_chan);
+			break;
+		default:
+			BUG();
+		}
+		ppc440spe_chan->needs_unmap = 1;
+	}
+
+	return (i > 0) ? i : -ENOMEM;
+}
+
+/**
+ * ppc440spe_rxor_set_region_data -
+ */
+static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
+	u8 xor_arg_no, u32 mask)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= mask;
+}
+
+/**
+ * ppc440spe_rxor_set_src -
+ */
+static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc,
+	u8 xor_arg_no, dma_addr_t addr)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE;
+	xcb->ops[xor_arg_no].l = addr;
+}
+
+/**
+ * ppc440spe_rxor_set_mult -
+ */
+static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc,
+	u8 xor_arg_no, u8 idx, u8 mult)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8);
+}
+
+/**
+ * ppc440spe_adma_check_threshold - append CDBs to h/w chain if threshold
+ *	has been achieved
+ */
+static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan)
+{
+	dev_dbg(chan->device->common.dev, "ppc440spe adma%d: pending: %d\n",
+		chan->device->id, chan->pending);
+
+	if (chan->pending >= PPC440SPE_ADMA_THRESHOLD) {
+		chan->pending = 0;
+		ppc440spe_chan_append(chan);
+	}
+}
+
+/**
+ * ppc440spe_adma_tx_submit - submit new descriptor group to the channel
+ *	(it's not necessary that descriptors will be submitted to the h/w
+ *	chains too right now)
+ */
+static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc;
+	struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan);
+	struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail;
+	int slot_cnt;
+	int slots_per_op;
+	dma_cookie_t cookie;
+
+	sw_desc = tx_to_ppc440spe_adma_slot(tx);
+
+	group_start = sw_desc->group_head;
+	slot_cnt = group_start->slot_cnt;
+	slots_per_op = group_start->slots_per_op;
+
+	spin_lock_bh(&chan->lock);
+	cookie = dma_cookie_assign(tx);
+
+	if (unlikely(list_empty(&chan->chain))) {
+		/* first peer */
+		list_splice_init(&sw_desc->group_list, &chan->chain);
+		chan_first_cdb[chan->device->id] = group_start;
+	} else {
+		/* isn't first peer, bind CDBs to chain */
+		old_chain_tail = list_entry(chan->chain.prev,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		list_splice_init(&sw_desc->group_list,
+		    &old_chain_tail->chain_node);
+		/* fix up the hardware chain */
+		ppc440spe_desc_set_link(chan, old_chain_tail, group_start);
+	}
+
+	/* increment the pending count by the number of operations */
+	chan->pending += slot_cnt / slots_per_op;
+	ppc440spe_adma_check_threshold(chan);
+	spin_unlock_bh(&chan->lock);
+
+	dev_dbg(chan->device->common.dev,
+		"ppc440spe adma%d: %s cookie: %d slot: %d tx %p\n",
+		chan->device->id, __func__,
+		sw_desc->async_tx.cookie, sw_desc->idx, sw_desc);
+
+	return cookie;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_interrupt - prepare CDB for a pseudo DMA operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_interrupt(
+		struct dma_chan *chan, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	int slot_cnt, slots_per_op;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s\n", ppc440spe_chan->device->id,
+		__func__);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	slot_cnt = slots_per_op = 1;
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+			slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		ppc440spe_desc_init_interrupt(group_start, ppc440spe_chan);
+		group_start->unmap_len = 0;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_memcpy - prepare CDB for a MEMCPY operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dma_dest,
+		dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	int slot_cnt, slots_per_op;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	if (unlikely(!len))
+		return NULL;
+
+	BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s len: %u int_en %d\n",
+		ppc440spe_chan->device->id, __func__, len,
+		flags & DMA_PREP_INTERRUPT ? 1 : 0);
+	slot_cnt = slots_per_op = 1;
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+		slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		ppc440spe_desc_init_memcpy(group_start, flags);
+		ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+		ppc440spe_adma_memcpy_xor_set_src(group_start, dma_src, 0);
+		ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_memset - prepare CDB for a MEMSET operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset(
+		struct dma_chan *chan, dma_addr_t dma_dest, int value,
+		size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	int slot_cnt, slots_per_op;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	if (unlikely(!len))
+		return NULL;
+
+	BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s cal: %u len: %u int_en %d\n",
+		ppc440spe_chan->device->id, __func__, value, len,
+		flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+	slot_cnt = slots_per_op = 1;
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+		slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		ppc440spe_desc_init_memset(group_start, value, flags);
+		ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+		ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor(
+		struct dma_chan *chan, dma_addr_t dma_dest,
+		dma_addr_t *dma_src, u32 src_cnt, size_t len,
+		unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	int slot_cnt, slots_per_op;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	ADMA_LL_DBG(prep_dma_xor_dbg(ppc440spe_chan->device->id,
+				     dma_dest, dma_src, src_cnt));
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+		ppc440spe_chan->device->id, __func__, src_cnt, len,
+		flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	slot_cnt = ppc440spe_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+			slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		ppc440spe_desc_init_xor(group_start, src_cnt, flags);
+		ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+		while (src_cnt--)
+			ppc440spe_adma_memcpy_xor_set_src(group_start,
+				dma_src[src_cnt], src_cnt);
+		ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static inline void
+ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc,
+				int src_cnt);
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor);
+
+/**
+ * ppc440spe_adma_init_dma2rxor_slot -
+ */
+static void ppc440spe_adma_init_dma2rxor_slot(
+		struct ppc440spe_adma_desc_slot *desc,
+		dma_addr_t *src, int src_cnt)
+{
+	int i;
+
+	/* initialize CDB */
+	for (i = 0; i < src_cnt; i++) {
+		ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i,
+						 desc->src_cnt, (u32)src[i]);
+	}
+}
+
+/**
+ * ppc440spe_dma01_prep_mult -
+ * for Q operation where destination is also the source
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult(
+		struct ppc440spe_adma_chan *ppc440spe_chan,
+		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+		const unsigned char *scf, size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	unsigned long op = 0;
+	int slot_cnt;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	slot_cnt = 2;
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	/* use WXOR, each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		struct ppc440spe_adma_chan *chan;
+		struct ppc440spe_adma_desc_slot *iter;
+		struct dma_cdb *hw_desc;
+
+		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+		set_bits(op, &sw_desc->flags);
+		sw_desc->src_cnt = src_cnt;
+		sw_desc->dst_cnt = dst_cnt;
+		/* First descriptor, zero data in the destination and copy it
+		 * to q page using MULTICAST transfer.
+		 */
+		iter = list_first_entry(&sw_desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		/* set 'next' pointer */
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+		ppc440spe_desc_set_dest_addr(iter, chan,
+					     DMA_CUED_XOR_BASE, dst[0], 0);
+		ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1);
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					    src[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/*
+		 * Second descriptor, multiply data from the q page
+		 * and store the result in real destination.
+		 */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = NULL;
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0,
+					    DMA_CUED_XOR_HB, dst[1]);
+		ppc440spe_desc_set_dest_addr(iter, chan,
+					     DMA_CUED_XOR_BASE, dst[0], 0);
+
+		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+					    DMA_CDB_SG_DST1, scf[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+/**
+ * ppc440spe_dma01_prep_sum_product -
+ * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also
+ * the source.
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product(
+		struct ppc440spe_adma_chan *ppc440spe_chan,
+		dma_addr_t *dst, dma_addr_t *src, int src_cnt,
+		const unsigned char *scf, size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	unsigned long op = 0;
+	int slot_cnt;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	slot_cnt = 3;
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	/* WXOR, each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		struct ppc440spe_adma_chan *chan;
+		struct ppc440spe_adma_desc_slot *iter;
+		struct dma_cdb *hw_desc;
+
+		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+		set_bits(op, &sw_desc->flags);
+		sw_desc->src_cnt = src_cnt;
+		sw_desc->dst_cnt = 1;
+		/* 1st descriptor, src[1] data to q page and zero destination */
+		iter = list_first_entry(&sw_desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					     *dst, 0);
+		ppc440spe_desc_set_dest_addr(iter, chan, 0,
+					     ppc440spe_chan->qdest, 1);
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					    src[1]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/* 2nd descriptor, multiply src[1] data and store the
+		 * result in destination */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		/* set 'next' pointer */
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					    ppc440spe_chan->qdest);
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					     *dst, 0);
+		ppc440spe_desc_set_src_mult(iter, chan,	DMA_CUED_MULT1_OFF,
+					    DMA_CDB_SG_DST1, scf[1]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/*
+		 * 3rd descriptor, multiply src[0] data and xor it
+		 * with destination
+		 */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = NULL;
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					    src[0]);
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					     *dst, 0);
+		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+					    DMA_CDB_SG_DST1, scf[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq(
+		struct ppc440spe_adma_chan *ppc440spe_chan,
+		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+		const unsigned char *scf, size_t len, unsigned long flags)
+{
+	int slot_cnt;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+	unsigned long op = 0;
+	unsigned char mult = 1;
+
+	pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+		 __func__, dst_cnt, src_cnt, len);
+	/*  select operations WXOR/RXOR depending on the
+	 * source addresses of operators and the number
+	 * of destinations (RXOR support only Q-parity calculations)
+	 */
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) {
+		/* no active RXOR;
+		 * do RXOR if:
+		 * - there are more than 1 source,
+		 * - len is aligned on 512-byte boundary,
+		 * - source addresses fit to one of 4 possible regions.
+		 */
+		if (src_cnt > 1 &&
+		    !(len & MQ0_CF2H_RXOR_BS_MASK) &&
+		    (src[0] + len) == src[1]) {
+			/* may do RXOR R1 R2 */
+			set_bit(PPC440SPE_DESC_RXOR, &op);
+			if (src_cnt != 2) {
+				/* may try to enhance region of RXOR */
+				if ((src[1] + len) == src[2]) {
+					/* do RXOR R1 R2 R3 */
+					set_bit(PPC440SPE_DESC_RXOR123,
+						&op);
+				} else if ((src[1] + len * 2) == src[2]) {
+					/* do RXOR R1 R2 R4 */
+					set_bit(PPC440SPE_DESC_RXOR124, &op);
+				} else if ((src[1] + len * 3) == src[2]) {
+					/* do RXOR R1 R2 R5 */
+					set_bit(PPC440SPE_DESC_RXOR125,
+						&op);
+				} else {
+					/* do RXOR R1 R2 */
+					set_bit(PPC440SPE_DESC_RXOR12,
+						&op);
+				}
+			} else {
+				/* do RXOR R1 R2 */
+				set_bit(PPC440SPE_DESC_RXOR12, &op);
+			}
+		}
+
+		if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+			/* can not do this operation with RXOR */
+			clear_bit(PPC440SPE_RXOR_RUN,
+				&ppc440spe_rxor_state);
+		} else {
+			/* can do; set block size right now */
+			ppc440spe_desc_set_rxor_block_size(len);
+		}
+	}
+
+	/* Number of necessary slots depends on operation type selected */
+	if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+		/*  This is a WXOR only chain. Need descriptors for each
+		 * source to GF-XOR them with WXOR, and need descriptors
+		 * for each destination to zero them with WXOR
+		 */
+		slot_cnt = src_cnt;
+
+		if (flags & DMA_PREP_ZERO_P) {
+			slot_cnt++;
+			set_bit(PPC440SPE_ZERO_P, &op);
+		}
+		if (flags & DMA_PREP_ZERO_Q) {
+			slot_cnt++;
+			set_bit(PPC440SPE_ZERO_Q, &op);
+		}
+	} else {
+		/*  Need 1/2 descriptor for RXOR operation, and
+		 * need (src_cnt - (2 or 3)) for WXOR of sources
+		 * remained (if any)
+		 */
+		slot_cnt = dst_cnt;
+
+		if (flags & DMA_PREP_ZERO_P)
+			set_bit(PPC440SPE_ZERO_P, &op);
+		if (flags & DMA_PREP_ZERO_Q)
+			set_bit(PPC440SPE_ZERO_Q, &op);
+
+		if (test_bit(PPC440SPE_DESC_RXOR12, &op))
+			slot_cnt += src_cnt - 2;
+		else
+			slot_cnt += src_cnt - 3;
+
+		/*  Thus we have either RXOR only chain or
+		 * mixed RXOR/WXOR
+		 */
+		if (slot_cnt == dst_cnt)
+			/* RXOR only chain */
+			clear_bit(PPC440SPE_DESC_WXOR, &op);
+	}
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	/* for both RXOR/WXOR each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt,
+				flags, op);
+
+		/* setup dst/src/mult */
+		pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n",
+			 __func__, dst[0], dst[1]);
+		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+		while (src_cnt--) {
+			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
+						  src_cnt);
+
+			/* NOTE: "Multi = 0 is equivalent to = 1" as it
+			 * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf
+			 * doesn't work for RXOR with DMA0/1! Instead, multi=0
+			 * leads to zeroing source data after RXOR.
+			 * So, for P case set-up mult=1 explicitly.
+			 */
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				mult = scf[src_cnt];
+			ppc440spe_adma_pq_set_src_mult(sw_desc,
+				mult, src_cnt,  dst_cnt - 1);
+		}
+
+		/* Setup byte count foreach slot just allocated */
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list,
+				chain_node) {
+			ppc440spe_desc_set_byte_count(iter,
+				ppc440spe_chan, len);
+			iter->unmap_len = len;
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq(
+		struct ppc440spe_adma_chan *ppc440spe_chan,
+		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+		const unsigned char *scf, size_t len, unsigned long flags)
+{
+	int slot_cnt, descs_per_op;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+	unsigned long op = 0;
+	unsigned char mult = 1;
+
+	BUG_ON(!dst_cnt);
+	/*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+		 __func__, dst_cnt, src_cnt, len);*/
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len);
+	if (descs_per_op < 0) {
+		spin_unlock_bh(&ppc440spe_chan->lock);
+		return NULL;
+	}
+
+	/* depending on number of sources we have 1 or 2 RXOR chains */
+	slot_cnt = descs_per_op * dst_cnt;
+
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		op = slot_cnt;
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt,
+				--op ? 0 : flags);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+				len);
+			iter->unmap_len = len;
+
+			ppc440spe_init_rxor_cursor(&(iter->rxor_cursor));
+			iter->rxor_cursor.len = len;
+			iter->descs_per_op = descs_per_op;
+		}
+		op = 0;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			op++;
+			if (op % descs_per_op == 0)
+				ppc440spe_adma_init_dma2rxor_slot(iter, src,
+								  src_cnt);
+			if (likely(!list_is_last(&iter->chain_node,
+						 &sw_desc->group_list))) {
+				/* set 'next' pointer */
+				iter->hw_next =
+					list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+				ppc440spe_xor_set_link(iter, iter->hw_next);
+			} else {
+				/* this is the last descriptor. */
+				iter->hw_next = NULL;
+			}
+		}
+
+		/* fixup head descriptor */
+		sw_desc->dst_cnt = dst_cnt;
+		if (flags & DMA_PREP_ZERO_P)
+			set_bit(PPC440SPE_ZERO_P, &sw_desc->flags);
+		if (flags & DMA_PREP_ZERO_Q)
+			set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags);
+
+		/* setup dst/src/mult */
+		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+
+		while (src_cnt--) {
+			/* handle descriptors (if dst_cnt == 2) inside
+			 * the ppc440spe_adma_pq_set_srcxxx() functions
+			 */
+			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
+						  src_cnt);
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				mult = scf[src_cnt];
+			ppc440spe_adma_pq_set_src_mult(sw_desc,
+					mult, src_cnt, dst_cnt - 1);
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+	ppc440spe_desc_set_rxor_block_size(len);
+	return sw_desc;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq(
+		struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf,
+		size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	int dst_cnt = 0;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id,
+				    dst, src, src_cnt));
+	BUG_ON(!len);
+	BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+	BUG_ON(!src_cnt);
+
+	if (src_cnt == 1 && dst[1] == src[0]) {
+		dma_addr_t dest[2];
+
+		/* dst[1] is real destination (Q) */
+		dest[0] = dst[1];
+		/* this is the page to multicast source data to */
+		dest[1] = ppc440spe_chan->qdest;
+		sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan,
+				dest, 2, src, src_cnt, scf, len, flags);
+		return sw_desc ? &sw_desc->async_tx : NULL;
+	}
+
+	if (src_cnt == 2 && dst[1] == src[1]) {
+		sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan,
+					&dst[1], src, 2, scf, len, flags);
+		return sw_desc ? &sw_desc->async_tx : NULL;
+	}
+
+	if (!(flags & DMA_PREP_PQ_DISABLE_P)) {
+		BUG_ON(!dst[0]);
+		dst_cnt++;
+		flags |= DMA_PREP_ZERO_P;
+	}
+
+	if (!(flags & DMA_PREP_PQ_DISABLE_Q)) {
+		BUG_ON(!dst[1]);
+		dst_cnt++;
+		flags |= DMA_PREP_ZERO_Q;
+	}
+
+	BUG_ON(!dst_cnt);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+		ppc440spe_chan->device->id, __func__, src_cnt, len,
+		flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+	switch (ppc440spe_chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan,
+				dst, dst_cnt, src, src_cnt, scf,
+				len, flags);
+		break;
+
+	case PPC440SPE_XOR_ID:
+		sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan,
+				dst, dst_cnt, src, src_cnt, scf,
+				len, flags);
+		break;
+	}
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for
+ * a PQ_ZERO_SUM operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum(
+		struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf, size_t len,
+		enum sum_check_flags *pqres, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+	dma_addr_t pdest, qdest;
+	int slot_cnt, slots_per_op, idst, dst_cnt;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		pdest = 0;
+	else
+		pdest = pq[0];
+
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		qdest = 0;
+	else
+		qdest = pq[1];
+
+	ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id,
+					    src, src_cnt, scf));
+
+	/* Always use WXOR for P/Q calculations (two destinations).
+	 * Need 1 or 2 extra slots to verify results are zero.
+	 */
+	idst = dst_cnt = (pdest && qdest) ? 2 : 1;
+
+	/* One additional slot per destination to clone P/Q
+	 * before calculation (we have to preserve destinations).
+	 */
+	slot_cnt = src_cnt + dst_cnt * 2;
+	slots_per_op = 1;
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+					     slots_per_op);
+	if (sw_desc) {
+		ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt);
+
+		/* Setup byte count for each slot just allocated */
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+						      len);
+			iter->unmap_len = len;
+		}
+
+		if (pdest) {
+			struct dma_cdb *hw_desc;
+			struct ppc440spe_adma_chan *chan;
+
+			iter = sw_desc->group_head;
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+			iter->hw_next = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter->src_cnt = 0;
+			iter->dst_cnt = 0;
+			ppc440spe_desc_set_dest_addr(iter, chan, 0,
+						     ppc440spe_chan->pdest, 0);
+			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+						      len);
+			iter->unmap_len = 0;
+			/* override pdest to preserve original P */
+			pdest = ppc440spe_chan->pdest;
+		}
+		if (qdest) {
+			struct dma_cdb *hw_desc;
+			struct ppc440spe_adma_chan *chan;
+
+			iter = list_first_entry(&sw_desc->group_list,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+
+			if (pdest) {
+				iter = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			}
+
+			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+			iter->hw_next = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter->src_cnt = 0;
+			iter->dst_cnt = 0;
+			ppc440spe_desc_set_dest_addr(iter, chan, 0,
+						     ppc440spe_chan->qdest, 0);
+			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+						      len);
+			iter->unmap_len = 0;
+			/* override qdest to preserve original Q */
+			qdest = ppc440spe_chan->qdest;
+		}
+
+		/* Setup destinations for P/Q ops */
+		ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
+
+		/* Setup zero QWORDs into DCHECK CDBs */
+		idst = dst_cnt;
+		list_for_each_entry_reverse(iter, &sw_desc->group_list,
+					    chain_node) {
+			/*
+			 * The last CDB corresponds to Q-parity check,
+			 * the one before last CDB corresponds
+			 * P-parity check
+			 */
+			if (idst == DMA_DEST_MAX_NUM) {
+				if (idst == dst_cnt) {
+					set_bit(PPC440SPE_DESC_QCHECK,
+						&iter->flags);
+				} else {
+					set_bit(PPC440SPE_DESC_PCHECK,
+						&iter->flags);
+				}
+			} else {
+				if (qdest) {
+					set_bit(PPC440SPE_DESC_QCHECK,
+						&iter->flags);
+				} else {
+					set_bit(PPC440SPE_DESC_PCHECK,
+						&iter->flags);
+				}
+			}
+			iter->xor_check_result = pqres;
+
+			/*
+			 * set it to zero, if check fail then result will
+			 * be updated
+			 */
+			*iter->xor_check_result = 0;
+			ppc440spe_desc_set_dcheck(iter, ppc440spe_chan,
+				ppc440spe_qword);
+
+			if (!(--dst_cnt))
+				break;
+		}
+
+		/* Setup sources and mults for P/Q ops */
+		list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
+						     chain_node) {
+			struct ppc440spe_adma_chan *chan;
+			u32 mult_dst;
+
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+			ppc440spe_desc_set_src_addr(iter, chan, 0,
+						    DMA_CUED_XOR_HB,
+						    src[src_cnt - 1]);
+			if (qdest) {
+				mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 :
+							   DMA_CDB_SG_DST1;
+				ppc440spe_desc_set_src_mult(iter, chan,
+							    DMA_CUED_MULT1_OFF,
+							    mult_dst,
+							    scf[src_cnt - 1]);
+			}
+			if (!(--src_cnt))
+				break;
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for
+ * XOR ZERO_SUM operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum(
+		struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
+		size_t len, enum sum_check_flags *result, unsigned long flags)
+{
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t pq[2];
+
+	/* validate P, disable Q */
+	pq[0] = src[0];
+	pq[1] = 0;
+	flags |= DMA_PREP_PQ_DISABLE_Q;
+
+	tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1],
+						src_cnt - 1, 0, len,
+						result, flags);
+	return tx;
+}
+
+/**
+ * ppc440spe_adma_set_dest - set destination address into descriptor
+ */
+static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t addr, int index)
+{
+	struct ppc440spe_adma_chan *chan;
+
+	BUG_ON(index >= sw_desc->dst_cnt);
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* to do: support transfers lengths >
+		 * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT
+		 */
+		ppc440spe_desc_set_dest_addr(sw_desc->group_head,
+			chan, 0, addr, index);
+		break;
+	case PPC440SPE_XOR_ID:
+		sw_desc = ppc440spe_get_group_entry(sw_desc, index);
+		ppc440spe_desc_set_dest_addr(sw_desc,
+			chan, 0, addr, index);
+		break;
+	}
+}
+
+static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter,
+		struct ppc440spe_adma_chan *chan, dma_addr_t addr)
+{
+	/*  To clear destinations update the descriptor
+	 * (P or Q depending on index) as follows:
+	 * addr is destination (0 corresponds to SG2):
+	 */
+	ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0);
+
+	/* ... and the addr is source: */
+	ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr);
+
+	/* addr is always SG2 then the mult is always DST1 */
+	ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+				    DMA_CDB_SG_DST1, 1);
+}
+
+/**
+ * ppc440spe_adma_pq_set_dest - set destination address into descriptor
+ * for the PQXOR operation
+ */
+static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t *addrs, unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *iter;
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t paddr, qaddr;
+	dma_addr_t addr = 0, ppath, qpath;
+	int index = 0, i;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		paddr = 0;
+	else
+		paddr = addrs[0];
+
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		qaddr = 0;
+	else
+		qaddr = addrs[1];
+
+	if (!paddr || !qaddr)
+		addr = paddr ? paddr : qaddr;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* walk through the WXOR source list and set P/Q-destinations
+		 * for each slot:
+		 */
+		if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			/* This is WXOR-only chain; may have 1/2 zero descs */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				index++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				index++;
+
+			iter = ppc440spe_get_group_entry(sw_desc, index);
+			if (addr) {
+				/* one destination */
+				list_for_each_entry_from(iter,
+					&sw_desc->group_list, chain_node)
+					ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, addr, 0);
+			} else {
+				/* two destinations */
+				list_for_each_entry_from(iter,
+					&sw_desc->group_list, chain_node) {
+					ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, paddr, 0);
+					ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, qaddr, 1);
+				}
+			}
+
+			if (index) {
+				/*  To clear destinations update the descriptor
+				 * (1st,2nd, or both depending on flags)
+				 */
+				index = 0;
+				if (test_bit(PPC440SPE_ZERO_P,
+						&sw_desc->flags)) {
+					iter = ppc440spe_get_group_entry(
+							sw_desc, index++);
+					ppc440spe_adma_pq_zero_op(iter, chan,
+							paddr);
+				}
+
+				if (test_bit(PPC440SPE_ZERO_Q,
+						&sw_desc->flags)) {
+					iter = ppc440spe_get_group_entry(
+							sw_desc, index++);
+					ppc440spe_adma_pq_zero_op(iter, chan,
+							qaddr);
+				}
+
+				return;
+			}
+		} else {
+			/* This is RXOR-only or RXOR/WXOR mixed chain */
+
+			/* If we want to include destination into calculations,
+			 * then make dest addresses cued with mult=1 (XOR).
+			 */
+			ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+					DMA_CUED_XOR_HB :
+					DMA_CUED_XOR_BASE |
+						(1 << DMA_CUED_MULT1_OFF);
+			qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+					DMA_CUED_XOR_HB :
+					DMA_CUED_XOR_BASE |
+						(1 << DMA_CUED_MULT1_OFF);
+
+			/* Setup destination(s) in RXOR slot(s) */
+			iter = ppc440spe_get_group_entry(sw_desc, index++);
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						paddr ? ppath : qpath,
+						paddr ? paddr : qaddr, 0);
+			if (!addr) {
+				/* two destinations */
+				iter = ppc440spe_get_group_entry(sw_desc,
+								 index++);
+				ppc440spe_desc_set_dest_addr(iter, chan,
+						qpath, qaddr, 0);
+			}
+
+			if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) {
+				/* Setup destination(s) in remaining WXOR
+				 * slots
+				 */
+				iter = ppc440spe_get_group_entry(sw_desc,
+								 index);
+				if (addr) {
+					/* one destination */
+					list_for_each_entry_from(iter,
+					    &sw_desc->group_list,
+					    chain_node)
+						ppc440spe_desc_set_dest_addr(
+							iter, chan,
+							DMA_CUED_XOR_BASE,
+							addr, 0);
+
+				} else {
+					/* two destinations */
+					list_for_each_entry_from(iter,
+					    &sw_desc->group_list,
+					    chain_node) {
+						ppc440spe_desc_set_dest_addr(
+							iter, chan,
+							DMA_CUED_XOR_BASE,
+							paddr, 0);
+						ppc440spe_desc_set_dest_addr(
+							iter, chan,
+							DMA_CUED_XOR_BASE,
+							qaddr, 1);
+					}
+				}
+			}
+
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		/* DMA2 descriptors have only 1 destination, so there are
+		 * two chains - one for each dest.
+		 * If we want to include destination into calculations,
+		 * then make dest addresses cued with mult=1 (XOR).
+		 */
+		ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+				DMA_CUED_XOR_HB :
+				DMA_CUED_XOR_BASE |
+					(1 << DMA_CUED_MULT1_OFF);
+
+		qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+				DMA_CUED_XOR_HB :
+				DMA_CUED_XOR_BASE |
+					(1 << DMA_CUED_MULT1_OFF);
+
+		iter = ppc440spe_get_group_entry(sw_desc, 0);
+		for (i = 0; i < sw_desc->descs_per_op; i++) {
+			ppc440spe_desc_set_dest_addr(iter, chan,
+				paddr ? ppath : qpath,
+				paddr ? paddr : qaddr, 0);
+			iter = list_entry(iter->chain_node.next,
+					  struct ppc440spe_adma_desc_slot,
+					  chain_node);
+		}
+
+		if (!addr) {
+			/* Two destinations; setup Q here */
+			iter = ppc440spe_get_group_entry(sw_desc,
+				sw_desc->descs_per_op);
+			for (i = 0; i < sw_desc->descs_per_op; i++) {
+				ppc440spe_desc_set_dest_addr(iter,
+					chan, qpath, qaddr, 0);
+				iter = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			}
+		}
+
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_pq_zero_sum_set_dest - set destination address into descriptor
+ * for the PQ_ZERO_SUM operation
+ */
+static void ppc440spe_adma_pqzero_sum_set_dest(
+		struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t paddr, dma_addr_t qaddr)
+{
+	struct ppc440spe_adma_desc_slot *iter, *end;
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t addr = 0;
+	int idx;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	/* walk through the WXOR source list and set P/Q-destinations
+	 * for each slot
+	 */
+	idx = (paddr && qaddr) ? 2 : 1;
+	/* set end */
+	list_for_each_entry_reverse(end, &sw_desc->group_list,
+				    chain_node) {
+		if (!(--idx))
+			break;
+	}
+	/* set start */
+	idx = (paddr && qaddr) ? 2 : 1;
+	iter = ppc440spe_get_group_entry(sw_desc, idx);
+
+	if (paddr && qaddr) {
+		/* two destinations */
+		list_for_each_entry_from(iter, &sw_desc->group_list,
+					 chain_node) {
+			if (unlikely(iter == end))
+				break;
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, paddr, 0);
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, qaddr, 1);
+		}
+	} else {
+		/* one destination */
+		addr = paddr ? paddr : qaddr;
+		list_for_each_entry_from(iter, &sw_desc->group_list,
+					 chain_node) {
+			if (unlikely(iter == end))
+				break;
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, addr, 0);
+		}
+	}
+
+	/*  The remaining descriptors are DATACHECK. These have no need in
+	 * destination. Actually, these destinations are used there
+	 * as sources for check operation. So, set addr as source.
+	 */
+	ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr);
+
+	if (!addr) {
+		end = list_entry(end->chain_node.next,
+				 struct ppc440spe_adma_desc_slot, chain_node);
+		ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr);
+	}
+}
+
+/**
+ * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor
+ */
+static inline void ppc440spe_desc_set_xor_src_cnt(
+			struct ppc440spe_adma_desc_slot *desc,
+			int src_cnt)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	hw_desc->cbc &= ~XOR_CDCR_OAC_MSK;
+	hw_desc->cbc |= src_cnt;
+}
+
+/**
+ * ppc440spe_adma_pq_set_src - set source address into descriptor
+ */
+static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t addr, int index)
+{
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t haddr = 0;
+	struct ppc440spe_adma_desc_slot *iter = NULL;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain
+		 */
+		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			/* RXOR-only or RXOR/WXOR operation */
+			int iskip = test_bit(PPC440SPE_DESC_RXOR12,
+				&sw_desc->flags) ?  2 : 3;
+
+			if (index == 0) {
+				/* 1st slot (RXOR) */
+				/* setup sources region (R1-2-3, R1-2-4,
+				 * or R1-2-5)
+				 */
+				if (test_bit(PPC440SPE_DESC_RXOR12,
+						&sw_desc->flags))
+					haddr = DMA_RXOR12 <<
+						DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR123,
+				    &sw_desc->flags))
+					haddr = DMA_RXOR123 <<
+						DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR124,
+				    &sw_desc->flags))
+					haddr = DMA_RXOR124 <<
+						DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR125,
+				    &sw_desc->flags))
+					haddr = DMA_RXOR125 <<
+						DMA_CUED_REGION_OFF;
+				else
+					BUG();
+				haddr |= DMA_CUED_XOR_BASE;
+				iter = ppc440spe_get_group_entry(sw_desc, 0);
+			} else if (index < iskip) {
+				/* 1st slot (RXOR)
+				 * shall actually set source address only once
+				 * instead of first <iskip>
+				 */
+				iter = NULL;
+			} else {
+				/* 2nd/3d and next slots (WXOR);
+				 * skip first slot with RXOR
+				 */
+				haddr = DMA_CUED_XOR_HB;
+				iter = ppc440spe_get_group_entry(sw_desc,
+				    index - iskip + sw_desc->dst_cnt);
+			}
+		} else {
+			int znum = 0;
+
+			/* WXOR-only operation; skip first slots with
+			 * zeroing destinations
+			 */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				znum++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				znum++;
+
+			haddr = DMA_CUED_XOR_HB;
+			iter = ppc440spe_get_group_entry(sw_desc,
+					index + znum);
+		}
+
+		if (likely(iter)) {
+			ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr);
+
+			if (!index &&
+			    test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) &&
+			    sw_desc->dst_cnt == 2) {
+				/* if we have two destinations for RXOR, then
+				 * setup source in the second descr too
+				 */
+				iter = ppc440spe_get_group_entry(sw_desc, 1);
+				ppc440spe_desc_set_src_addr(iter, chan, 0,
+					haddr, addr);
+			}
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		/* DMA2 may do Biskup */
+		iter = sw_desc->group_head;
+		if (iter->dst_cnt == 2) {
+			/* both P & Q calculations required; set P src here */
+			ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+
+			/* this is for Q */
+			iter = ppc440spe_get_group_entry(sw_desc,
+				sw_desc->descs_per_op);
+		}
+		ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor
+ */
+static void ppc440spe_adma_memcpy_xor_set_src(
+		struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t addr, int index)
+{
+	struct ppc440spe_adma_chan *chan;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+	sw_desc = sw_desc->group_head;
+
+	if (likely(sw_desc))
+		ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr);
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_inc_addr  -
+ */
+static void ppc440spe_adma_dma2rxor_inc_addr(
+		struct ppc440spe_adma_desc_slot *desc,
+		struct ppc440spe_rxor *cursor, int index, int src_cnt)
+{
+	cursor->addr_count++;
+	if (index == src_cnt - 1) {
+		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+	} else if (cursor->addr_count == XOR_MAX_OPS) {
+		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+		cursor->addr_count = 0;
+		cursor->desc_count++;
+	}
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB
+ */
+static int ppc440spe_adma_dma2rxor_prep_src(
+		struct ppc440spe_adma_desc_slot *hdesc,
+		struct ppc440spe_rxor *cursor, int index,
+		int src_cnt, u32 addr)
+{
+	int rval = 0;
+	u32 sign;
+	struct ppc440spe_adma_desc_slot *desc = hdesc;
+	int i;
+
+	for (i = 0; i < cursor->desc_count; i++) {
+		desc = list_entry(hdesc->chain_node.next,
+				  struct ppc440spe_adma_desc_slot,
+				  chain_node);
+	}
+
+	switch (cursor->state) {
+	case 0:
+		if (addr == cursor->addrl + cursor->len) {
+			/* direct RXOR */
+			cursor->state = 1;
+			cursor->xor_count++;
+			if (index == src_cnt-1) {
+				ppc440spe_rxor_set_region(desc,
+					cursor->addr_count,
+					DMA_RXOR12 << DMA_CUED_REGION_OFF);
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else if (cursor->addrl == addr + cursor->len) {
+			/* reverse RXOR */
+			cursor->state = 1;
+			cursor->xor_count++;
+			set_bit(cursor->addr_count, &desc->reverse_flags[0]);
+			if (index == src_cnt-1) {
+				ppc440spe_rxor_set_region(desc,
+					cursor->addr_count,
+					DMA_RXOR12 << DMA_CUED_REGION_OFF);
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else {
+			printk(KERN_ERR "Cannot build "
+				"DMA2 RXOR command block.\n");
+			BUG();
+		}
+		break;
+	case 1:
+		sign = test_bit(cursor->addr_count,
+				desc->reverse_flags)
+			? -1 : 1;
+		if (index == src_cnt-2 || (sign == -1
+			&& addr != cursor->addrl - 2*cursor->len)) {
+			cursor->state = 0;
+			cursor->xor_count = 1;
+			cursor->addrl = addr;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR12 << DMA_CUED_REGION_OFF);
+			ppc440spe_adma_dma2rxor_inc_addr(
+				desc, cursor, index, src_cnt);
+		} else if (addr == cursor->addrl + 2*sign*cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR123 << DMA_CUED_REGION_OFF);
+			if (index == src_cnt-1) {
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else if (addr == cursor->addrl + 3*cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR124 << DMA_CUED_REGION_OFF);
+			if (index == src_cnt-1) {
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else if (addr == cursor->addrl + 4*cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR125 << DMA_CUED_REGION_OFF);
+			if (index == src_cnt-1) {
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else {
+			cursor->state = 0;
+			cursor->xor_count = 1;
+			cursor->addrl = addr;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR12 << DMA_CUED_REGION_OFF);
+			ppc440spe_adma_dma2rxor_inc_addr(
+				desc, cursor, index, src_cnt);
+		}
+		break;
+	case 2:
+		cursor->state = 0;
+		cursor->addrl = addr;
+		cursor->xor_count++;
+		if (index) {
+			ppc440spe_adma_dma2rxor_inc_addr(
+				desc, cursor, index, src_cnt);
+		}
+		break;
+	}
+
+	return rval;
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that
+ *	ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
+ */
+static void ppc440spe_adma_dma2rxor_set_src(
+		struct ppc440spe_adma_desc_slot *desc,
+		int index, dma_addr_t addr)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+	int k = 0, op = 0, lop = 0;
+
+	/* get the RXOR operand which corresponds to index addr */
+	while (op <= index) {
+		lop = op;
+		if (k == XOR_MAX_OPS) {
+			k = 0;
+			desc = list_entry(desc->chain_node.next,
+				struct ppc440spe_adma_desc_slot, chain_node);
+			xcb = desc->hw_desc;
+
+		}
+		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+			op += 2;
+		else
+			op += 3;
+	}
+
+	BUG_ON(k < 1);
+
+	if (test_bit(k-1, desc->reverse_flags)) {
+		/* reverse operand order; put last op in RXOR group */
+		if (index == op - 1)
+			ppc440spe_rxor_set_src(desc, k - 1, addr);
+	} else {
+		/* direct operand order; put first op in RXOR group */
+		if (index == lop)
+			ppc440spe_rxor_set_src(desc, k - 1, addr);
+	}
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that
+ *	ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
+ */
+static void ppc440spe_adma_dma2rxor_set_mult(
+		struct ppc440spe_adma_desc_slot *desc,
+		int index, u8 mult)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+	int k = 0, op = 0, lop = 0;
+
+	/* get the RXOR operand which corresponds to index mult */
+	while (op <= index) {
+		lop = op;
+		if (k == XOR_MAX_OPS) {
+			k = 0;
+			desc = list_entry(desc->chain_node.next,
+					  struct ppc440spe_adma_desc_slot,
+					  chain_node);
+			xcb = desc->hw_desc;
+
+		}
+		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+			op += 2;
+		else
+			op += 3;
+	}
+
+	BUG_ON(k < 1);
+	if (test_bit(k-1, desc->reverse_flags)) {
+		/* reverse order */
+		ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult);
+	} else {
+		/* direct order */
+		ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult);
+	}
+}
+
+/**
+ * ppc440spe_init_rxor_cursor -
+ */
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor)
+{
+	memset(cursor, 0, sizeof(struct ppc440spe_rxor));
+	cursor->state = 2;
+}
+
+/**
+ * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into
+ * descriptor for the PQXOR operation
+ */
+static void ppc440spe_adma_pq_set_src_mult(
+		struct ppc440spe_adma_desc_slot *sw_desc,
+		unsigned char mult, int index, int dst_pos)
+{
+	struct ppc440spe_adma_chan *chan;
+	u32 mult_idx, mult_dst;
+	struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			int region = test_bit(PPC440SPE_DESC_RXOR12,
+					&sw_desc->flags) ? 2 : 3;
+
+			if (index < region) {
+				/* RXOR multipliers */
+				iter = ppc440spe_get_group_entry(sw_desc,
+					sw_desc->dst_cnt - 1);
+				if (sw_desc->dst_cnt == 2)
+					iter1 = ppc440spe_get_group_entry(
+							sw_desc, 0);
+
+				mult_idx = DMA_CUED_MULT1_OFF + (index << 3);
+				mult_dst = DMA_CDB_SG_SRC;
+			} else {
+				/* WXOR multiplier */
+				iter = ppc440spe_get_group_entry(sw_desc,
+							index - region +
+							sw_desc->dst_cnt);
+				mult_idx = DMA_CUED_MULT1_OFF;
+				mult_dst = dst_pos ? DMA_CDB_SG_DST2 :
+						     DMA_CDB_SG_DST1;
+			}
+		} else {
+			int znum = 0;
+
+			/* WXOR-only;
+			 * skip first slots with destinations (if ZERO_DST has
+			 * place)
+			 */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				znum++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				znum++;
+
+			iter = ppc440spe_get_group_entry(sw_desc, index + znum);
+			mult_idx = DMA_CUED_MULT1_OFF;
+			mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1;
+		}
+
+		if (likely(iter)) {
+			ppc440spe_desc_set_src_mult(iter, chan,
+				mult_idx, mult_dst, mult);
+
+			if (unlikely(iter1)) {
+				/* if we have two destinations for RXOR, then
+				 * we've just set Q mult. Set-up P now.
+				 */
+				ppc440spe_desc_set_src_mult(iter1, chan,
+					mult_idx, mult_dst, 1);
+			}
+
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		iter = sw_desc->group_head;
+		if (sw_desc->dst_cnt == 2) {
+			/* both P & Q calculations required; set P mult here */
+			ppc440spe_adma_dma2rxor_set_mult(iter, index, 1);
+
+			/* and then set Q mult */
+			iter = ppc440spe_get_group_entry(sw_desc,
+			       sw_desc->descs_per_op);
+		}
+		ppc440spe_adma_dma2rxor_set_mult(iter, index, mult);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_free_chan_resources - free the resources allocated
+ */
+static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	list_for_each_entry_safe(iter, _iter, &ppc440spe_chan->chain,
+					chain_node) {
+		in_use_descs++;
+		list_del(&iter->chain_node);
+	}
+	list_for_each_entry_safe_reverse(iter, _iter,
+			&ppc440spe_chan->all_slots, slot_node) {
+		list_del(&iter->slot_node);
+		kfree(iter);
+		ppc440spe_chan->slots_allocated--;
+	}
+	ppc440spe_chan->last_used = NULL;
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d %s slots_allocated %d\n",
+		ppc440spe_chan->device->id,
+		__func__, ppc440spe_chan->slots_allocated);
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	/* one is ok since we left it on there on purpose */
+	if (in_use_descs > 1)
+		printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n",
+			in_use_descs - 1);
+}
+
+/**
+ * ppc440spe_adma_tx_status - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ * @txstate: a holder for the current state of the channel
+ */
+static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	enum dma_status ret;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+/**
+ * ppc440spe_adma_eot_handler - end of transfer interrupt handler
+ */
+static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data)
+{
+	struct ppc440spe_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev,
+		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+	ppc440spe_adma_device_clear_eot_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ppc440spe_adma_err_handler - DMA error interrupt handler;
+ *	do the same things as a eot handler
+ */
+static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
+{
+	struct ppc440spe_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev,
+		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+	ppc440spe_adma_device_clear_eot_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ppc440spe_test_callback - called when test operation has been done
+ */
+static void ppc440spe_test_callback(void *unused)
+{
+	complete(&ppc440spe_r6_test_comp);
+}
+
+/**
+ * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w
+ */
+static void ppc440spe_adma_issue_pending(struct dma_chan *chan)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s %d \n", ppc440spe_chan->device->id,
+		__func__, ppc440spe_chan->pending);
+
+	if (ppc440spe_chan->pending) {
+		ppc440spe_chan->pending = 0;
+		ppc440spe_chan_append(ppc440spe_chan);
+	}
+}
+
+/**
+ * ppc440spe_chan_start_null_xor - initiate the first XOR operation (DMA engines
+ *	use FIFOs (as opposite to chains used in XOR) so this is a XOR
+ *	specific operation)
+ */
+static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(chan->device->common.dev,
+		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+	spin_lock_bh(&chan->lock);
+	slot_cnt = ppc440spe_chan_xor_slot_count(0, 2, &slots_per_op);
+	sw_desc = ppc440spe_adma_alloc_slots(chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		list_splice_init(&sw_desc->group_list, &chan->chain);
+		async_tx_ack(&sw_desc->async_tx);
+		ppc440spe_desc_init_null_xor(group_start);
+
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		chan->common.completed_cookie = cookie - 1;
+
+		/* channel should not be busy */
+		BUG_ON(ppc440spe_chan_is_busy(chan));
+
+		/* set the descriptor address */
+		ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc);
+
+		/* run the descriptor */
+		ppc440spe_chan_run(chan);
+	} else
+		printk(KERN_ERR "ppc440spe adma%d"
+			" failed to allocate null descriptor\n",
+			chan->device->id);
+	spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * ppc440spe_test_raid6 - test are RAID-6 capabilities enabled successfully.
+ *	For this we just perform one WXOR operation with the same source
+ *	and destination addresses, the GF-multiplier is 1; so if RAID-6
+ *	capabilities are enabled then we'll get src/dst filled with zero.
+ */
+static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+	struct page *pg;
+	char *a;
+	dma_addr_t dma_addr, addrs[2];
+	unsigned long op = 0;
+	int rval = 0;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+
+	pg = alloc_page(GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+
+	spin_lock_bh(&chan->lock);
+	sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1);
+	if (sw_desc) {
+		/* 1 src, 1 dsr, int_ena, WXOR */
+		ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op);
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE);
+			iter->unmap_len = PAGE_SIZE;
+		}
+	} else {
+		rval = -EFAULT;
+		spin_unlock_bh(&chan->lock);
+		goto exit;
+	}
+	spin_unlock_bh(&chan->lock);
+
+	/* Fill the test page with ones */
+	memset(page_address(pg), 0xFF, PAGE_SIZE);
+	dma_addr = dma_map_page(chan->device->dev, pg, 0,
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	/* Setup addresses */
+	ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0);
+	ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0);
+	addrs[0] = dma_addr;
+	addrs[1] = 0;
+	ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q);
+
+	async_tx_ack(&sw_desc->async_tx);
+	sw_desc->async_tx.callback = ppc440spe_test_callback;
+	sw_desc->async_tx.callback_param = NULL;
+
+	init_completion(&ppc440spe_r6_test_comp);
+
+	ppc440spe_adma_tx_submit(&sw_desc->async_tx);
+	ppc440spe_adma_issue_pending(&chan->common);
+
+	wait_for_completion(&ppc440spe_r6_test_comp);
+
+	/* Now check if the test page is zeroed */
+	a = page_address(pg);
+	if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) {
+		/* page is zero - RAID-6 enabled */
+		rval = 0;
+	} else {
+		/* RAID-6 was not enabled */
+		rval = -EINVAL;
+	}
+exit:
+	__free_page(pg);
+	return rval;
+}
+
+static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev)
+{
+	switch (adev->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_cap_set(DMA_MEMCPY, adev->common.cap_mask);
+		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+		dma_cap_set(DMA_MEMSET, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask);
+		dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask);
+		break;
+	case PPC440SPE_XOR_ID:
+		dma_cap_set(DMA_XOR, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ, adev->common.cap_mask);
+		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+		adev->common.cap_mask = adev->common.cap_mask;
+		break;
+	}
+
+	/* Set base routines */
+	adev->common.device_alloc_chan_resources =
+				ppc440spe_adma_alloc_chan_resources;
+	adev->common.device_free_chan_resources =
+				ppc440spe_adma_free_chan_resources;
+	adev->common.device_tx_status = ppc440spe_adma_tx_status;
+	adev->common.device_issue_pending = ppc440spe_adma_issue_pending;
+
+	/* Set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, adev->common.cap_mask)) {
+		adev->common.device_prep_dma_memcpy =
+			ppc440spe_adma_prep_dma_memcpy;
+	}
+	if (dma_has_cap(DMA_MEMSET, adev->common.cap_mask)) {
+		adev->common.device_prep_dma_memset =
+			ppc440spe_adma_prep_dma_memset;
+	}
+	if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) {
+		adev->common.max_xor = XOR_MAX_OPS;
+		adev->common.device_prep_dma_xor =
+			ppc440spe_adma_prep_dma_xor;
+	}
+	if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			dma_set_maxpq(&adev->common,
+				DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0);
+			break;
+		case PPC440SPE_DMA1_ID:
+			dma_set_maxpq(&adev->common,
+				DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0);
+			break;
+		case PPC440SPE_XOR_ID:
+			adev->common.max_pq = XOR_MAX_OPS * 3;
+			break;
+		}
+		adev->common.device_prep_dma_pq =
+			ppc440spe_adma_prep_dma_pq;
+	}
+	if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			adev->common.max_pq = DMA0_FIFO_SIZE /
+						sizeof(struct dma_cdb);
+			break;
+		case PPC440SPE_DMA1_ID:
+			adev->common.max_pq = DMA1_FIFO_SIZE /
+						sizeof(struct dma_cdb);
+			break;
+		}
+		adev->common.device_prep_dma_pq_val =
+			ppc440spe_adma_prep_dma_pqzero_sum;
+	}
+	if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			adev->common.max_xor = DMA0_FIFO_SIZE /
+						sizeof(struct dma_cdb);
+			break;
+		case PPC440SPE_DMA1_ID:
+			adev->common.max_xor = DMA1_FIFO_SIZE /
+						sizeof(struct dma_cdb);
+			break;
+		}
+		adev->common.device_prep_dma_xor_val =
+			ppc440spe_adma_prep_dma_xor_zero_sum;
+	}
+	if (dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask)) {
+		adev->common.device_prep_dma_interrupt =
+			ppc440spe_adma_prep_dma_interrupt;
+	}
+	pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: "
+	  "( %s%s%s%s%s%s%s)\n",
+	  dev_name(adev->dev),
+	  dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "",
+	  dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "",
+	  dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "",
+	  dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "",
+	  dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "",
+	  dma_has_cap(DMA_MEMSET, adev->common.cap_mask)  ? "memset " : "",
+	  dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : "");
+}
+
+static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev,
+				     struct ppc440spe_adma_chan *chan,
+				     int *initcode)
+{
+	struct platform_device *ofdev;
+	struct device_node *np;
+	int ret;
+
+	ofdev = container_of(adev->dev, struct platform_device, dev);
+	np = ofdev->dev.of_node;
+	if (adev->id != PPC440SPE_XOR_ID) {
+		adev->err_irq = irq_of_parse_and_map(np, 1);
+		if (adev->err_irq == NO_IRQ) {
+			dev_warn(adev->dev, "no err irq resource?\n");
+			*initcode = PPC_ADMA_INIT_IRQ2;
+			adev->err_irq = -ENXIO;
+		} else
+			atomic_inc(&ppc440spe_adma_err_irq_ref);
+	} else {
+		adev->err_irq = -ENXIO;
+	}
+
+	adev->irq = irq_of_parse_and_map(np, 0);
+	if (adev->irq == NO_IRQ) {
+		dev_err(adev->dev, "no irq resource\n");
+		*initcode = PPC_ADMA_INIT_IRQ1;
+		ret = -ENXIO;
+		goto err_irq_map;
+	}
+	dev_dbg(adev->dev, "irq %d, err irq %d\n",
+		adev->irq, adev->err_irq);
+
+	ret = request_irq(adev->irq, ppc440spe_adma_eot_handler,
+			  0, dev_driver_string(adev->dev), chan);
+	if (ret) {
+		dev_err(adev->dev, "can't request irq %d\n",
+			adev->irq);
+		*initcode = PPC_ADMA_INIT_IRQ1;
+		ret = -EIO;
+		goto err_req1;
+	}
+
+	/* only DMA engines have a separate error IRQ
+	 * so it's Ok if err_irq < 0 in XOR engine case.
+	 */
+	if (adev->err_irq > 0) {
+		/* both DMA engines share common error IRQ */
+		ret = request_irq(adev->err_irq,
+				  ppc440spe_adma_err_handler,
+				  IRQF_SHARED,
+				  dev_driver_string(adev->dev),
+				  chan);
+		if (ret) {
+			dev_err(adev->dev, "can't request irq %d\n",
+				adev->err_irq);
+			*initcode = PPC_ADMA_INIT_IRQ2;
+			ret = -EIO;
+			goto err_req2;
+		}
+	}
+
+	if (adev->id == PPC440SPE_XOR_ID) {
+		/* enable XOR engine interrupts */
+		iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+			    XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT,
+			    &adev->xor_reg->ier);
+	} else {
+		u32 mask, enable;
+
+		np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+		if (!np) {
+			pr_err("%s: can't find I2O device tree node\n",
+				__func__);
+			ret = -ENODEV;
+			goto err_req2;
+		}
+		adev->i2o_reg = of_iomap(np, 0);
+		if (!adev->i2o_reg) {
+			pr_err("%s: failed to map I2O registers\n", __func__);
+			of_node_put(np);
+			ret = -EINVAL;
+			goto err_req2;
+		}
+		of_node_put(np);
+		/* Unmask 'CS FIFO Attention' interrupts and
+		 * enable generating interrupts on errors
+		 */
+		enable = (adev->id == PPC440SPE_DMA0_ID) ?
+			 ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+			 ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+		mask = ioread32(&adev->i2o_reg->iopim) & enable;
+		iowrite32(mask, &adev->i2o_reg->iopim);
+	}
+	return 0;
+
+err_req2:
+	free_irq(adev->irq, chan);
+err_req1:
+	irq_dispose_mapping(adev->irq);
+err_irq_map:
+	if (adev->err_irq > 0) {
+		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref))
+			irq_dispose_mapping(adev->err_irq);
+	}
+	return ret;
+}
+
+static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev,
+					struct ppc440spe_adma_chan *chan)
+{
+	u32 mask, disable;
+
+	if (adev->id == PPC440SPE_XOR_ID) {
+		/* disable XOR engine interrupts */
+		mask = ioread32be(&adev->xor_reg->ier);
+		mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+			  XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT);
+		iowrite32be(mask, &adev->xor_reg->ier);
+	} else {
+		/* disable DMAx engine interrupts */
+		disable = (adev->id == PPC440SPE_DMA0_ID) ?
+			  (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+			  (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+		mask = ioread32(&adev->i2o_reg->iopim) | disable;
+		iowrite32(mask, &adev->i2o_reg->iopim);
+	}
+	free_irq(adev->irq, chan);
+	irq_dispose_mapping(adev->irq);
+	if (adev->err_irq > 0) {
+		free_irq(adev->err_irq, chan);
+		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) {
+			irq_dispose_mapping(adev->err_irq);
+			iounmap(adev->i2o_reg);
+		}
+	}
+}
+
+/**
+ * ppc440spe_adma_probe - probe the asynch device
+ */
+static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev)
+{
+	struct device_node *np = ofdev->dev.of_node;
+	struct resource res;
+	struct ppc440spe_adma_device *adev;
+	struct ppc440spe_adma_chan *chan;
+	struct ppc_dma_chan_ref *ref, *_ref;
+	int ret = 0, initcode = PPC_ADMA_INIT_OK;
+	const u32 *idx;
+	int len;
+	void *regs;
+	u32 id, pool_size;
+
+	if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
+		id = PPC440SPE_XOR_ID;
+		/* As far as the XOR engine is concerned, it does not
+		 * use FIFOs but uses linked list. So there is no dependency
+		 * between pool size to allocate and the engine configuration.
+		 */
+		pool_size = PAGE_SIZE << 1;
+	} else {
+		/* it is DMA0 or DMA1 */
+		idx = of_get_property(np, "cell-index", &len);
+		if (!idx || (len != sizeof(u32))) {
+			dev_err(&ofdev->dev, "Device node %s has missing "
+				"or invalid cell-index property\n",
+				np->full_name);
+			return -EINVAL;
+		}
+		id = *idx;
+		/* DMA0,1 engines use FIFO to maintain CDBs, so we
+		 * should allocate the pool accordingly to size of this
+		 * FIFO. Thus, the pool size depends on the FIFO depth:
+		 * how much CDBs pointers the FIFO may contain then so
+		 * much CDBs we should provide in the pool.
+		 * That is
+		 *   CDB size = 32B;
+		 *   CDBs number = (DMA0_FIFO_SIZE >> 3);
+		 *   Pool size = CDBs number * CDB size =
+		 *      = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2.
+		 */
+		pool_size = (id == PPC440SPE_DMA0_ID) ?
+			    DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+		pool_size <<= 2;
+	}
+
+	if (of_address_to_resource(np, 0, &res)) {
+		dev_err(&ofdev->dev, "failed to get memory resource\n");
+		initcode = PPC_ADMA_INIT_MEMRES;
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (!request_mem_region(res.start, resource_size(&res),
+				dev_driver_string(&ofdev->dev))) {
+		dev_err(&ofdev->dev, "failed to request memory region %pR\n",
+			&res);
+		initcode = PPC_ADMA_INIT_MEMREG;
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* create a device */
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev) {
+		dev_err(&ofdev->dev, "failed to allocate device\n");
+		initcode = PPC_ADMA_INIT_ALLOC;
+		ret = -ENOMEM;
+		goto err_adev_alloc;
+	}
+
+	adev->id = id;
+	adev->pool_size = pool_size;
+	/* allocate coherent memory for hardware descriptors */
+	adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev,
+					adev->pool_size, &adev->dma_desc_pool,
+					GFP_KERNEL);
+	if (adev->dma_desc_pool_virt == NULL) {
+		dev_err(&ofdev->dev, "failed to allocate %d bytes of coherent "
+			"memory for hardware descriptors\n",
+			adev->pool_size);
+		initcode = PPC_ADMA_INIT_COHERENT;
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+	dev_dbg(&ofdev->dev, "allocted descriptor pool virt 0x%p phys 0x%llx\n",
+		adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool);
+
+	regs = ioremap(res.start, resource_size(&res));
+	if (!regs) {
+		dev_err(&ofdev->dev, "failed to ioremap regs!\n");
+		goto err_regs_alloc;
+	}
+
+	if (adev->id == PPC440SPE_XOR_ID) {
+		adev->xor_reg = regs;
+		/* Reset XOR */
+		iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr);
+		iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr);
+	} else {
+		size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ?
+				   DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+		adev->dma_reg = regs;
+		/* DMAx_FIFO_SIZE is defined in bytes,
+		 * <fsiz> - is defined in number of CDB pointers (8byte).
+		 * DMA FIFO Length = CSlength + CPlength, where
+		 * CSlength = CPlength = (fsiz + 1) * 8.
+		 */
+		iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2),
+			  &adev->dma_reg->fsiz);
+		/* Configure DMA engine */
+		iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN,
+			  &adev->dma_reg->cfg);
+		/* Clear Status */
+		iowrite32(~0, &adev->dma_reg->dsts);
+	}
+
+	adev->dev = &ofdev->dev;
+	adev->common.dev = &ofdev->dev;
+	INIT_LIST_HEAD(&adev->common.channels);
+	dev_set_drvdata(&ofdev->dev, adev);
+
+	/* create a channel */
+	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+	if (!chan) {
+		dev_err(&ofdev->dev, "can't allocate channel structure\n");
+		initcode = PPC_ADMA_INIT_CHANNEL;
+		ret = -ENOMEM;
+		goto err_chan_alloc;
+	}
+
+	spin_lock_init(&chan->lock);
+	INIT_LIST_HEAD(&chan->chain);
+	INIT_LIST_HEAD(&chan->all_slots);
+	chan->device = adev;
+	chan->common.device = &adev->common;
+	dma_cookie_init(&chan->common);
+	list_add_tail(&chan->common.device_node, &adev->common.channels);
+	tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet,
+		     (unsigned long)chan);
+
+	/* allocate and map helper pages for async validation or
+	 * async_mult/async_sum_product operations on DMA0/1.
+	 */
+	if (adev->id != PPC440SPE_XOR_ID) {
+		chan->pdest_page = alloc_page(GFP_KERNEL);
+		chan->qdest_page = alloc_page(GFP_KERNEL);
+		if (!chan->pdest_page ||
+		    !chan->qdest_page) {
+			if (chan->pdest_page)
+				__free_page(chan->pdest_page);
+			if (chan->qdest_page)
+				__free_page(chan->qdest_page);
+			ret = -ENOMEM;
+			goto err_page_alloc;
+		}
+		chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0,
+					   PAGE_SIZE, DMA_BIDIRECTIONAL);
+		chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0,
+					   PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
+	if (ref) {
+		ref->chan = &chan->common;
+		INIT_LIST_HEAD(&ref->node);
+		list_add_tail(&ref->node, &ppc440spe_adma_chan_list);
+	} else {
+		dev_err(&ofdev->dev, "failed to allocate channel reference!\n");
+		ret = -ENOMEM;
+		goto err_ref_alloc;
+	}
+
+	ret = ppc440spe_adma_setup_irqs(adev, chan, &initcode);
+	if (ret)
+		goto err_irq;
+
+	ppc440spe_adma_init_capabilities(adev);
+
+	ret = dma_async_device_register(&adev->common);
+	if (ret) {
+		initcode = PPC_ADMA_INIT_REGISTER;
+		dev_err(&ofdev->dev, "failed to register dma device\n");
+		goto err_dev_reg;
+	}
+
+	goto out;
+
+err_dev_reg:
+	ppc440spe_adma_release_irqs(adev, chan);
+err_irq:
+	list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, node) {
+		if (chan == to_ppc440spe_adma_chan(ref->chan)) {
+			list_del(&ref->node);
+			kfree(ref);
+		}
+	}
+err_ref_alloc:
+	if (adev->id != PPC440SPE_XOR_ID) {
+		dma_unmap_page(&ofdev->dev, chan->pdest,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_unmap_page(&ofdev->dev, chan->qdest,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(chan->pdest_page);
+		__free_page(chan->qdest_page);
+	}
+err_page_alloc:
+	kfree(chan);
+err_chan_alloc:
+	if (adev->id == PPC440SPE_XOR_ID)
+		iounmap(adev->xor_reg);
+	else
+		iounmap(adev->dma_reg);
+err_regs_alloc:
+	dma_free_coherent(adev->dev, adev->pool_size,
+			  adev->dma_desc_pool_virt,
+			  adev->dma_desc_pool);
+err_dma_alloc:
+	kfree(adev);
+err_adev_alloc:
+	release_mem_region(res.start, resource_size(&res));
+out:
+	if (id < PPC440SPE_ADMA_ENGINES_NUM)
+		ppc440spe_adma_devices[id] = initcode;
+
+	return ret;
+}
+
+/**
+ * ppc440spe_adma_remove - remove the asynch device
+ */
+static int __devexit ppc440spe_adma_remove(struct platform_device *ofdev)
+{
+	struct ppc440spe_adma_device *adev = dev_get_drvdata(&ofdev->dev);
+	struct device_node *np = ofdev->dev.of_node;
+	struct resource res;
+	struct dma_chan *chan, *_chan;
+	struct ppc_dma_chan_ref *ref, *_ref;
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+
+	dev_set_drvdata(&ofdev->dev, NULL);
+	if (adev->id < PPC440SPE_ADMA_ENGINES_NUM)
+		ppc440spe_adma_devices[adev->id] = -1;
+
+	dma_async_device_unregister(&adev->common);
+
+	list_for_each_entry_safe(chan, _chan, &adev->common.channels,
+				 device_node) {
+		ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+		ppc440spe_adma_release_irqs(adev, ppc440spe_chan);
+		tasklet_kill(&ppc440spe_chan->irq_tasklet);
+		if (adev->id != PPC440SPE_XOR_ID) {
+			dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+			dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+			__free_page(ppc440spe_chan->pdest_page);
+			__free_page(ppc440spe_chan->qdest_page);
+		}
+		list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list,
+					 node) {
+			if (ppc440spe_chan ==
+			    to_ppc440spe_adma_chan(ref->chan)) {
+				list_del(&ref->node);
+				kfree(ref);
+			}
+		}
+		list_del(&chan->device_node);
+		kfree(ppc440spe_chan);
+	}
+
+	dma_free_coherent(adev->dev, adev->pool_size,
+			  adev->dma_desc_pool_virt, adev->dma_desc_pool);
+	if (adev->id == PPC440SPE_XOR_ID)
+		iounmap(adev->xor_reg);
+	else
+		iounmap(adev->dma_reg);
+	of_address_to_resource(np, 0, &res);
+	release_mem_region(res.start, resource_size(&res));
+	kfree(adev);
+	return 0;
+}
+
+/*
+ * /sys driver interface to enable h/w RAID-6 capabilities
+ * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/
+ * directory are "devices", "enable" and "poly".
+ * "devices" shows available engines.
+ * "enable" is used to enable RAID-6 capabilities or to check
+ * whether these has been activated.
+ * "poly" allows setting/checking used polynomial (for PPC440SPe only).
+ */
+
+static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf)
+{
+	ssize_t size = 0;
+	int i;
+
+	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) {
+		if (ppc440spe_adma_devices[i] == -1)
+			continue;
+		size += snprintf(buf + size, PAGE_SIZE - size,
+				 "PPC440SP(E)-ADMA.%d: %s\n", i,
+				 ppc_adma_errors[ppc440spe_adma_devices[i]]);
+	}
+	return size;
+}
+
+static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE,
+			"PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
+			ppc440spe_r6_enabled ? "EN" : "DIS");
+}
+
+static ssize_t store_ppc440spe_r6enable(struct device_driver *dev,
+					const char *buf, size_t count)
+{
+	unsigned long val;
+
+	if (!count || count > 11)
+		return -EINVAL;
+
+	if (!ppc440spe_r6_tchan)
+		return -EFAULT;
+
+	/* Write a key */
+	sscanf(buf, "%lx", &val);
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
+	isync();
+
+	/* Verify whether it really works now */
+	if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) {
+		pr_info("PPC440SP(e) RAID-6 has been activated "
+			"successfully\n");
+		ppc440spe_r6_enabled = 1;
+	} else {
+		pr_info("PPC440SP(e) RAID-6 hasn't been activated!"
+			" Error key ?\n");
+		ppc440spe_r6_enabled = 0;
+	}
+	return count;
+}
+
+static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf)
+{
+	ssize_t size = 0;
+	u32 reg;
+
+#ifdef CONFIG_440SP
+	/* 440SP has fixed polynomial */
+	reg = 0x4d;
+#else
+	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+	reg >>= MQ0_CFBHL_POLY;
+	reg &= 0xFF;
+#endif
+
+	size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver "
+			"uses 0x1%02x polynomial.\n", reg);
+	return size;
+}
+
+static ssize_t store_ppc440spe_r6poly(struct device_driver *dev,
+				      const char *buf, size_t count)
+{
+	unsigned long reg, val;
+
+#ifdef CONFIG_440SP
+	/* 440SP uses default 0x14D polynomial only */
+	return -EINVAL;
+#endif
+
+	if (!count || count > 6)
+		return -EINVAL;
+
+	/* e.g., 0x14D or 0x11D */
+	sscanf(buf, "%lx", &val);
+
+	if (val & ~0x1FF)
+		return -EINVAL;
+
+	val &= 0xFF;
+	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+	reg &= ~(0xFF << MQ0_CFBHL_POLY);
+	reg |= val << MQ0_CFBHL_POLY;
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
+
+	return count;
+}
+
+static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL);
+static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable,
+		   store_ppc440spe_r6enable);
+static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc440spe_r6poly,
+		   store_ppc440spe_r6poly);
+
+/*
+ * Common initialisation for RAID engines; allocate memory for
+ * DMAx FIFOs, perform configuration common for all DMA engines.
+ * Further DMA engine specific configuration is done at probe time.
+ */
+static int ppc440spe_configure_raid_devices(void)
+{
+	struct device_node *np;
+	struct resource i2o_res;
+	struct i2o_regs __iomem *i2o_reg;
+	dcr_host_t i2o_dcr_host;
+	unsigned int dcr_base, dcr_len;
+	int i, ret;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+	if (!np) {
+		pr_err("%s: can't find I2O device tree node\n",
+			__func__);
+		return -ENODEV;
+	}
+
+	if (of_address_to_resource(np, 0, &i2o_res)) {
+		of_node_put(np);
+		return -EINVAL;
+	}
+
+	i2o_reg = of_iomap(np, 0);
+	if (!i2o_reg) {
+		pr_err("%s: failed to map I2O registers\n", __func__);
+		of_node_put(np);
+		return -EINVAL;
+	}
+
+	/* Get I2O DCRs base */
+	dcr_base = dcr_resource_start(np, 0);
+	dcr_len = dcr_resource_len(np, 0);
+	if (!dcr_base && !dcr_len) {
+		pr_err("%s: can't get DCR registers base/len!\n",
+			np->full_name);
+		of_node_put(np);
+		iounmap(i2o_reg);
+		return -ENODEV;
+	}
+
+	i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(i2o_dcr_host)) {
+		pr_err("%s: failed to map DCRs!\n", np->full_name);
+		of_node_put(np);
+		iounmap(i2o_reg);
+		return -ENODEV;
+	}
+	of_node_put(np);
+
+	/* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share
+	 * the base address of FIFO memory space.
+	 * Actually we need twice more physical memory than programmed in the
+	 * <fsiz> register (because there are two FIFOs for each DMA: CP and CS)
+	 */
+	ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
+					 GFP_KERNEL);
+	if (!ppc440spe_dma_fifo_buf) {
+		pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
+		iounmap(i2o_reg);
+		dcr_unmap(i2o_dcr_host, dcr_len);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Configure h/w
+	 */
+	/* Reset I2O/DMA */
+	mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA);
+	mtdcri(SDR0, DCRN_SDR0_SRST, 0);
+
+	/* Setup the base address of mmaped registers */
+	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32));
+	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) |
+						I2O_REG_ENABLE);
+	dcr_unmap(i2o_dcr_host, dcr_len);
+
+	/* Setup FIFO memory space base address */
+	iowrite32(0, &i2o_reg->ifbah);
+	iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal);
+
+	/* set zero FIFO size for I2O, so the whole
+	 * ppc440spe_dma_fifo_buf is used by DMAs.
+	 * DMAx_FIFOs will be configured while probe.
+	 */
+	iowrite32(0, &i2o_reg->ifsiz);
+	iounmap(i2o_reg);
+
+	/* To prepare WXOR/RXOR functionality we need access to
+	 * Memory Queue Module DCRs (finally it will be enabled
+	 * via /sys interface of the ppc440spe ADMA driver).
+	 */
+	np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe");
+	if (!np) {
+		pr_err("%s: can't find MQ device tree node\n",
+			__func__);
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	/* Get MQ DCRs base */
+	dcr_base = dcr_resource_start(np, 0);
+	dcr_len = dcr_resource_len(np, 0);
+	if (!dcr_base && !dcr_len) {
+		pr_err("%s: can't get DCR registers base/len!\n",
+			np->full_name);
+		ret = -ENODEV;
+		goto out_mq;
+	}
+
+	ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
+		pr_err("%s: failed to map DCRs!\n", np->full_name);
+		ret = -ENODEV;
+		goto out_mq;
+	}
+	of_node_put(np);
+	ppc440spe_mq_dcr_len = dcr_len;
+
+	/* Set HB alias */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
+
+	/* Set:
+	 * - LL transaction passing limit to 1;
+	 * - Memory controller cycle limit to 1;
+	 * - Galois Polynomial to 0x14d (default)
+	 */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL,
+		  (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) |
+		  (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY));
+
+	atomic_set(&ppc440spe_adma_err_irq_ref, 0);
+	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++)
+		ppc440spe_adma_devices[i] = -1;
+
+	return 0;
+
+out_mq:
+	of_node_put(np);
+out_free:
+	kfree(ppc440spe_dma_fifo_buf);
+	return ret;
+}
+
+static const struct of_device_id ppc440spe_adma_of_match[] __devinitconst = {
+	{ .compatible	= "ibm,dma-440spe", },
+	{ .compatible	= "amcc,xor-accelerator", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match);
+
+static struct platform_driver ppc440spe_adma_driver = {
+	.probe = ppc440spe_adma_probe,
+	.remove = __devexit_p(ppc440spe_adma_remove),
+	.driver = {
+		.name = "PPC440SP(E)-ADMA",
+		.owner = THIS_MODULE,
+		.of_match_table = ppc440spe_adma_of_match,
+	},
+};
+
+static __init int ppc440spe_adma_init(void)
+{
+	int ret;
+
+	ret = ppc440spe_configure_raid_devices();
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&ppc440spe_adma_driver);
+	if (ret) {
+		pr_err("%s: failed to register platform driver\n",
+			__func__);
+		goto out_reg;
+	}
+
+	/* Initialization status */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver,
+				 &driver_attr_devices);
+	if (ret)
+		goto out_dev;
+
+	/* RAID-6 h/w enable entry */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver,
+				 &driver_attr_enable);
+	if (ret)
+		goto out_en;
+
+	/* GF polynomial to use */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver,
+				 &driver_attr_poly);
+	if (!ret)
+		return ret;
+
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_enable);
+out_en:
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_devices);
+out_dev:
+	/* User will not be able to enable h/w RAID-6 */
+	pr_err("%s: failed to create RAID-6 driver interface\n",
+		__func__);
+	platform_driver_unregister(&ppc440spe_adma_driver);
+out_reg:
+	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+	kfree(ppc440spe_dma_fifo_buf);
+	return ret;
+}
+
+static void __exit ppc440spe_adma_exit(void)
+{
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_poly);
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_enable);
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_devices);
+	platform_driver_unregister(&ppc440spe_adma_driver);
+	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+	kfree(ppc440spe_dma_fifo_buf);
+}
+
+arch_initcall(ppc440spe_adma_init);
+module_exit(ppc440spe_adma_exit);
+
+MODULE_AUTHOR("Yuri Tikhonov <yur@emcraft.com>");
+MODULE_DESCRIPTION("PPC440SPE ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h
new file mode 100644
index 00000000..26b7a5ed
--- /dev/null
+++ b/drivers/dma/ppc4xx/adma.h
@@ -0,0 +1,193 @@
+/*
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+
+#ifndef _PPC440SPE_ADMA_H
+#define _PPC440SPE_ADMA_H
+
+#include <linux/types.h>
+#include "dma.h"
+#include "xor.h"
+
+#define to_ppc440spe_adma_chan(chan) \
+		container_of(chan, struct ppc440spe_adma_chan, common)
+#define to_ppc440spe_adma_device(dev) \
+		container_of(dev, struct ppc440spe_adma_device, common)
+#define tx_to_ppc440spe_adma_slot(tx) \
+		container_of(tx, struct ppc440spe_adma_desc_slot, async_tx)
+
+/* Default polynomial (for 440SP is only available) */
+#define PPC440SPE_DEFAULT_POLY	0x4d
+
+#define PPC440SPE_ADMA_ENGINES_NUM	(XOR_ENGINES_NUM + DMA_ENGINES_NUM)
+
+#define PPC440SPE_ADMA_WATCHDOG_MSEC	3
+#define PPC440SPE_ADMA_THRESHOLD	1
+
+#define PPC440SPE_DMA0_ID	0
+#define PPC440SPE_DMA1_ID	1
+#define PPC440SPE_XOR_ID	2
+
+#define PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT	0xFFFFFFUL
+/* this is the XOR_CBBCR width */
+#define PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT	(1 << 31)
+#define PPC440SPE_ADMA_ZERO_SUM_MAX_BYTE_COUNT PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT
+
+#define PPC440SPE_RXOR_RUN	0
+
+#define MQ0_CF2H_RXOR_BS_MASK	0x1FF
+
+#undef ADMA_LL_DEBUG
+
+/**
+ * struct ppc440spe_adma_device - internal representation of an ADMA device
+ * @dev: device
+ * @dma_reg: base for DMAx register access
+ * @xor_reg: base for XOR register access
+ * @i2o_reg: base for I2O register access
+ * @id: HW ADMA Device selector
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @pool_size: size of the pool
+ * @irq: DMAx or XOR irq number
+ * @err_irq: DMAx error irq number
+ * @common: embedded struct dma_device
+ */
+struct ppc440spe_adma_device {
+	struct device *dev;
+	struct dma_regs __iomem *dma_reg;
+	struct xor_regs __iomem *xor_reg;
+	struct i2o_regs __iomem *i2o_reg;
+	int id;
+	void *dma_desc_pool_virt;
+	dma_addr_t dma_desc_pool;
+	size_t pool_size;
+	int irq;
+	int err_irq;
+	struct dma_device common;
+};
+
+/**
+ * struct ppc440spe_adma_chan - internal representation of an ADMA channel
+ * @lock: serializes enqueue/dequeue operations to the slot pool
+ * @device: parent device
+ * @chain: device chain view of the descriptors
+ * @common: common dmaengine channel object members
+ * @all_slots: complete domain of slots usable by the channel
+ * @pending: allows batching of hardware operations
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @hw_chain_inited: h/w descriptor chain initialization flag
+ * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs
+ * @needs_unmap: if buffers should not be unmapped upon final processing
+ * @pdest_page: P destination page for async validate operation
+ * @qdest_page: Q destination page for async validate operation
+ * @pdest: P dma addr for async validate operation
+ * @qdest: Q dma addr for async validate operation
+ */
+struct ppc440spe_adma_chan {
+	spinlock_t lock;
+	struct ppc440spe_adma_device *device;
+	struct list_head chain;
+	struct dma_chan common;
+	struct list_head all_slots;
+	struct ppc440spe_adma_desc_slot *last_used;
+	int pending;
+	int slots_allocated;
+	int hw_chain_inited;
+	struct tasklet_struct irq_tasklet;
+	u8 needs_unmap;
+	struct page *pdest_page;
+	struct page *qdest_page;
+	dma_addr_t pdest;
+	dma_addr_t qdest;
+};
+
+struct ppc440spe_rxor {
+	u32 addrl;
+	u32 addrh;
+	int len;
+	int xor_count;
+	int addr_count;
+	int desc_count;
+	int state;
+};
+
+/**
+ * struct ppc440spe_adma_desc_slot - PPC440SPE-ADMA software descriptor
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @hw_next: pointer to the next descriptor in chain
+ * @async_tx: support for the async_tx api
+ * @slot_node: node on the iop_adma_chan.all_slots list
+ * @chain_node: node on the op_adma_chan.chain list
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ *              for example transfer lengths larger than the supported hw max
+ * @unmap_len: transaction bytecount
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @stride: currently chained or not
+ * @idx: pool index
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @src_cnt: number of sources set in this descriptor
+ * @dst_cnt: number of destinations set in the descriptor
+ * @slots_per_op: number of slots per operation
+ * @descs_per_op: number of slot per P/Q operation see comment
+ *                for ppc440spe_prep_dma_pqxor function
+ * @flags: desc state/type
+ * @reverse_flags: 1 if a corresponding rxor address uses reversed address order
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct ppc440spe_adma_desc_slot {
+	dma_addr_t phys;
+	struct ppc440spe_adma_desc_slot *group_head;
+	struct ppc440spe_adma_desc_slot *hw_next;
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head slot_node;
+	struct list_head chain_node; /* node in channel ops list */
+	struct list_head group_list; /* list */
+	unsigned int unmap_len;
+	void *hw_desc;
+	u16 stride;
+	u16 idx;
+	u16 slot_cnt;
+	u8 src_cnt;
+	u8 dst_cnt;
+	u8 slots_per_op;
+	u8 descs_per_op;
+	unsigned long flags;
+	unsigned long reverse_flags[8];
+
+#define PPC440SPE_DESC_INT	0	/* generate interrupt on complete */
+#define PPC440SPE_ZERO_P	1	/* clear P destionaion */
+#define PPC440SPE_ZERO_Q	2	/* clear Q destination */
+#define PPC440SPE_COHERENT	3	/* src/dst are coherent */
+
+#define PPC440SPE_DESC_WXOR	4	/* WXORs are in chain */
+#define PPC440SPE_DESC_RXOR	5	/* RXOR is in chain */
+
+#define PPC440SPE_DESC_RXOR123	8	/* CDB for RXOR123 operation */
+#define PPC440SPE_DESC_RXOR124	9	/* CDB for RXOR124 operation */
+#define PPC440SPE_DESC_RXOR125	10	/* CDB for RXOR125 operation */
+#define PPC440SPE_DESC_RXOR12	11	/* CDB for RXOR12 operation */
+#define PPC440SPE_DESC_RXOR_REV	12	/* CDB has srcs in reversed order */
+
+#define PPC440SPE_DESC_PCHECK	13
+#define PPC440SPE_DESC_QCHECK	14
+
+#define PPC440SPE_DESC_RXOR_MSK	0x3
+
+	struct ppc440spe_rxor rxor_cursor;
+
+	union {
+		u32 *xor_check_result;
+		u32 *crc32_result;
+	};
+};
+
+#endif /* _PPC440SPE_ADMA_H */
diff --git a/drivers/dma/ppc4xx/dma.h b/drivers/dma/ppc4xx/dma.h
new file mode 100644
index 00000000..bcde2df2
--- /dev/null
+++ b/drivers/dma/ppc4xx/dma.h
@@ -0,0 +1,223 @@
+/*
+ * 440SPe's DMA engines support header file
+ *
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the term of  the GNU General Public License
+ * version 2. The program licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef	_PPC440SPE_DMA_H
+#define _PPC440SPE_DMA_H
+
+#include <linux/types.h>
+
+/* Number of elements in the array with statical CDBs */
+#define	MAX_STAT_DMA_CDBS	16
+/* Number of DMA engines available on the contoller */
+#define DMA_ENGINES_NUM		2
+
+/* Maximum h/w supported number of destinations */
+#define DMA_DEST_MAX_NUM	2
+
+/* FIFO's params */
+#define DMA0_FIFO_SIZE		0x1000
+#define DMA1_FIFO_SIZE		0x1000
+#define DMA_FIFO_ENABLE		(1<<12)
+
+/* DMA Configuration Register. Data Transfer Engine PLB Priority: */
+#define DMA_CFG_DXEPR_LP	(0<<26)
+#define DMA_CFG_DXEPR_HP	(3<<26)
+#define DMA_CFG_DXEPR_HHP	(2<<26)
+#define DMA_CFG_DXEPR_HHHP	(1<<26)
+
+/* DMA Configuration Register. DMA FIFO Manager PLB Priority: */
+#define DMA_CFG_DFMPP_LP	(0<<23)
+#define DMA_CFG_DFMPP_HP	(3<<23)
+#define DMA_CFG_DFMPP_HHP	(2<<23)
+#define DMA_CFG_DFMPP_HHHP	(1<<23)
+
+/* DMA Configuration Register. Force 64-byte Alignment */
+#define DMA_CFG_FALGN		(1 << 19)
+
+/*UIC0:*/
+#define D0CPF_INT		(1<<12)
+#define D0CSF_INT		(1<<11)
+#define D1CPF_INT		(1<<10)
+#define D1CSF_INT		(1<<9)
+/*UIC1:*/
+#define DMAE_INT		(1<<9)
+
+/* I2O IOP Interrupt Mask Register */
+#define I2O_IOPIM_P0SNE		(1<<3)
+#define I2O_IOPIM_P0EM		(1<<5)
+#define I2O_IOPIM_P1SNE		(1<<6)
+#define I2O_IOPIM_P1EM		(1<<8)
+
+/* DMA CDB fields */
+#define DMA_CDB_MSK		(0xF)
+#define DMA_CDB_64B_ADDR	(1<<2)
+#define DMA_CDB_NO_INT		(1<<3)
+#define DMA_CDB_STATUS_MSK	(0x3)
+#define DMA_CDB_ADDR_MSK	(0xFFFFFFF0)
+
+/* DMA CDB OpCodes */
+#define DMA_CDB_OPC_NO_OP	(0x00)
+#define DMA_CDB_OPC_MV_SG1_SG2	(0x01)
+#define DMA_CDB_OPC_MULTICAST	(0x05)
+#define DMA_CDB_OPC_DFILL128	(0x24)
+#define DMA_CDB_OPC_DCHECK128	(0x23)
+
+#define DMA_CUED_XOR_BASE	(0x10000000)
+#define DMA_CUED_XOR_HB		(0x00000008)
+
+#ifdef CONFIG_440SP
+#define DMA_CUED_MULT1_OFF	0
+#define DMA_CUED_MULT2_OFF	8
+#define DMA_CUED_MULT3_OFF	16
+#define DMA_CUED_REGION_OFF	24
+#define DMA_CUED_XOR_WIN_MSK	(0xFC000000)
+#else
+#define DMA_CUED_MULT1_OFF	2
+#define DMA_CUED_MULT2_OFF	10
+#define DMA_CUED_MULT3_OFF	18
+#define DMA_CUED_REGION_OFF	26
+#define DMA_CUED_XOR_WIN_MSK	(0xF0000000)
+#endif
+
+#define DMA_CUED_REGION_MSK	0x3
+#define DMA_RXOR123		0x0
+#define DMA_RXOR124		0x1
+#define DMA_RXOR125		0x2
+#define DMA_RXOR12		0x3
+
+/* S/G addresses */
+#define DMA_CDB_SG_SRC		1
+#define DMA_CDB_SG_DST1		2
+#define DMA_CDB_SG_DST2		3
+
+/*
+ * DMAx engines Command Descriptor Block Type
+ */
+struct dma_cdb {
+	/*
+	 * Basic CDB structure (Table 20-17, p.499, 440spe_um_1_22.pdf)
+	 */
+	u8	pad0[2];        /* reserved */
+	u8	attr;		/* attributes */
+	u8	opc;		/* opcode */
+	u32	sg1u;		/* upper SG1 address */
+	u32	sg1l;		/* lower SG1 address */
+	u32	cnt;		/* SG count, 3B used */
+	u32	sg2u;		/* upper SG2 address */
+	u32	sg2l;		/* lower SG2 address */
+	u32	sg3u;		/* upper SG3 address */
+	u32	sg3l;		/* lower SG3 address */
+};
+
+/*
+ * DMAx hardware registers (p.515 in 440SPe UM 1.22)
+ */
+struct dma_regs {
+	u32	cpfpl;
+	u32	cpfph;
+	u32	csfpl;
+	u32	csfph;
+	u32	dsts;
+	u32	cfg;
+	u8	pad0[0x8];
+	u16	cpfhp;
+	u16	cpftp;
+	u16	csfhp;
+	u16	csftp;
+	u8	pad1[0x8];
+	u32	acpl;
+	u32	acph;
+	u32	s1bpl;
+	u32	s1bph;
+	u32	s2bpl;
+	u32	s2bph;
+	u32	s3bpl;
+	u32	s3bph;
+	u8	pad2[0x10];
+	u32	earl;
+	u32	earh;
+	u8	pad3[0x8];
+	u32	seat;
+	u32	sead;
+	u32	op;
+	u32	fsiz;
+};
+
+/*
+ * I2O hardware registers (p.528 in 440SPe UM 1.22)
+ */
+struct i2o_regs {
+	u32	ists;
+	u32	iseat;
+	u32	isead;
+	u8	pad0[0x14];
+	u32	idbel;
+	u8	pad1[0xc];
+	u32	ihis;
+	u32	ihim;
+	u8	pad2[0x8];
+	u32	ihiq;
+	u32	ihoq;
+	u8	pad3[0x8];
+	u32	iopis;
+	u32	iopim;
+	u32	iopiq;
+	u8	iopoq;
+	u8	pad4[3];
+	u16	iiflh;
+	u16	iiflt;
+	u16	iiplh;
+	u16	iiplt;
+	u16	ioflh;
+	u16	ioflt;
+	u16	ioplh;
+	u16	ioplt;
+	u32	iidc;
+	u32	ictl;
+	u32	ifcpp;
+	u8	pad5[0x4];
+	u16	mfac0;
+	u16	mfac1;
+	u16	mfac2;
+	u16	mfac3;
+	u16	mfac4;
+	u16	mfac5;
+	u16	mfac6;
+	u16	mfac7;
+	u16	ifcfh;
+	u16	ifcht;
+	u8	pad6[0x4];
+	u32	iifmc;
+	u32	iodb;
+	u32	iodbc;
+	u32	ifbal;
+	u32	ifbah;
+	u32	ifsiz;
+	u32	ispd0;
+	u32	ispd1;
+	u32	ispd2;
+	u32	ispd3;
+	u32	ihipl;
+	u32	ihiph;
+	u32	ihopl;
+	u32	ihoph;
+	u32	iiipl;
+	u32	iiiph;
+	u32	iiopl;
+	u32	iioph;
+	u32	ifcpl;
+	u32	ifcph;
+	u8	pad7[0x8];
+	u32	iopt;
+};
+
+#endif /* _PPC440SPE_DMA_H */
diff --git a/drivers/dma/ppc4xx/xor.h b/drivers/dma/ppc4xx/xor.h
new file mode 100644
index 00000000..daed7384
--- /dev/null
+++ b/drivers/dma/ppc4xx/xor.h
@@ -0,0 +1,110 @@
+/*
+ * 440SPe's XOR engines support header file
+ *
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the term of  the GNU General Public License
+ * version 2. The program licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef _PPC440SPE_XOR_H
+#define _PPC440SPE_XOR_H
+
+#include <linux/types.h>
+
+/* Number of XOR engines available on the contoller */
+#define XOR_ENGINES_NUM		1
+
+/* Number of operands supported in the h/w */
+#define XOR_MAX_OPS		16
+
+/*
+ * XOR Command Block Control Register bits
+ */
+#define XOR_CBCR_LNK_BIT        (1<<31) /* link present */
+#define XOR_CBCR_TGT_BIT        (1<<30) /* target present */
+#define XOR_CBCR_CBCE_BIT       (1<<29) /* command block compete enable */
+#define XOR_CBCR_RNZE_BIT       (1<<28) /* result not zero enable */
+#define XOR_CBCR_XNOR_BIT       (1<<15) /* XOR/XNOR */
+#define XOR_CDCR_OAC_MSK        (0x7F)  /* operand address count */
+
+/*
+ * XORCore Status Register bits
+ */
+#define XOR_SR_XCP_BIT		(1<<31)	/* core processing */
+#define XOR_SR_ICB_BIT		(1<<17)	/* invalid CB */
+#define XOR_SR_IC_BIT		(1<<16)	/* invalid command */
+#define XOR_SR_IPE_BIT		(1<<15)	/* internal parity error */
+#define XOR_SR_RNZ_BIT		(1<<2)	/* result not Zero */
+#define XOR_SR_CBC_BIT		(1<<1)	/* CB complete */
+#define XOR_SR_CBLC_BIT		(1<<0)	/* CB list complete */
+
+/*
+ * XORCore Control Set and Reset Register bits
+ */
+#define XOR_CRSR_XASR_BIT	(1<<31)	/* soft reset */
+#define XOR_CRSR_XAE_BIT	(1<<30)	/* enable */
+#define XOR_CRSR_RCBE_BIT	(1<<29)	/* refetch CB enable */
+#define XOR_CRSR_PAUS_BIT	(1<<28)	/* pause */
+#define XOR_CRSR_64BA_BIT	(1<<27) /* 64/32 CB format */
+#define XOR_CRSR_CLP_BIT	(1<<25)	/* continue list processing */
+
+/*
+ * XORCore Interrupt Enable Register
+ */
+#define XOR_IE_ICBIE_BIT	(1<<17)	/* Invalid Command Block IRQ Enable */
+#define XOR_IE_ICIE_BIT		(1<<16)	/* Invalid Command IRQ Enable */
+#define XOR_IE_RPTIE_BIT	(1<<14)	/* Read PLB Timeout Error IRQ Enable */
+#define XOR_IE_CBCIE_BIT	(1<<1)	/* CB complete interrupt enable */
+#define XOR_IE_CBLCI_BIT	(1<<0)	/* CB list complete interrupt enable */
+
+/*
+ * XOR Accelerator engine Command Block Type
+ */
+struct xor_cb {
+	/*
+	 * Basic 64-bit format XOR CB (Table 19-1, p.463, 440spe_um_1_22.pdf)
+	 */
+	u32	cbc;		/* control */
+	u32	cbbc;		/* byte count */
+	u32	cbs;		/* status */
+	u8	pad0[4];	/* reserved */
+	u32	cbtah;		/* target address high */
+	u32	cbtal;		/* target address low */
+	u32	cblah;		/* link address high */
+	u32	cblal;		/* link address low */
+	struct {
+		u32 h;
+		u32 l;
+	} __attribute__ ((packed)) ops[16];
+} __attribute__ ((packed));
+
+/*
+ * XOR hardware registers Table 19-3, UM 1.22
+ */
+struct xor_regs {
+	u32	op_ar[16][2];	/* operand address[0]-high,[1]-low registers */
+	u8	pad0[352];	/* reserved */
+	u32	cbcr;		/* CB control register */
+	u32	cbbcr;		/* CB byte count register */
+	u32	cbsr;		/* CB status register */
+	u8	pad1[4];	/* reserved */
+	u32	cbtahr;		/* operand target address high register */
+	u32	cbtalr;		/* operand target address low register */
+	u32	cblahr;		/* CB link address high register */
+	u32	cblalr;		/* CB link address low register */
+	u32	crsr;		/* control set register */
+	u32	crrr;		/* control reset register */
+	u32	ccbahr;		/* current CB address high register */
+	u32	ccbalr;		/* current CB address low register */
+	u32	plbr;		/* PLB configuration register */
+	u32	ier;		/* interrupt enable register */
+	u32	pecr;		/* parity error count register */
+	u32	sr;		/* status register */
+	u32	revidr;		/* revision ID register */
+};
+
+#endif /* _PPC440SPE_XOR_H */
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
new file mode 100644
index 00000000..ec78ccef
--- /dev/null
+++ b/drivers/dma/sa11x0-dma.c
@@ -0,0 +1,1109 @@
+/*
+ * SA11x0 DMAengine support
+ *
+ * Copyright (C) 2012 Russell King
+ *   Derived in part from arch/arm/mach-sa1100/dma.c,
+ *   Copyright (C) 2000, 2001 by Nicolas Pitre
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sa11x0-dma.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#define NR_PHY_CHAN	6
+#define DMA_ALIGN	3
+#define DMA_MAX_SIZE	0x1fff
+#define DMA_CHUNK_SIZE	0x1000
+
+#define DMA_DDAR	0x00
+#define DMA_DCSR_S	0x04
+#define DMA_DCSR_C	0x08
+#define DMA_DCSR_R	0x0c
+#define DMA_DBSA	0x10
+#define DMA_DBTA	0x14
+#define DMA_DBSB	0x18
+#define DMA_DBTB	0x1c
+#define DMA_SIZE	0x20
+
+#define DCSR_RUN	(1 << 0)
+#define DCSR_IE		(1 << 1)
+#define DCSR_ERROR	(1 << 2)
+#define DCSR_DONEA	(1 << 3)
+#define DCSR_STRTA	(1 << 4)
+#define DCSR_DONEB	(1 << 5)
+#define DCSR_STRTB	(1 << 6)
+#define DCSR_BIU	(1 << 7)
+
+#define DDAR_RW		(1 << 0)	/* 0 = W, 1 = R */
+#define DDAR_E		(1 << 1)	/* 0 = LE, 1 = BE */
+#define DDAR_BS		(1 << 2)	/* 0 = BS4, 1 = BS8 */
+#define DDAR_DW		(1 << 3)	/* 0 = 8b, 1 = 16b */
+#define DDAR_Ser0UDCTr	(0x0 << 4)
+#define DDAR_Ser0UDCRc	(0x1 << 4)
+#define DDAR_Ser1SDLCTr	(0x2 << 4)
+#define DDAR_Ser1SDLCRc	(0x3 << 4)
+#define DDAR_Ser1UARTTr	(0x4 << 4)
+#define DDAR_Ser1UARTRc	(0x5 << 4)
+#define DDAR_Ser2ICPTr	(0x6 << 4)
+#define DDAR_Ser2ICPRc	(0x7 << 4)
+#define DDAR_Ser3UARTTr	(0x8 << 4)
+#define DDAR_Ser3UARTRc	(0x9 << 4)
+#define DDAR_Ser4MCP0Tr	(0xa << 4)
+#define DDAR_Ser4MCP0Rc	(0xb << 4)
+#define DDAR_Ser4MCP1Tr	(0xc << 4)
+#define DDAR_Ser4MCP1Rc	(0xd << 4)
+#define DDAR_Ser4SSPTr	(0xe << 4)
+#define DDAR_Ser4SSPRc	(0xf << 4)
+
+struct sa11x0_dma_sg {
+	u32			addr;
+	u32			len;
+};
+
+struct sa11x0_dma_desc {
+	struct dma_async_tx_descriptor tx;
+	u32			ddar;
+	size_t			size;
+
+	/* maybe protected by c->lock */
+	struct list_head	node;
+	unsigned		sglen;
+	struct sa11x0_dma_sg	sg[0];
+};
+
+struct sa11x0_dma_phy;
+
+struct sa11x0_dma_chan {
+	struct dma_chan		chan;
+	spinlock_t		lock;
+	dma_cookie_t		lc;
+
+	/* protected by c->lock */
+	struct sa11x0_dma_phy	*phy;
+	enum dma_status		status;
+	struct list_head	desc_submitted;
+	struct list_head	desc_issued;
+
+	/* protected by d->lock */
+	struct list_head	node;
+
+	u32			ddar;
+	const char		*name;
+};
+
+struct sa11x0_dma_phy {
+	void __iomem		*base;
+	struct sa11x0_dma_dev	*dev;
+	unsigned		num;
+
+	struct sa11x0_dma_chan	*vchan;
+
+	/* Protected by c->lock */
+	unsigned		sg_load;
+	struct sa11x0_dma_desc	*txd_load;
+	unsigned		sg_done;
+	struct sa11x0_dma_desc	*txd_done;
+#ifdef CONFIG_PM_SLEEP
+	u32			dbs[2];
+	u32			dbt[2];
+	u32			dcsr;
+#endif
+};
+
+struct sa11x0_dma_dev {
+	struct dma_device	slave;
+	void __iomem		*base;
+	spinlock_t		lock;
+	struct tasklet_struct	task;
+	struct list_head	chan_pending;
+	struct list_head	desc_complete;
+	struct sa11x0_dma_phy	phy[NR_PHY_CHAN];
+};
+
+static struct sa11x0_dma_chan *to_sa11x0_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sa11x0_dma_chan, chan);
+}
+
+static struct sa11x0_dma_dev *to_sa11x0_dma(struct dma_device *dmadev)
+{
+	return container_of(dmadev, struct sa11x0_dma_dev, slave);
+}
+
+static struct sa11x0_dma_desc *to_sa11x0_dma_tx(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct sa11x0_dma_desc, tx);
+}
+
+static struct sa11x0_dma_desc *sa11x0_dma_next_desc(struct sa11x0_dma_chan *c)
+{
+	if (list_empty(&c->desc_issued))
+		return NULL;
+
+	return list_first_entry(&c->desc_issued, struct sa11x0_dma_desc, node);
+}
+
+static void sa11x0_dma_start_desc(struct sa11x0_dma_phy *p, struct sa11x0_dma_desc *txd)
+{
+	list_del(&txd->node);
+	p->txd_load = txd;
+	p->sg_load = 0;
+
+	dev_vdbg(p->dev->slave.dev, "pchan %u: txd %p[%x]: starting: DDAR:%x\n",
+		p->num, txd, txd->tx.cookie, txd->ddar);
+}
+
+static void noinline sa11x0_dma_start_sg(struct sa11x0_dma_phy *p,
+	struct sa11x0_dma_chan *c)
+{
+	struct sa11x0_dma_desc *txd = p->txd_load;
+	struct sa11x0_dma_sg *sg;
+	void __iomem *base = p->base;
+	unsigned dbsx, dbtx;
+	u32 dcsr;
+
+	if (!txd)
+		return;
+
+	dcsr = readl_relaxed(base + DMA_DCSR_R);
+
+	/* Don't try to load the next transfer if both buffers are started */
+	if ((dcsr & (DCSR_STRTA | DCSR_STRTB)) == (DCSR_STRTA | DCSR_STRTB))
+		return;
+
+	if (p->sg_load == txd->sglen) {
+		struct sa11x0_dma_desc *txn = sa11x0_dma_next_desc(c);
+
+		/*
+		 * We have reached the end of the current descriptor.
+		 * Peek at the next descriptor, and if compatible with
+		 * the current, start processing it.
+		 */
+		if (txn && txn->ddar == txd->ddar) {
+			txd = txn;
+			sa11x0_dma_start_desc(p, txn);
+		} else {
+			p->txd_load = NULL;
+			return;
+		}
+	}
+
+	sg = &txd->sg[p->sg_load++];
+
+	/* Select buffer to load according to channel status */
+	if (((dcsr & (DCSR_BIU | DCSR_STRTB)) == (DCSR_BIU | DCSR_STRTB)) ||
+	    ((dcsr & (DCSR_BIU | DCSR_STRTA)) == 0)) {
+		dbsx = DMA_DBSA;
+		dbtx = DMA_DBTA;
+		dcsr = DCSR_STRTA | DCSR_IE | DCSR_RUN;
+	} else {
+		dbsx = DMA_DBSB;
+		dbtx = DMA_DBTB;
+		dcsr = DCSR_STRTB | DCSR_IE | DCSR_RUN;
+	}
+
+	writel_relaxed(sg->addr, base + dbsx);
+	writel_relaxed(sg->len, base + dbtx);
+	writel(dcsr, base + DMA_DCSR_S);
+
+	dev_dbg(p->dev->slave.dev, "pchan %u: load: DCSR:%02x DBS%c:%08x DBT%c:%08x\n",
+		p->num, dcsr,
+		'A' + (dbsx == DMA_DBSB), sg->addr,
+		'A' + (dbtx == DMA_DBTB), sg->len);
+}
+
+static void noinline sa11x0_dma_complete(struct sa11x0_dma_phy *p,
+	struct sa11x0_dma_chan *c)
+{
+	struct sa11x0_dma_desc *txd = p->txd_done;
+
+	if (++p->sg_done == txd->sglen) {
+		struct sa11x0_dma_dev *d = p->dev;
+
+		dev_vdbg(d->slave.dev, "pchan %u: txd %p[%x]: completed\n",
+			p->num, p->txd_done, p->txd_done->tx.cookie);
+
+		c->lc = txd->tx.cookie;
+
+		spin_lock(&d->lock);
+		list_add_tail(&txd->node, &d->desc_complete);
+		spin_unlock(&d->lock);
+
+		p->sg_done = 0;
+		p->txd_done = p->txd_load;
+
+		tasklet_schedule(&d->task);
+	}
+
+	sa11x0_dma_start_sg(p, c);
+}
+
+static irqreturn_t sa11x0_dma_irq(int irq, void *dev_id)
+{
+	struct sa11x0_dma_phy *p = dev_id;
+	struct sa11x0_dma_dev *d = p->dev;
+	struct sa11x0_dma_chan *c;
+	u32 dcsr;
+
+	dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+	if (!(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB)))
+		return IRQ_NONE;
+
+	/* Clear reported status bits */
+	writel_relaxed(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB),
+		p->base + DMA_DCSR_C);
+
+	dev_dbg(d->slave.dev, "pchan %u: irq: DCSR:%02x\n", p->num, dcsr);
+
+	if (dcsr & DCSR_ERROR) {
+		dev_err(d->slave.dev, "pchan %u: error. DCSR:%02x DDAR:%08x DBSA:%08x DBTA:%08x DBSB:%08x DBTB:%08x\n",
+			p->num, dcsr,
+			readl_relaxed(p->base + DMA_DDAR),
+			readl_relaxed(p->base + DMA_DBSA),
+			readl_relaxed(p->base + DMA_DBTA),
+			readl_relaxed(p->base + DMA_DBSB),
+			readl_relaxed(p->base + DMA_DBTB));
+	}
+
+	c = p->vchan;
+	if (c) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&c->lock, flags);
+		/*
+		 * Now that we're holding the lock, check that the vchan
+		 * really is associated with this pchan before touching the
+		 * hardware.  This should always succeed, because we won't
+		 * change p->vchan or c->phy while the channel is actively
+		 * transferring.
+		 */
+		if (c->phy == p) {
+			if (dcsr & DCSR_DONEA)
+				sa11x0_dma_complete(p, c);
+			if (dcsr & DCSR_DONEB)
+				sa11x0_dma_complete(p, c);
+		}
+		spin_unlock_irqrestore(&c->lock, flags);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void sa11x0_dma_start_txd(struct sa11x0_dma_chan *c)
+{
+	struct sa11x0_dma_desc *txd = sa11x0_dma_next_desc(c);
+
+	/* If the issued list is empty, we have no further txds to process */
+	if (txd) {
+		struct sa11x0_dma_phy *p = c->phy;
+
+		sa11x0_dma_start_desc(p, txd);
+		p->txd_done = txd;
+		p->sg_done = 0;
+
+		/* The channel should not have any transfers started */
+		WARN_ON(readl_relaxed(p->base + DMA_DCSR_R) &
+				      (DCSR_STRTA | DCSR_STRTB));
+
+		/* Clear the run and start bits before changing DDAR */
+		writel_relaxed(DCSR_RUN | DCSR_STRTA | DCSR_STRTB,
+			       p->base + DMA_DCSR_C);
+		writel_relaxed(txd->ddar, p->base + DMA_DDAR);
+
+		/* Try to start both buffers */
+		sa11x0_dma_start_sg(p, c);
+		sa11x0_dma_start_sg(p, c);
+	}
+}
+
+static void sa11x0_dma_tasklet(unsigned long arg)
+{
+	struct sa11x0_dma_dev *d = (struct sa11x0_dma_dev *)arg;
+	struct sa11x0_dma_phy *p;
+	struct sa11x0_dma_chan *c;
+	struct sa11x0_dma_desc *txd, *txn;
+	LIST_HEAD(head);
+	unsigned pch, pch_alloc = 0;
+
+	dev_dbg(d->slave.dev, "tasklet enter\n");
+
+	/* Get the completed tx descriptors */
+	spin_lock_irq(&d->lock);
+	list_splice_init(&d->desc_complete, &head);
+	spin_unlock_irq(&d->lock);
+
+	list_for_each_entry(txd, &head, node) {
+		c = to_sa11x0_dma_chan(txd->tx.chan);
+
+		dev_dbg(d->slave.dev, "vchan %p: txd %p[%x] completed\n",
+			c, txd, txd->tx.cookie);
+
+		spin_lock_irq(&c->lock);
+		p = c->phy;
+		if (p) {
+			if (!p->txd_done)
+				sa11x0_dma_start_txd(c);
+			if (!p->txd_done) {
+				/* No current txd associated with this channel */
+				dev_dbg(d->slave.dev, "pchan %u: free\n", p->num);
+
+				/* Mark this channel free */
+				c->phy = NULL;
+				p->vchan = NULL;
+			}
+		}
+		spin_unlock_irq(&c->lock);
+	}
+
+	spin_lock_irq(&d->lock);
+	for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+		p = &d->phy[pch];
+
+		if (p->vchan == NULL && !list_empty(&d->chan_pending)) {
+			c = list_first_entry(&d->chan_pending,
+				struct sa11x0_dma_chan, node);
+			list_del_init(&c->node);
+
+			pch_alloc |= 1 << pch;
+
+			/* Mark this channel allocated */
+			p->vchan = c;
+
+			dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, c);
+		}
+	}
+	spin_unlock_irq(&d->lock);
+
+	for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+		if (pch_alloc & (1 << pch)) {
+			p = &d->phy[pch];
+			c = p->vchan;
+
+			spin_lock_irq(&c->lock);
+			c->phy = p;
+
+			sa11x0_dma_start_txd(c);
+			spin_unlock_irq(&c->lock);
+		}
+	}
+
+	/* Now free the completed tx descriptor, and call their callbacks */
+	list_for_each_entry_safe(txd, txn, &head, node) {
+		dma_async_tx_callback callback = txd->tx.callback;
+		void *callback_param = txd->tx.callback_param;
+
+		dev_dbg(d->slave.dev, "txd %p[%x]: callback and free\n",
+			txd, txd->tx.cookie);
+
+		kfree(txd);
+
+		if (callback)
+			callback(callback_param);
+	}
+
+	dev_dbg(d->slave.dev, "tasklet exit\n");
+}
+
+
+static void sa11x0_dma_desc_free(struct sa11x0_dma_dev *d, struct list_head *head)
+{
+	struct sa11x0_dma_desc *txd, *txn;
+
+	list_for_each_entry_safe(txd, txn, head, node) {
+		dev_dbg(d->slave.dev, "txd %p: freeing\n", txd);
+		kfree(txd);
+	}
+}
+
+static int sa11x0_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void sa11x0_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&c->lock, flags);
+	spin_lock(&d->lock);
+	list_del_init(&c->node);
+	spin_unlock(&d->lock);
+
+	list_splice_tail_init(&c->desc_submitted, &head);
+	list_splice_tail_init(&c->desc_issued, &head);
+	spin_unlock_irqrestore(&c->lock, flags);
+
+	sa11x0_dma_desc_free(d, &head);
+}
+
+static dma_addr_t sa11x0_dma_pos(struct sa11x0_dma_phy *p)
+{
+	unsigned reg;
+	u32 dcsr;
+
+	dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+
+	if ((dcsr & (DCSR_BIU | DCSR_STRTA)) == DCSR_STRTA ||
+	    (dcsr & (DCSR_BIU | DCSR_STRTB)) == DCSR_BIU)
+		reg = DMA_DBSA;
+	else
+		reg = DMA_DBSB;
+
+	return readl_relaxed(p->base + reg);
+}
+
+static enum dma_status sa11x0_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	struct sa11x0_dma_phy *p;
+	struct sa11x0_dma_desc *txd;
+	dma_cookie_t last_used, last_complete;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	last_used = c->chan.cookie;
+	last_complete = c->lc;
+
+	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	if (ret == DMA_SUCCESS) {
+		dma_set_tx_state(state, last_complete, last_used, 0);
+		return ret;
+	}
+
+	spin_lock_irqsave(&c->lock, flags);
+	p = c->phy;
+	ret = c->status;
+	if (p) {
+		dma_addr_t addr = sa11x0_dma_pos(p);
+
+		dev_vdbg(d->slave.dev, "tx_status: addr:%x\n", addr);
+
+		txd = p->txd_done;
+		if (txd) {
+			unsigned i;
+
+			for (i = 0; i < txd->sglen; i++) {
+				dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x\n",
+					i, txd->sg[i].addr, txd->sg[i].len);
+				if (addr >= txd->sg[i].addr &&
+				    addr < txd->sg[i].addr + txd->sg[i].len) {
+					unsigned len;
+
+					len = txd->sg[i].len -
+						(addr - txd->sg[i].addr);
+					dev_vdbg(d->slave.dev, "tx_status: [%u] +%x\n",
+						i, len);
+					bytes += len;
+					i++;
+					break;
+				}
+			}
+			for (; i < txd->sglen; i++) {
+				dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x ++\n",
+					i, txd->sg[i].addr, txd->sg[i].len);
+				bytes += txd->sg[i].len;
+			}
+		}
+		if (txd != p->txd_load && p->txd_load)
+			bytes += p->txd_load->size;
+	}
+	list_for_each_entry(txd, &c->desc_issued, node) {
+		bytes += txd->size;
+	}
+	spin_unlock_irqrestore(&c->lock, flags);
+
+	dma_set_tx_state(state, last_complete, last_used, bytes);
+
+	dev_vdbg(d->slave.dev, "tx_status: bytes 0x%zx\n", bytes);
+
+	return ret;
+}
+
+/*
+ * Move pending txds to the issued list, and re-init pending list.
+ * If not already pending, add this channel to the list of pending
+ * channels and trigger the tasklet to run.
+ */
+static void sa11x0_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->lock, flags);
+	list_splice_tail_init(&c->desc_submitted, &c->desc_issued);
+	if (!list_empty(&c->desc_issued)) {
+		spin_lock(&d->lock);
+		if (!c->phy && list_empty(&c->node)) {
+			list_add_tail(&c->node, &d->chan_pending);
+			tasklet_schedule(&d->task);
+			dev_dbg(d->slave.dev, "vchan %p: issued\n", c);
+		}
+		spin_unlock(&d->lock);
+	} else
+		dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", c);
+	spin_unlock_irqrestore(&c->lock, flags);
+}
+
+static dma_cookie_t sa11x0_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(tx->chan);
+	struct sa11x0_dma_desc *txd = to_sa11x0_dma_tx(tx);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->lock, flags);
+	c->chan.cookie += 1;
+	if (c->chan.cookie < 0)
+		c->chan.cookie = 1;
+	txd->tx.cookie = c->chan.cookie;
+
+	list_add_tail(&txd->node, &c->desc_submitted);
+	spin_unlock_irqrestore(&c->lock, flags);
+
+	dev_dbg(tx->chan->device->dev, "vchan %p: txd %p[%x]: submitted\n",
+		c, txd, txd->tx.cookie);
+
+	return txd->tx.cookie;
+}
+
+static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sg, unsigned int sglen,
+	enum dma_transfer_direction dir, unsigned long flags, void *context)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_desc *txd;
+	struct scatterlist *sgent;
+	unsigned i, j = sglen;
+	size_t size = 0;
+
+	/* SA11x0 channels can only operate in their native direction */
+	if (dir != (c->ddar & DDAR_RW ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV)) {
+		dev_err(chan->device->dev, "vchan %p: bad DMA direction: DDAR:%08x dir:%u\n",
+			c, c->ddar, dir);
+		return NULL;
+	}
+
+	/* Do not allow zero-sized txds */
+	if (sglen == 0)
+		return NULL;
+
+	for_each_sg(sg, sgent, sglen, i) {
+		dma_addr_t addr = sg_dma_address(sgent);
+		unsigned int len = sg_dma_len(sgent);
+
+		if (len > DMA_MAX_SIZE)
+			j += DIV_ROUND_UP(len, DMA_MAX_SIZE & ~DMA_ALIGN) - 1;
+		if (addr & DMA_ALIGN) {
+			dev_dbg(chan->device->dev, "vchan %p: bad buffer alignment: %08x\n",
+				c, addr);
+			return NULL;
+		}
+	}
+
+	txd = kzalloc(sizeof(*txd) + j * sizeof(txd->sg[0]), GFP_ATOMIC);
+	if (!txd) {
+		dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", c);
+		return NULL;
+	}
+
+	j = 0;
+	for_each_sg(sg, sgent, sglen, i) {
+		dma_addr_t addr = sg_dma_address(sgent);
+		unsigned len = sg_dma_len(sgent);
+
+		size += len;
+
+		do {
+			unsigned tlen = len;
+
+			/*
+			 * Check whether the transfer will fit.  If not, try
+			 * to split the transfer up such that we end up with
+			 * equal chunks - but make sure that we preserve the
+			 * alignment.  This avoids small segments.
+			 */
+			if (tlen > DMA_MAX_SIZE) {
+				unsigned mult = DIV_ROUND_UP(tlen,
+					DMA_MAX_SIZE & ~DMA_ALIGN);
+
+				tlen = (tlen / mult) & ~DMA_ALIGN;
+			}
+
+			txd->sg[j].addr = addr;
+			txd->sg[j].len = tlen;
+
+			addr += tlen;
+			len -= tlen;
+			j++;
+		} while (len);
+	}
+
+	dma_async_tx_descriptor_init(&txd->tx, &c->chan);
+	txd->tx.flags = flags;
+	txd->tx.tx_submit = sa11x0_dma_tx_submit;
+	txd->ddar = c->ddar;
+	txd->size = size;
+	txd->sglen = j;
+
+	dev_dbg(chan->device->dev, "vchan %p: txd %p: size %u nr %u\n",
+		c, txd, txd->size, txd->sglen);
+
+	return &txd->tx;
+}
+
+static int sa11x0_dma_slave_config(struct sa11x0_dma_chan *c, struct dma_slave_config *cfg)
+{
+	u32 ddar = c->ddar & ((0xf << 4) | DDAR_RW);
+	dma_addr_t addr;
+	enum dma_slave_buswidth width;
+	u32 maxburst;
+
+	if (ddar & DDAR_RW) {
+		addr = cfg->src_addr;
+		width = cfg->src_addr_width;
+		maxburst = cfg->src_maxburst;
+	} else {
+		addr = cfg->dst_addr;
+		width = cfg->dst_addr_width;
+		maxburst = cfg->dst_maxburst;
+	}
+
+	if ((width != DMA_SLAVE_BUSWIDTH_1_BYTE &&
+	     width != DMA_SLAVE_BUSWIDTH_2_BYTES) ||
+	    (maxburst != 4 && maxburst != 8))
+		return -EINVAL;
+
+	if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+		ddar |= DDAR_DW;
+	if (maxburst == 8)
+		ddar |= DDAR_BS;
+
+	dev_dbg(c->chan.device->dev, "vchan %p: dma_slave_config addr %x width %u burst %u\n",
+		c, addr, width, maxburst);
+
+	c->ddar = ddar | (addr & 0xf0000000) | (addr & 0x003ffffc) << 6;
+
+	return 0;
+}
+
+static int sa11x0_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	struct sa11x0_dma_phy *p;
+	LIST_HEAD(head);
+	unsigned long flags;
+	int ret;
+
+	switch (cmd) {
+	case DMA_SLAVE_CONFIG:
+		return sa11x0_dma_slave_config(c, (struct dma_slave_config *)arg);
+
+	case DMA_TERMINATE_ALL:
+		dev_dbg(d->slave.dev, "vchan %p: terminate all\n", c);
+		/* Clear the tx descriptor lists */
+		spin_lock_irqsave(&c->lock, flags);
+		list_splice_tail_init(&c->desc_submitted, &head);
+		list_splice_tail_init(&c->desc_issued, &head);
+
+		p = c->phy;
+		if (p) {
+			struct sa11x0_dma_desc *txd, *txn;
+
+			dev_dbg(d->slave.dev, "pchan %u: terminating\n", p->num);
+			/* vchan is assigned to a pchan - stop the channel */
+			writel(DCSR_RUN | DCSR_IE |
+				DCSR_STRTA | DCSR_DONEA |
+				DCSR_STRTB | DCSR_DONEB,
+				p->base + DMA_DCSR_C);
+
+			list_for_each_entry_safe(txd, txn, &d->desc_complete, node)
+				if (txd->tx.chan == &c->chan)
+					list_move(&txd->node, &head);
+
+			if (p->txd_load) {
+				if (p->txd_load != p->txd_done)
+					list_add_tail(&p->txd_load->node, &head);
+				p->txd_load = NULL;
+			}
+			if (p->txd_done) {
+				list_add_tail(&p->txd_done->node, &head);
+				p->txd_done = NULL;
+			}
+			c->phy = NULL;
+			spin_lock(&d->lock);
+			p->vchan = NULL;
+			spin_unlock(&d->lock);
+			tasklet_schedule(&d->task);
+		}
+		spin_unlock_irqrestore(&c->lock, flags);
+		sa11x0_dma_desc_free(d, &head);
+		ret = 0;
+		break;
+
+	case DMA_PAUSE:
+		dev_dbg(d->slave.dev, "vchan %p: pause\n", c);
+		spin_lock_irqsave(&c->lock, flags);
+		if (c->status == DMA_IN_PROGRESS) {
+			c->status = DMA_PAUSED;
+
+			p = c->phy;
+			if (p) {
+				writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C);
+			} else {
+				spin_lock(&d->lock);
+				list_del_init(&c->node);
+				spin_unlock(&d->lock);
+			}
+		}
+		spin_unlock_irqrestore(&c->lock, flags);
+		ret = 0;
+		break;
+
+	case DMA_RESUME:
+		dev_dbg(d->slave.dev, "vchan %p: resume\n", c);
+		spin_lock_irqsave(&c->lock, flags);
+		if (c->status == DMA_PAUSED) {
+			c->status = DMA_IN_PROGRESS;
+
+			p = c->phy;
+			if (p) {
+				writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_S);
+			} else if (!list_empty(&c->desc_issued)) {
+				spin_lock(&d->lock);
+				list_add_tail(&c->node, &d->chan_pending);
+				spin_unlock(&d->lock);
+			}
+		}
+		spin_unlock_irqrestore(&c->lock, flags);
+		ret = 0;
+		break;
+
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
+struct sa11x0_dma_channel_desc {
+	u32 ddar;
+	const char *name;
+};
+
+#define CD(d1, d2) { .ddar = DDAR_##d1 | d2, .name = #d1 }
+static const struct sa11x0_dma_channel_desc chan_desc[] = {
+	CD(Ser0UDCTr, 0),
+	CD(Ser0UDCRc, DDAR_RW),
+	CD(Ser1SDLCTr, 0),
+	CD(Ser1SDLCRc, DDAR_RW),
+	CD(Ser1UARTTr, 0),
+	CD(Ser1UARTRc, DDAR_RW),
+	CD(Ser2ICPTr, 0),
+	CD(Ser2ICPRc, DDAR_RW),
+	CD(Ser3UARTTr, 0),
+	CD(Ser3UARTRc, DDAR_RW),
+	CD(Ser4MCP0Tr, 0),
+	CD(Ser4MCP0Rc, DDAR_RW),
+	CD(Ser4MCP1Tr, 0),
+	CD(Ser4MCP1Rc, DDAR_RW),
+	CD(Ser4SSPTr, 0),
+	CD(Ser4SSPRc, DDAR_RW),
+};
+
+static int __devinit sa11x0_dma_init_dmadev(struct dma_device *dmadev,
+	struct device *dev)
+{
+	unsigned i;
+
+	dmadev->chancnt = ARRAY_SIZE(chan_desc);
+	INIT_LIST_HEAD(&dmadev->channels);
+	dmadev->dev = dev;
+	dmadev->device_alloc_chan_resources = sa11x0_dma_alloc_chan_resources;
+	dmadev->device_free_chan_resources = sa11x0_dma_free_chan_resources;
+	dmadev->device_control = sa11x0_dma_control;
+	dmadev->device_tx_status = sa11x0_dma_tx_status;
+	dmadev->device_issue_pending = sa11x0_dma_issue_pending;
+
+	for (i = 0; i < dmadev->chancnt; i++) {
+		struct sa11x0_dma_chan *c;
+
+		c = kzalloc(sizeof(*c), GFP_KERNEL);
+		if (!c) {
+			dev_err(dev, "no memory for channel %u\n", i);
+			return -ENOMEM;
+		}
+
+		c->chan.device = dmadev;
+		c->status = DMA_IN_PROGRESS;
+		c->ddar = chan_desc[i].ddar;
+		c->name = chan_desc[i].name;
+		spin_lock_init(&c->lock);
+		INIT_LIST_HEAD(&c->desc_submitted);
+		INIT_LIST_HEAD(&c->desc_issued);
+		INIT_LIST_HEAD(&c->node);
+		list_add_tail(&c->chan.device_node, &dmadev->channels);
+	}
+
+	return dma_async_device_register(dmadev);
+}
+
+static int sa11x0_dma_request_irq(struct platform_device *pdev, int nr,
+	void *data)
+{
+	int irq = platform_get_irq(pdev, nr);
+
+	if (irq <= 0)
+		return -ENXIO;
+
+	return request_irq(irq, sa11x0_dma_irq, 0, dev_name(&pdev->dev), data);
+}
+
+static void sa11x0_dma_free_irq(struct platform_device *pdev, int nr,
+	void *data)
+{
+	int irq = platform_get_irq(pdev, nr);
+	if (irq > 0)
+		free_irq(irq, data);
+}
+
+static void sa11x0_dma_free_channels(struct dma_device *dmadev)
+{
+	struct sa11x0_dma_chan *c, *cn;
+
+	list_for_each_entry_safe(c, cn, &dmadev->channels, chan.device_node) {
+		list_del(&c->chan.device_node);
+		kfree(c);
+	}
+}
+
+static int __devinit sa11x0_dma_probe(struct platform_device *pdev)
+{
+	struct sa11x0_dma_dev *d;
+	struct resource *res;
+	unsigned i;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENXIO;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d) {
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	spin_lock_init(&d->lock);
+	INIT_LIST_HEAD(&d->chan_pending);
+	INIT_LIST_HEAD(&d->desc_complete);
+
+	d->base = ioremap(res->start, resource_size(res));
+	if (!d->base) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	tasklet_init(&d->task, sa11x0_dma_tasklet, (unsigned long)d);
+
+	for (i = 0; i < NR_PHY_CHAN; i++) {
+		struct sa11x0_dma_phy *p = &d->phy[i];
+
+		p->dev = d;
+		p->num = i;
+		p->base = d->base + i * DMA_SIZE;
+		writel_relaxed(DCSR_RUN | DCSR_IE | DCSR_ERROR |
+			DCSR_DONEA | DCSR_STRTA | DCSR_DONEB | DCSR_STRTB,
+			p->base + DMA_DCSR_C);
+		writel_relaxed(0, p->base + DMA_DDAR);
+
+		ret = sa11x0_dma_request_irq(pdev, i, p);
+		if (ret) {
+			while (i) {
+				i--;
+				sa11x0_dma_free_irq(pdev, i, &d->phy[i]);
+			}
+			goto err_irq;
+		}
+	}
+
+	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+	d->slave.device_prep_slave_sg = sa11x0_dma_prep_slave_sg;
+	ret = sa11x0_dma_init_dmadev(&d->slave, &pdev->dev);
+	if (ret) {
+		dev_warn(d->slave.dev, "failed to register slave async device: %d\n",
+			ret);
+		goto err_slave_reg;
+	}
+
+	platform_set_drvdata(pdev, d);
+	return 0;
+
+ err_slave_reg:
+	sa11x0_dma_free_channels(&d->slave);
+	for (i = 0; i < NR_PHY_CHAN; i++)
+		sa11x0_dma_free_irq(pdev, i, &d->phy[i]);
+ err_irq:
+	tasklet_kill(&d->task);
+	iounmap(d->base);
+ err_ioremap:
+	kfree(d);
+ err_alloc:
+	return ret;
+}
+
+static int __devexit sa11x0_dma_remove(struct platform_device *pdev)
+{
+	struct sa11x0_dma_dev *d = platform_get_drvdata(pdev);
+	unsigned pch;
+
+	dma_async_device_unregister(&d->slave);
+
+	sa11x0_dma_free_channels(&d->slave);
+	for (pch = 0; pch < NR_PHY_CHAN; pch++)
+		sa11x0_dma_free_irq(pdev, pch, &d->phy[pch]);
+	tasklet_kill(&d->task);
+	iounmap(d->base);
+	kfree(d);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int sa11x0_dma_suspend(struct device *dev)
+{
+	struct sa11x0_dma_dev *d = dev_get_drvdata(dev);
+	unsigned pch;
+
+	for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+		struct sa11x0_dma_phy *p = &d->phy[pch];
+		u32 dcsr, saved_dcsr;
+
+		dcsr = saved_dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+		if (dcsr & DCSR_RUN) {
+			writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C);
+			dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+		}
+
+		saved_dcsr &= DCSR_RUN | DCSR_IE;
+		if (dcsr & DCSR_BIU) {
+			p->dbs[0] = readl_relaxed(p->base + DMA_DBSB);
+			p->dbt[0] = readl_relaxed(p->base + DMA_DBTB);
+			p->dbs[1] = readl_relaxed(p->base + DMA_DBSA);
+			p->dbt[1] = readl_relaxed(p->base + DMA_DBTA);
+			saved_dcsr |= (dcsr & DCSR_STRTA ? DCSR_STRTB : 0) |
+				      (dcsr & DCSR_STRTB ? DCSR_STRTA : 0);
+		} else {
+			p->dbs[0] = readl_relaxed(p->base + DMA_DBSA);
+			p->dbt[0] = readl_relaxed(p->base + DMA_DBTA);
+			p->dbs[1] = readl_relaxed(p->base + DMA_DBSB);
+			p->dbt[1] = readl_relaxed(p->base + DMA_DBTB);
+			saved_dcsr |= dcsr & (DCSR_STRTA | DCSR_STRTB);
+		}
+		p->dcsr = saved_dcsr;
+
+		writel(DCSR_STRTA | DCSR_STRTB, p->base + DMA_DCSR_C);
+	}
+
+	return 0;
+}
+
+static int sa11x0_dma_resume(struct device *dev)
+{
+	struct sa11x0_dma_dev *d = dev_get_drvdata(dev);
+	unsigned pch;
+
+	for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+		struct sa11x0_dma_phy *p = &d->phy[pch];
+		struct sa11x0_dma_desc *txd = NULL;
+		u32 dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+
+		WARN_ON(dcsr & (DCSR_BIU | DCSR_STRTA | DCSR_STRTB | DCSR_RUN));
+
+		if (p->txd_done)
+			txd = p->txd_done;
+		else if (p->txd_load)
+			txd = p->txd_load;
+
+		if (!txd)
+			continue;
+
+		writel_relaxed(txd->ddar, p->base + DMA_DDAR);
+
+		writel_relaxed(p->dbs[0], p->base + DMA_DBSA);
+		writel_relaxed(p->dbt[0], p->base + DMA_DBTA);
+		writel_relaxed(p->dbs[1], p->base + DMA_DBSB);
+		writel_relaxed(p->dbt[1], p->base + DMA_DBTB);
+		writel_relaxed(p->dcsr, p->base + DMA_DCSR_S);
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops sa11x0_dma_pm_ops = {
+	.suspend_noirq = sa11x0_dma_suspend,
+	.resume_noirq = sa11x0_dma_resume,
+	.freeze_noirq = sa11x0_dma_suspend,
+	.thaw_noirq = sa11x0_dma_resume,
+	.poweroff_noirq = sa11x0_dma_suspend,
+	.restore_noirq = sa11x0_dma_resume,
+};
+
+static struct platform_driver sa11x0_dma_driver = {
+	.driver = {
+		.name	= "sa11x0-dma",
+		.owner	= THIS_MODULE,
+		.pm	= &sa11x0_dma_pm_ops,
+	},
+	.probe		= sa11x0_dma_probe,
+	.remove		= __devexit_p(sa11x0_dma_remove),
+};
+
+bool sa11x0_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	if (chan->device->dev->driver == &sa11x0_dma_driver.driver) {
+		struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+		const char *p = param;
+
+		return !strcmp(c->name, p);
+	}
+	return false;
+}
+EXPORT_SYMBOL(sa11x0_dma_filter_fn);
+
+static int __init sa11x0_dma_init(void)
+{
+	return platform_driver_register(&sa11x0_dma_driver);
+}
+subsys_initcall(sa11x0_dma_init);
+
+static void __exit sa11x0_dma_exit(void)
+{
+	platform_driver_unregister(&sa11x0_dma_driver);
+}
+module_exit(sa11x0_dma_exit);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("SA-11x0 DMA driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:sa11x0-dma");
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
new file mode 100644
index 00000000..19d7a8d3
--- /dev/null
+++ b/drivers/dma/shdma.c
@@ -0,0 +1,1524 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * base is drivers/dma/flsdma.c
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - DMA of SuperH does not have Hardware DMA chain mode.
+ * - MAX DMA size is 16MB.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/sh_dma.h>
+#include <linux/notifier.h>
+#include <linux/kdebug.h>
+#include <linux/spinlock.h>
+#include <linux/rculist.h>
+
+#include "dmaengine.h"
+#include "shdma.h"
+
+/* DMA descriptor control */
+enum sh_dmae_desc_status {
+	DESC_IDLE,
+	DESC_PREPARED,
+	DESC_SUBMITTED,
+	DESC_COMPLETED,	/* completed, have to call callback */
+	DESC_WAITING,	/* callback called, waiting for ack / re-submit */
+};
+
+#define NR_DESCS_PER_CHANNEL 32
+/* Default MEMCPY transfer size = 2^2 = 4 bytes */
+#define LOG2_DEFAULT_XFER_SIZE	2
+
+/*
+ * Used for write-side mutual exclusion for the global device list,
+ * read-side synchronization by way of RCU, and per-controller data.
+ */
+static DEFINE_SPINLOCK(sh_dmae_lock);
+static LIST_HEAD(sh_dmae_devices);
+
+/* A bitmask with bits enough for enum sh_dmae_slave_chan_id */
+static unsigned long sh_dmae_slave_used[BITS_TO_LONGS(SH_DMA_SLAVE_NUMBER)];
+
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all);
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
+
+static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	__raw_writel(data, shdev->chan_reg +
+		     shdev->pdata->channel[sh_dc->id].chclr_offset);
+}
+
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+	__raw_writel(data, sh_dc->base + reg / sizeof(u32));
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+	return __raw_readl(sh_dc->base + reg / sizeof(u32));
+}
+
+static u16 dmaor_read(struct sh_dmae_device *shdev)
+{
+	u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
+
+	if (shdev->pdata->dmaor_is_32bit)
+		return __raw_readl(addr);
+	else
+		return __raw_readw(addr);
+}
+
+static void dmaor_write(struct sh_dmae_device *shdev, u16 data)
+{
+	u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
+
+	if (shdev->pdata->dmaor_is_32bit)
+		__raw_writel(data, addr);
+	else
+		__raw_writew(data, addr);
+}
+
+static void chcr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	__raw_writel(data, sh_dc->base + shdev->chcr_offset / sizeof(u32));
+}
+
+static u32 chcr_read(struct sh_dmae_chan *sh_dc)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	return __raw_readl(sh_dc->base + shdev->chcr_offset / sizeof(u32));
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR register
+ */
+static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev)
+{
+	unsigned short dmaor;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sh_dmae_lock, flags);
+
+	dmaor = dmaor_read(shdev);
+	dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME));
+
+	spin_unlock_irqrestore(&sh_dmae_lock, flags);
+}
+
+static int sh_dmae_rst(struct sh_dmae_device *shdev)
+{
+	unsigned short dmaor;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sh_dmae_lock, flags);
+
+	dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
+
+	if (shdev->pdata->chclr_present) {
+		int i;
+		for (i = 0; i < shdev->pdata->channel_num; i++) {
+			struct sh_dmae_chan *sh_chan = shdev->chan[i];
+			if (sh_chan)
+				chclr_write(sh_chan, 0);
+		}
+	}
+
+	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
+
+	dmaor = dmaor_read(shdev);
+
+	spin_unlock_irqrestore(&sh_dmae_lock, flags);
+
+	if (dmaor & (DMAOR_AE | DMAOR_NMIF)) {
+		dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n");
+		return -EIO;
+	}
+	if (shdev->pdata->dmaor_init & ~dmaor)
+		dev_warn(shdev->common.dev,
+			 "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
+			 dmaor, shdev->pdata->dmaor_init);
+	return 0;
+}
+
+static bool dmae_is_busy(struct sh_dmae_chan *sh_chan)
+{
+	u32 chcr = chcr_read(sh_chan);
+
+	if ((chcr & (CHCR_DE | CHCR_TE)) == CHCR_DE)
+		return true; /* working */
+
+	return false; /* waiting */
+}
+
+static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	struct sh_dmae_pdata *pdata = shdev->pdata;
+	int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) |
+		((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift);
+
+	if (cnt >= pdata->ts_shift_num)
+		cnt = 0;
+
+	return pdata->ts_shift[cnt];
+}
+
+static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	struct sh_dmae_pdata *pdata = shdev->pdata;
+	int i;
+
+	for (i = 0; i < pdata->ts_shift_num; i++)
+		if (pdata->ts_shift[i] == l2size)
+			break;
+
+	if (i == pdata->ts_shift_num)
+		i = 0;
+
+	return ((i << pdata->ts_low_shift) & pdata->ts_low_mask) |
+		((i << pdata->ts_high_shift) & pdata->ts_high_mask);
+}
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw)
+{
+	sh_dmae_writel(sh_chan, hw->sar, SAR);
+	sh_dmae_writel(sh_chan, hw->dar, DAR);
+	sh_dmae_writel(sh_chan, hw->tcr >> sh_chan->xmit_shift, TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	u32 chcr = chcr_read(sh_chan);
+
+	if (shdev->pdata->needs_tend_set)
+		sh_dmae_writel(sh_chan, 0xFFFFFFFF, TEND);
+
+	chcr |= CHCR_DE | shdev->chcr_ie_bit;
+	chcr_write(sh_chan, chcr & ~CHCR_TE);
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	u32 chcr = chcr_read(sh_chan);
+
+	chcr &= ~(CHCR_DE | CHCR_TE | shdev->chcr_ie_bit);
+	chcr_write(sh_chan, chcr);
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+	/*
+	 * Default configuration for dual address memory-memory transfer.
+	 * 0x400 represents auto-request.
+	 */
+	u32 chcr = DM_INC | SM_INC | 0x400 | log2size_to_chcr(sh_chan,
+						   LOG2_DEFAULT_XFER_SIZE);
+	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr);
+	chcr_write(sh_chan, chcr);
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+	/* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */
+	if (dmae_is_busy(sh_chan))
+		return -EBUSY;
+
+	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, val);
+	chcr_write(sh_chan, val);
+
+	return 0;
+}
+
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->id];
+	u16 __iomem *addr = shdev->dmars;
+	unsigned int shift = chan_pdata->dmars_bit;
+
+	if (dmae_is_busy(sh_chan))
+		return -EBUSY;
+
+	if (pdata->no_dmars)
+		return 0;
+
+	/* in the case of a missing DMARS resource use first memory window */
+	if (!addr)
+		addr = (u16 __iomem *)shdev->chan_reg;
+	addr += chan_pdata->dmars / sizeof(u16);
+
+	__raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift),
+		     addr);
+
+	return 0;
+}
+
+static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c;
+	struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
+	struct sh_dmae_slave *param = tx->chan->private;
+	dma_async_tx_callback callback = tx->callback;
+	dma_cookie_t cookie;
+	bool power_up;
+
+	spin_lock_irq(&sh_chan->desc_lock);
+
+	if (list_empty(&sh_chan->ld_queue))
+		power_up = true;
+	else
+		power_up = false;
+
+	cookie = dma_cookie_assign(tx);
+
+	/* Mark all chunks of this descriptor as submitted, move to the queue */
+	list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
+		/*
+		 * All chunks are on the global ld_free, so, we have to find
+		 * the end of the chain ourselves
+		 */
+		if (chunk != desc && (chunk->mark == DESC_IDLE ||
+				      chunk->async_tx.cookie > 0 ||
+				      chunk->async_tx.cookie == -EBUSY ||
+				      &chunk->node == &sh_chan->ld_free))
+			break;
+		chunk->mark = DESC_SUBMITTED;
+		/* Callback goes to the last chunk */
+		chunk->async_tx.callback = NULL;
+		chunk->cookie = cookie;
+		list_move_tail(&chunk->node, &sh_chan->ld_queue);
+		last = chunk;
+	}
+
+	last->async_tx.callback = callback;
+	last->async_tx.callback_param = tx->callback_param;
+
+	dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n",
+		tx->cookie, &last->async_tx, sh_chan->id,
+		desc->hw.sar, desc->hw.tcr, desc->hw.dar);
+
+	if (power_up) {
+		sh_chan->pm_state = DMAE_PM_BUSY;
+
+		pm_runtime_get(sh_chan->dev);
+
+		spin_unlock_irq(&sh_chan->desc_lock);
+
+		pm_runtime_barrier(sh_chan->dev);
+
+		spin_lock_irq(&sh_chan->desc_lock);
+
+		/* Have we been reset, while waiting? */
+		if (sh_chan->pm_state != DMAE_PM_ESTABLISHED) {
+			dev_dbg(sh_chan->dev, "Bring up channel %d\n",
+				sh_chan->id);
+			if (param) {
+				const struct sh_dmae_slave_config *cfg =
+					param->config;
+
+				dmae_set_dmars(sh_chan, cfg->mid_rid);
+				dmae_set_chcr(sh_chan, cfg->chcr);
+			} else {
+				dmae_init(sh_chan);
+			}
+
+			if (sh_chan->pm_state == DMAE_PM_PENDING)
+				sh_chan_xfer_ld_queue(sh_chan);
+			sh_chan->pm_state = DMAE_PM_ESTABLISHED;
+		}
+	} else {
+		sh_chan->pm_state = DMAE_PM_PENDING;
+	}
+
+	spin_unlock_irq(&sh_chan->desc_lock);
+
+	return cookie;
+}
+
+/* Called with desc_lock held */
+static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
+{
+	struct sh_desc *desc;
+
+	list_for_each_entry(desc, &sh_chan->ld_free, node)
+		if (desc->mark != DESC_PREPARED) {
+			BUG_ON(desc->mark != DESC_IDLE);
+			list_del(&desc->node);
+			return desc;
+		}
+
+	return NULL;
+}
+
+static const struct sh_dmae_slave_config *sh_dmae_find_slave(
+	struct sh_dmae_chan *sh_chan, struct sh_dmae_slave *param)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	struct sh_dmae_pdata *pdata = shdev->pdata;
+	int i;
+
+	if (param->slave_id >= SH_DMA_SLAVE_NUMBER)
+		return NULL;
+
+	for (i = 0; i < pdata->slave_num; i++)
+		if (pdata->slave[i].slave_id == param->slave_id)
+			return pdata->slave + i;
+
+	return NULL;
+}
+
+static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+	struct sh_desc *desc;
+	struct sh_dmae_slave *param = chan->private;
+	int ret;
+
+	/*
+	 * This relies on the guarantee from dmaengine that alloc_chan_resources
+	 * never runs concurrently with itself or free_chan_resources.
+	 */
+	if (param) {
+		const struct sh_dmae_slave_config *cfg;
+
+		cfg = sh_dmae_find_slave(sh_chan, param);
+		if (!cfg) {
+			ret = -EINVAL;
+			goto efindslave;
+		}
+
+		if (test_and_set_bit(param->slave_id, sh_dmae_slave_used)) {
+			ret = -EBUSY;
+			goto etestused;
+		}
+
+		param->config = cfg;
+	}
+
+	while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
+		desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
+		if (!desc)
+			break;
+		dma_async_tx_descriptor_init(&desc->async_tx,
+					&sh_chan->common);
+		desc->async_tx.tx_submit = sh_dmae_tx_submit;
+		desc->mark = DESC_IDLE;
+
+		list_add(&desc->node, &sh_chan->ld_free);
+		sh_chan->descs_allocated++;
+	}
+
+	if (!sh_chan->descs_allocated) {
+		ret = -ENOMEM;
+		goto edescalloc;
+	}
+
+	return sh_chan->descs_allocated;
+
+edescalloc:
+	if (param)
+		clear_bit(param->slave_id, sh_dmae_slave_used);
+etestused:
+efindslave:
+	chan->private = NULL;
+	return ret;
+}
+
+/*
+ * sh_dma_free_chan_resources - Free all resources of the channel.
+ */
+static void sh_dmae_free_chan_resources(struct dma_chan *chan)
+{
+	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+	struct sh_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	/* Protect against ISR */
+	spin_lock_irq(&sh_chan->desc_lock);
+	dmae_halt(sh_chan);
+	spin_unlock_irq(&sh_chan->desc_lock);
+
+	/* Now no new interrupts will occur */
+
+	/* Prepared and not submitted descriptors can still be on the queue */
+	if (!list_empty(&sh_chan->ld_queue))
+		sh_dmae_chan_ld_cleanup(sh_chan, true);
+
+	if (chan->private) {
+		/* The caller is holding dma_list_mutex */
+		struct sh_dmae_slave *param = chan->private;
+		clear_bit(param->slave_id, sh_dmae_slave_used);
+		chan->private = NULL;
+	}
+
+	spin_lock_irq(&sh_chan->desc_lock);
+
+	list_splice_init(&sh_chan->ld_free, &list);
+	sh_chan->descs_allocated = 0;
+
+	spin_unlock_irq(&sh_chan->desc_lock);
+
+	list_for_each_entry_safe(desc, _desc, &list, node)
+		kfree(desc);
+}
+
+/**
+ * sh_dmae_add_desc - get, set up and return one transfer descriptor
+ * @sh_chan:	DMA channel
+ * @flags:	DMA transfer flags
+ * @dest:	destination DMA address, incremented when direction equals
+ *		DMA_DEV_TO_MEM
+ * @src:	source DMA address, incremented when direction equals
+ *		DMA_MEM_TO_DEV
+ * @len:	DMA transfer length
+ * @first:	if NULL, set to the current descriptor and cookie set to -EBUSY
+ * @direction:	needed for slave DMA to decide which address to keep constant,
+ *		equals DMA_MEM_TO_MEM for MEMCPY
+ * Returns 0 or an error
+ * Locks: called with desc_lock held
+ */
+static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
+	unsigned long flags, dma_addr_t *dest, dma_addr_t *src, size_t *len,
+	struct sh_desc **first, enum dma_transfer_direction direction)
+{
+	struct sh_desc *new;
+	size_t copy_size;
+
+	if (!*len)
+		return NULL;
+
+	/* Allocate the link descriptor from the free list */
+	new = sh_dmae_get_desc(sh_chan);
+	if (!new) {
+		dev_err(sh_chan->dev, "No free link descriptor available\n");
+		return NULL;
+	}
+
+	copy_size = min(*len, (size_t)SH_DMA_TCR_MAX + 1);
+
+	new->hw.sar = *src;
+	new->hw.dar = *dest;
+	new->hw.tcr = copy_size;
+
+	if (!*first) {
+		/* First desc */
+		new->async_tx.cookie = -EBUSY;
+		*first = new;
+	} else {
+		/* Other desc - invisible to the user */
+		new->async_tx.cookie = -EINVAL;
+	}
+
+	dev_dbg(sh_chan->dev,
+		"chaining (%u/%u)@%x -> %x with %p, cookie %d, shift %d\n",
+		copy_size, *len, *src, *dest, &new->async_tx,
+		new->async_tx.cookie, sh_chan->xmit_shift);
+
+	new->mark = DESC_PREPARED;
+	new->async_tx.flags = flags;
+	new->direction = direction;
+
+	*len -= copy_size;
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
+		*src += copy_size;
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
+		*dest += copy_size;
+
+	return new;
+}
+
+/*
+ * sh_dmae_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and a correct
+ * list manipulation. For slave DMA direction carries the usual meaning, and,
+ * logically, the SG list is RAM and the addr variable contains slave address,
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
+ * and the SG list contains only one element and points at the source buffer.
+ */
+static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_chan,
+	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
+	enum dma_transfer_direction direction, unsigned long flags)
+{
+	struct scatterlist *sg;
+	struct sh_desc *first = NULL, *new = NULL /* compiler... */;
+	LIST_HEAD(tx_list);
+	int chunks = 0;
+	unsigned long irq_flags;
+	int i;
+
+	if (!sg_len)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i)
+		chunks += (sg_dma_len(sg) + SH_DMA_TCR_MAX) /
+			(SH_DMA_TCR_MAX + 1);
+
+	/* Have to lock the whole loop to protect against concurrent release */
+	spin_lock_irqsave(&sh_chan->desc_lock, irq_flags);
+
+	/*
+	 * Chaining:
+	 * first descriptor is what user is dealing with in all API calls, its
+	 *	cookie is at first set to -EBUSY, at tx-submit to a positive
+	 *	number
+	 * if more than one chunk is needed further chunks have cookie = -EINVAL
+	 * the last chunk, if not equal to the first, has cookie = -ENOSPC
+	 * all chunks are linked onto the tx_list head with their .node heads
+	 *	only during this function, then they are immediately spliced
+	 *	back onto the free list in form of a chain
+	 */
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(sg);
+		size_t len = sg_dma_len(sg);
+
+		if (!len)
+			goto err_get_desc;
+
+		do {
+			dev_dbg(sh_chan->dev, "Add SG #%d@%p[%d], dma %llx\n",
+				i, sg, len, (unsigned long long)sg_addr);
+
+			if (direction == DMA_DEV_TO_MEM)
+				new = sh_dmae_add_desc(sh_chan, flags,
+						&sg_addr, addr, &len, &first,
+						direction);
+			else
+				new = sh_dmae_add_desc(sh_chan, flags,
+						addr, &sg_addr, &len, &first,
+						direction);
+			if (!new)
+				goto err_get_desc;
+
+			new->chunks = chunks--;
+			list_add_tail(&new->node, &tx_list);
+		} while (len);
+	}
+
+	if (new != first)
+		new->async_tx.cookie = -ENOSPC;
+
+	/* Put them back on the free list, so, they don't get lost */
+	list_splice_tail(&tx_list, &sh_chan->ld_free);
+
+	spin_unlock_irqrestore(&sh_chan->desc_lock, irq_flags);
+
+	return &first->async_tx;
+
+err_get_desc:
+	list_for_each_entry(new, &tx_list, node)
+		new->mark = DESC_IDLE;
+	list_splice(&tx_list, &sh_chan->ld_free);
+
+	spin_unlock_irqrestore(&sh_chan->desc_lock, irq_flags);
+
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
+	struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct sh_dmae_chan *sh_chan;
+	struct scatterlist sg;
+
+	if (!chan || !len)
+		return NULL;
+
+	sh_chan = to_sh_chan(chan);
+
+	sg_init_table(&sg, 1);
+	sg_set_page(&sg, pfn_to_page(PFN_DOWN(dma_src)), len,
+		    offset_in_page(dma_src));
+	sg_dma_address(&sg) = dma_src;
+	sg_dma_len(&sg) = len;
+
+	return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
+			       flags);
+}
+
+static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct sh_dmae_slave *param;
+	struct sh_dmae_chan *sh_chan;
+	dma_addr_t slave_addr;
+
+	if (!chan)
+		return NULL;
+
+	sh_chan = to_sh_chan(chan);
+	param = chan->private;
+
+	/* Someone calling slave DMA on a public channel? */
+	if (!param || !sg_len) {
+		dev_warn(sh_chan->dev, "%s: bad parameter: %p, %d, %d\n",
+			 __func__, param, sg_len, param ? param->slave_id : -1);
+		return NULL;
+	}
+
+	slave_addr = param->config->addr;
+
+	/*
+	 * if (param != NULL), this is a successfully requested slave channel,
+	 * therefore param->config != NULL too.
+	 */
+	return sh_dmae_prep_sg(sh_chan, sgl, sg_len, &slave_addr,
+			       direction, flags);
+}
+
+static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			   unsigned long arg)
+{
+	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+	unsigned long flags;
+
+	/* Only supports DMA_TERMINATE_ALL */
+	if (cmd != DMA_TERMINATE_ALL)
+		return -ENXIO;
+
+	if (!chan)
+		return -EINVAL;
+
+	spin_lock_irqsave(&sh_chan->desc_lock, flags);
+	dmae_halt(sh_chan);
+
+	if (!list_empty(&sh_chan->ld_queue)) {
+		/* Record partial transfer */
+		struct sh_desc *desc = list_entry(sh_chan->ld_queue.next,
+						  struct sh_desc, node);
+		desc->partial = (desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) <<
+			sh_chan->xmit_shift;
+	}
+	spin_unlock_irqrestore(&sh_chan->desc_lock, flags);
+
+	sh_dmae_chan_ld_cleanup(sh_chan, true);
+
+	return 0;
+}
+
+static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
+{
+	struct sh_desc *desc, *_desc;
+	/* Is the "exposed" head of a chain acked? */
+	bool head_acked = false;
+	dma_cookie_t cookie = 0;
+	dma_async_tx_callback callback = NULL;
+	void *param = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sh_chan->desc_lock, flags);
+	list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
+		struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+		BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie);
+		BUG_ON(desc->mark != DESC_SUBMITTED &&
+		       desc->mark != DESC_COMPLETED &&
+		       desc->mark != DESC_WAITING);
+
+		/*
+		 * queue is ordered, and we use this loop to (1) clean up all
+		 * completed descriptors, and to (2) update descriptor flags of
+		 * any chunks in a (partially) completed chain
+		 */
+		if (!all && desc->mark == DESC_SUBMITTED &&
+		    desc->cookie != cookie)
+			break;
+
+		if (tx->cookie > 0)
+			cookie = tx->cookie;
+
+		if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
+			if (sh_chan->common.completed_cookie != desc->cookie - 1)
+				dev_dbg(sh_chan->dev,
+					"Completing cookie %d, expected %d\n",
+					desc->cookie,
+					sh_chan->common.completed_cookie + 1);
+			sh_chan->common.completed_cookie = desc->cookie;
+		}
+
+		/* Call callback on the last chunk */
+		if (desc->mark == DESC_COMPLETED && tx->callback) {
+			desc->mark = DESC_WAITING;
+			callback = tx->callback;
+			param = tx->callback_param;
+			dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n",
+				tx->cookie, tx, sh_chan->id);
+			BUG_ON(desc->chunks != 1);
+			break;
+		}
+
+		if (tx->cookie > 0 || tx->cookie == -EBUSY) {
+			if (desc->mark == DESC_COMPLETED) {
+				BUG_ON(tx->cookie < 0);
+				desc->mark = DESC_WAITING;
+			}
+			head_acked = async_tx_test_ack(tx);
+		} else {
+			switch (desc->mark) {
+			case DESC_COMPLETED:
+				desc->mark = DESC_WAITING;
+				/* Fall through */
+			case DESC_WAITING:
+				if (head_acked)
+					async_tx_ack(&desc->async_tx);
+			}
+		}
+
+		dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n",
+			tx, tx->cookie);
+
+		if (((desc->mark == DESC_COMPLETED ||
+		      desc->mark == DESC_WAITING) &&
+		     async_tx_test_ack(&desc->async_tx)) || all) {
+			/* Remove from ld_queue list */
+			desc->mark = DESC_IDLE;
+
+			list_move(&desc->node, &sh_chan->ld_free);
+
+			if (list_empty(&sh_chan->ld_queue)) {
+				dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id);
+				pm_runtime_put(sh_chan->dev);
+			}
+		}
+	}
+
+	if (all && !callback)
+		/*
+		 * Terminating and the loop completed normally: forgive
+		 * uncompleted cookies
+		 */
+		sh_chan->common.completed_cookie = sh_chan->common.cookie;
+
+	spin_unlock_irqrestore(&sh_chan->desc_lock, flags);
+
+	if (callback)
+		callback(param);
+
+	return callback;
+}
+
+/*
+ * sh_chan_ld_cleanup - Clean up link descriptors
+ *
+ * This function cleans up the ld_queue of DMA channel.
+ */
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
+{
+	while (__ld_cleanup(sh_chan, all))
+		;
+}
+
+/* Called under spin_lock_irq(&sh_chan->desc_lock) */
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
+{
+	struct sh_desc *desc;
+
+	/* DMA work check */
+	if (dmae_is_busy(sh_chan))
+		return;
+
+	/* Find the first not transferred descriptor */
+	list_for_each_entry(desc, &sh_chan->ld_queue, node)
+		if (desc->mark == DESC_SUBMITTED) {
+			dev_dbg(sh_chan->dev, "Queue #%d to %d: %u@%x -> %x\n",
+				desc->async_tx.cookie, sh_chan->id,
+				desc->hw.tcr, desc->hw.sar, desc->hw.dar);
+			/* Get the ld start address from ld_queue */
+			dmae_set_reg(sh_chan, &desc->hw);
+			dmae_start(sh_chan);
+			break;
+		}
+}
+
+static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
+{
+	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+
+	spin_lock_irq(&sh_chan->desc_lock);
+	if (sh_chan->pm_state == DMAE_PM_ESTABLISHED)
+		sh_chan_xfer_ld_queue(sh_chan);
+	else
+		sh_chan->pm_state = DMAE_PM_PENDING;
+	spin_unlock_irq(&sh_chan->desc_lock);
+}
+
+static enum dma_status sh_dmae_tx_status(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+	enum dma_status status;
+	unsigned long flags;
+
+	sh_dmae_chan_ld_cleanup(sh_chan, false);
+
+	spin_lock_irqsave(&sh_chan->desc_lock, flags);
+
+	status = dma_cookie_status(chan, cookie, txstate);
+
+	/*
+	 * If we don't find cookie on the queue, it has been aborted and we have
+	 * to report error
+	 */
+	if (status != DMA_SUCCESS) {
+		struct sh_desc *desc;
+		status = DMA_ERROR;
+		list_for_each_entry(desc, &sh_chan->ld_queue, node)
+			if (desc->cookie == cookie) {
+				status = DMA_IN_PROGRESS;
+				break;
+			}
+	}
+
+	spin_unlock_irqrestore(&sh_chan->desc_lock, flags);
+
+	return status;
+}
+
+static irqreturn_t sh_dmae_interrupt(int irq, void *data)
+{
+	irqreturn_t ret = IRQ_NONE;
+	struct sh_dmae_chan *sh_chan = data;
+	u32 chcr;
+
+	spin_lock(&sh_chan->desc_lock);
+
+	chcr = chcr_read(sh_chan);
+
+	if (chcr & CHCR_TE) {
+		/* DMA stop */
+		dmae_halt(sh_chan);
+
+		ret = IRQ_HANDLED;
+		tasklet_schedule(&sh_chan->tasklet);
+	}
+
+	spin_unlock(&sh_chan->desc_lock);
+
+	return ret;
+}
+
+/* Called from error IRQ or NMI */
+static bool sh_dmae_reset(struct sh_dmae_device *shdev)
+{
+	unsigned int handled = 0;
+	int i;
+
+	/* halt the dma controller */
+	sh_dmae_ctl_stop(shdev);
+
+	/* We cannot detect, which channel caused the error, have to reset all */
+	for (i = 0; i < SH_DMAC_MAX_CHANNELS; i++) {
+		struct sh_dmae_chan *sh_chan = shdev->chan[i];
+		struct sh_desc *desc;
+		LIST_HEAD(dl);
+
+		if (!sh_chan)
+			continue;
+
+		spin_lock(&sh_chan->desc_lock);
+
+		/* Stop the channel */
+		dmae_halt(sh_chan);
+
+		list_splice_init(&sh_chan->ld_queue, &dl);
+
+		if (!list_empty(&dl)) {
+			dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id);
+			pm_runtime_put(sh_chan->dev);
+		}
+		sh_chan->pm_state = DMAE_PM_ESTABLISHED;
+
+		spin_unlock(&sh_chan->desc_lock);
+
+		/* Complete all  */
+		list_for_each_entry(desc, &dl, node) {
+			struct dma_async_tx_descriptor *tx = &desc->async_tx;
+			desc->mark = DESC_IDLE;
+			if (tx->callback)
+				tx->callback(tx->callback_param);
+		}
+
+		spin_lock(&sh_chan->desc_lock);
+		list_splice(&dl, &sh_chan->ld_free);
+		spin_unlock(&sh_chan->desc_lock);
+
+		handled++;
+	}
+
+	sh_dmae_rst(shdev);
+
+	return !!handled;
+}
+
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+	struct sh_dmae_device *shdev = data;
+
+	if (!(dmaor_read(shdev) & DMAOR_AE))
+		return IRQ_NONE;
+
+	sh_dmae_reset(data);
+	return IRQ_HANDLED;
+}
+
+static void dmae_do_tasklet(unsigned long data)
+{
+	struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+	struct sh_desc *desc;
+	u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+	u32 dar_buf = sh_dmae_readl(sh_chan, DAR);
+
+	spin_lock_irq(&sh_chan->desc_lock);
+	list_for_each_entry(desc, &sh_chan->ld_queue, node) {
+		if (desc->mark == DESC_SUBMITTED &&
+		    ((desc->direction == DMA_DEV_TO_MEM &&
+		      (desc->hw.dar + desc->hw.tcr) == dar_buf) ||
+		     (desc->hw.sar + desc->hw.tcr) == sar_buf)) {
+			dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n",
+				desc->async_tx.cookie, &desc->async_tx,
+				desc->hw.dar);
+			desc->mark = DESC_COMPLETED;
+			break;
+		}
+	}
+	/* Next desc */
+	sh_chan_xfer_ld_queue(sh_chan);
+	spin_unlock_irq(&sh_chan->desc_lock);
+
+	sh_dmae_chan_ld_cleanup(sh_chan, false);
+}
+
+static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev)
+{
+	/* Fast path out if NMIF is not asserted for this controller */
+	if ((dmaor_read(shdev) & DMAOR_NMIF) == 0)
+		return false;
+
+	return sh_dmae_reset(shdev);
+}
+
+static int sh_dmae_nmi_handler(struct notifier_block *self,
+			       unsigned long cmd, void *data)
+{
+	struct sh_dmae_device *shdev;
+	int ret = NOTIFY_DONE;
+	bool triggered;
+
+	/*
+	 * Only concern ourselves with NMI events.
+	 *
+	 * Normally we would check the die chain value, but as this needs
+	 * to be architecture independent, check for NMI context instead.
+	 */
+	if (!in_nmi())
+		return NOTIFY_DONE;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(shdev, &sh_dmae_devices, node) {
+		/*
+		 * Only stop if one of the controllers has NMIF asserted,
+		 * we do not want to interfere with regular address error
+		 * handling or NMI events that don't concern the DMACs.
+		 */
+		triggered = sh_dmae_nmi_notify(shdev);
+		if (triggered == true)
+			ret = NOTIFY_OK;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static struct notifier_block sh_dmae_nmi_notifier __read_mostly = {
+	.notifier_call	= sh_dmae_nmi_handler,
+
+	/* Run before NMI debug handler and KGDB */
+	.priority	= 1,
+};
+
+static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
+					int irq, unsigned long flags)
+{
+	int err;
+	const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id];
+	struct platform_device *pdev = to_platform_device(shdev->common.dev);
+	struct sh_dmae_chan *new_sh_chan;
+
+	/* alloc channel */
+	new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
+	if (!new_sh_chan) {
+		dev_err(shdev->common.dev,
+			"No free memory for allocating dma channels!\n");
+		return -ENOMEM;
+	}
+
+	new_sh_chan->pm_state = DMAE_PM_ESTABLISHED;
+
+	/* reference struct dma_device */
+	new_sh_chan->common.device = &shdev->common;
+	dma_cookie_init(&new_sh_chan->common);
+
+	new_sh_chan->dev = shdev->common.dev;
+	new_sh_chan->id = id;
+	new_sh_chan->irq = irq;
+	new_sh_chan->base = shdev->chan_reg + chan_pdata->offset / sizeof(u32);
+
+	/* Init DMA tasklet */
+	tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
+			(unsigned long)new_sh_chan);
+
+	spin_lock_init(&new_sh_chan->desc_lock);
+
+	/* Init descripter manage list */
+	INIT_LIST_HEAD(&new_sh_chan->ld_queue);
+	INIT_LIST_HEAD(&new_sh_chan->ld_free);
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&new_sh_chan->common.device_node,
+			&shdev->common.channels);
+	shdev->common.chancnt++;
+
+	if (pdev->id >= 0)
+		snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
+			 "sh-dmae%d.%d", pdev->id, new_sh_chan->id);
+	else
+		snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
+			 "sh-dma%d", new_sh_chan->id);
+
+	/* set up channel irq */
+	err = request_irq(irq, &sh_dmae_interrupt, flags,
+			  new_sh_chan->dev_id, new_sh_chan);
+	if (err) {
+		dev_err(shdev->common.dev, "DMA channel %d request_irq error "
+			"with return %d\n", id, err);
+		goto err_no_irq;
+	}
+
+	shdev->chan[id] = new_sh_chan;
+	return 0;
+
+err_no_irq:
+	/* remove from dmaengine device node */
+	list_del(&new_sh_chan->common.device_node);
+	kfree(new_sh_chan);
+	return err;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+	int i;
+
+	for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
+		if (shdev->chan[i]) {
+			struct sh_dmae_chan *sh_chan = shdev->chan[i];
+
+			free_irq(sh_chan->irq, sh_chan);
+
+			list_del(&sh_chan->common.device_node);
+			kfree(sh_chan);
+			shdev->chan[i] = NULL;
+		}
+	}
+	shdev->common.chancnt = 0;
+}
+
+static int __init sh_dmae_probe(struct platform_device *pdev)
+{
+	struct sh_dmae_pdata *pdata = pdev->dev.platform_data;
+	unsigned long irqflags = IRQF_DISABLED,
+		chan_flag[SH_DMAC_MAX_CHANNELS] = {};
+	int errirq, chan_irq[SH_DMAC_MAX_CHANNELS];
+	int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
+	struct sh_dmae_device *shdev;
+	struct resource *chan, *dmars, *errirq_res, *chanirq_res;
+
+	/* get platform data */
+	if (!pdata || !pdata->channel_num)
+		return -ENODEV;
+
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* DMARS area is optional */
+	dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	/*
+	 * IRQ resources:
+	 * 1. there always must be at least one IRQ IO-resource. On SH4 it is
+	 *    the error IRQ, in which case it is the only IRQ in this resource:
+	 *    start == end. If it is the only IRQ resource, all channels also
+	 *    use the same IRQ.
+	 * 2. DMA channel IRQ resources can be specified one per resource or in
+	 *    ranges (start != end)
+	 * 3. iff all events (channels and, optionally, error) on this
+	 *    controller use the same IRQ, only one IRQ resource can be
+	 *    specified, otherwise there must be one IRQ per channel, even if
+	 *    some of them are equal
+	 * 4. if all IRQs on this controller are equal or if some specific IRQs
+	 *    specify IORESOURCE_IRQ_SHAREABLE in their resources, they will be
+	 *    requested with the IRQF_SHARED flag
+	 */
+	errirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!chan || !errirq_res)
+		return -ENODEV;
+
+	if (!request_mem_region(chan->start, resource_size(chan), pdev->name)) {
+		dev_err(&pdev->dev, "DMAC register region already claimed\n");
+		return -EBUSY;
+	}
+
+	if (dmars && !request_mem_region(dmars->start, resource_size(dmars), pdev->name)) {
+		dev_err(&pdev->dev, "DMAC DMARS region already claimed\n");
+		err = -EBUSY;
+		goto ermrdmars;
+	}
+
+	err = -ENOMEM;
+	shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
+	if (!shdev) {
+		dev_err(&pdev->dev, "Not enough memory\n");
+		goto ealloc;
+	}
+
+	shdev->chan_reg = ioremap(chan->start, resource_size(chan));
+	if (!shdev->chan_reg)
+		goto emapchan;
+	if (dmars) {
+		shdev->dmars = ioremap(dmars->start, resource_size(dmars));
+		if (!shdev->dmars)
+			goto emapdmars;
+	}
+
+	/* platform data */
+	shdev->pdata = pdata;
+
+	if (pdata->chcr_offset)
+		shdev->chcr_offset = pdata->chcr_offset;
+	else
+		shdev->chcr_offset = CHCR;
+
+	if (pdata->chcr_ie_bit)
+		shdev->chcr_ie_bit = pdata->chcr_ie_bit;
+	else
+		shdev->chcr_ie_bit = CHCR_IE;
+
+	platform_set_drvdata(pdev, shdev);
+
+	shdev->common.dev = &pdev->dev;
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	spin_lock_irq(&sh_dmae_lock);
+	list_add_tail_rcu(&shdev->node, &sh_dmae_devices);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	/* reset dma controller - only needed as a test */
+	err = sh_dmae_rst(shdev);
+	if (err)
+		goto rst_err;
+
+	INIT_LIST_HEAD(&shdev->common.channels);
+
+	if (!pdata->slave_only)
+		dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+	if (pdata->slave && pdata->slave_num)
+		dma_cap_set(DMA_SLAVE, shdev->common.cap_mask);
+
+	shdev->common.device_alloc_chan_resources
+		= sh_dmae_alloc_chan_resources;
+	shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
+	shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
+	shdev->common.device_tx_status = sh_dmae_tx_status;
+	shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
+
+	/* Compulsory for DMA_SLAVE fields */
+	shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg;
+	shdev->common.device_control = sh_dmae_control;
+
+	/* Default transfer size of 32 bytes requires 32-byte alignment */
+	shdev->common.copy_align = LOG2_DEFAULT_XFER_SIZE;
+
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+	chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
+
+	if (!chanirq_res)
+		chanirq_res = errirq_res;
+	else
+		irqres++;
+
+	if (chanirq_res == errirq_res ||
+	    (errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE)
+		irqflags = IRQF_SHARED;
+
+	errirq = errirq_res->start;
+
+	err = request_irq(errirq, sh_dmae_err, irqflags,
+			  "DMAC Address Error", shdev);
+	if (err) {
+		dev_err(&pdev->dev,
+			"DMA failed requesting irq #%d, error %d\n",
+			errirq, err);
+		goto eirq_err;
+	}
+
+#else
+	chanirq_res = errirq_res;
+#endif /* CONFIG_CPU_SH4 || CONFIG_ARCH_SHMOBILE */
+
+	if (chanirq_res->start == chanirq_res->end &&
+	    !platform_get_resource(pdev, IORESOURCE_IRQ, 1)) {
+		/* Special case - all multiplexed */
+		for (; irq_cnt < pdata->channel_num; irq_cnt++) {
+			if (irq_cnt < SH_DMAC_MAX_CHANNELS) {
+				chan_irq[irq_cnt] = chanirq_res->start;
+				chan_flag[irq_cnt] = IRQF_SHARED;
+			} else {
+				irq_cap = 1;
+				break;
+			}
+		}
+	} else {
+		do {
+			for (i = chanirq_res->start; i <= chanirq_res->end; i++) {
+				if (irq_cnt >= SH_DMAC_MAX_CHANNELS) {
+					irq_cap = 1;
+					break;
+				}
+
+				if ((errirq_res->flags & IORESOURCE_BITS) ==
+				    IORESOURCE_IRQ_SHAREABLE)
+					chan_flag[irq_cnt] = IRQF_SHARED;
+				else
+					chan_flag[irq_cnt] = IRQF_DISABLED;
+				dev_dbg(&pdev->dev,
+					"Found IRQ %d for channel %d\n",
+					i, irq_cnt);
+				chan_irq[irq_cnt++] = i;
+			}
+
+			if (irq_cnt >= SH_DMAC_MAX_CHANNELS)
+				break;
+
+			chanirq_res = platform_get_resource(pdev,
+						IORESOURCE_IRQ, ++irqres);
+		} while (irq_cnt < pdata->channel_num && chanirq_res);
+	}
+
+	/* Create DMA Channel */
+	for (i = 0; i < irq_cnt; i++) {
+		err = sh_dmae_chan_probe(shdev, i, chan_irq[i], chan_flag[i]);
+		if (err)
+			goto chan_probe_err;
+	}
+
+	if (irq_cap)
+		dev_notice(&pdev->dev, "Attempting to register %d DMA "
+			   "channels when a maximum of %d are supported.\n",
+			   pdata->channel_num, SH_DMAC_MAX_CHANNELS);
+
+	pm_runtime_put(&pdev->dev);
+
+	dma_async_device_register(&shdev->common);
+
+	return err;
+
+chan_probe_err:
+	sh_dmae_chan_remove(shdev);
+
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+	free_irq(errirq, shdev);
+eirq_err:
+#endif
+rst_err:
+	spin_lock_irq(&sh_dmae_lock);
+	list_del_rcu(&shdev->node);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	if (dmars)
+		iounmap(shdev->dmars);
+
+	platform_set_drvdata(pdev, NULL);
+emapdmars:
+	iounmap(shdev->chan_reg);
+	synchronize_rcu();
+emapchan:
+	kfree(shdev);
+ealloc:
+	if (dmars)
+		release_mem_region(dmars->start, resource_size(dmars));
+ermrdmars:
+	release_mem_region(chan->start, resource_size(chan));
+
+	return err;
+}
+
+static int __exit sh_dmae_remove(struct platform_device *pdev)
+{
+	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+	struct resource *res;
+	int errirq = platform_get_irq(pdev, 0);
+
+	dma_async_device_unregister(&shdev->common);
+
+	if (errirq > 0)
+		free_irq(errirq, shdev);
+
+	spin_lock_irq(&sh_dmae_lock);
+	list_del_rcu(&shdev->node);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	/* channel data remove */
+	sh_dmae_chan_remove(shdev);
+
+	pm_runtime_disable(&pdev->dev);
+
+	if (shdev->dmars)
+		iounmap(shdev->dmars);
+	iounmap(shdev->chan_reg);
+
+	platform_set_drvdata(pdev, NULL);
+
+	synchronize_rcu();
+	kfree(shdev);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res)
+		release_mem_region(res->start, resource_size(res));
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (res)
+		release_mem_region(res->start, resource_size(res));
+
+	return 0;
+}
+
+static void sh_dmae_shutdown(struct platform_device *pdev)
+{
+	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+	sh_dmae_ctl_stop(shdev);
+}
+
+static int sh_dmae_runtime_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int sh_dmae_runtime_resume(struct device *dev)
+{
+	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+	return sh_dmae_rst(shdev);
+}
+
+#ifdef CONFIG_PM
+static int sh_dmae_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int sh_dmae_resume(struct device *dev)
+{
+	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+	int i, ret;
+
+	ret = sh_dmae_rst(shdev);
+	if (ret < 0)
+		dev_err(dev, "Failed to reset!\n");
+
+	for (i = 0; i < shdev->pdata->channel_num; i++) {
+		struct sh_dmae_chan *sh_chan = shdev->chan[i];
+		struct sh_dmae_slave *param = sh_chan->common.private;
+
+		if (!sh_chan->descs_allocated)
+			continue;
+
+		if (param) {
+			const struct sh_dmae_slave_config *cfg = param->config;
+			dmae_set_dmars(sh_chan, cfg->mid_rid);
+			dmae_set_chcr(sh_chan, cfg->chcr);
+		} else {
+			dmae_init(sh_chan);
+		}
+	}
+
+	return 0;
+}
+#else
+#define sh_dmae_suspend NULL
+#define sh_dmae_resume NULL
+#endif
+
+const struct dev_pm_ops sh_dmae_pm = {
+	.suspend		= sh_dmae_suspend,
+	.resume			= sh_dmae_resume,
+	.runtime_suspend	= sh_dmae_runtime_suspend,
+	.runtime_resume		= sh_dmae_runtime_resume,
+};
+
+static struct platform_driver sh_dmae_driver = {
+	.remove		= __exit_p(sh_dmae_remove),
+	.shutdown	= sh_dmae_shutdown,
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "sh-dma-engine",
+		.pm	= &sh_dmae_pm,
+	},
+};
+
+static int __init sh_dmae_init(void)
+{
+	/* Wire up NMI handling */
+	int err = register_die_notifier(&sh_dmae_nmi_notifier);
+	if (err)
+		return err;
+
+	return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void)
+{
+	platform_driver_unregister(&sh_dmae_driver);
+
+	unregister_die_notifier(&sh_dmae_nmi_notifier);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:sh-dma-engine");
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
new file mode 100644
index 00000000..0b1d2c10
--- /dev/null
+++ b/drivers/dma/shdma.h
@@ -0,0 +1,66 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_SHDMA_H
+#define __DMA_SHDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+
+#define SH_DMAC_MAX_CHANNELS 20
+#define SH_DMA_SLAVE_NUMBER 256
+#define SH_DMA_TCR_MAX 0x00FFFFFF	/* 16MB */
+
+struct device;
+
+enum dmae_pm_state {
+	DMAE_PM_ESTABLISHED,
+	DMAE_PM_BUSY,
+	DMAE_PM_PENDING,
+};
+
+struct sh_dmae_chan {
+	spinlock_t desc_lock;		/* Descriptor operation lock */
+	struct list_head ld_queue;	/* Link descriptors queue */
+	struct list_head ld_free;	/* Link descriptors free */
+	struct dma_chan common;		/* DMA common channel */
+	struct device *dev;		/* Channel device */
+	struct tasklet_struct tasklet;	/* Tasklet */
+	int descs_allocated;		/* desc count */
+	int xmit_shift;			/* log_2(bytes_per_xfer) */
+	int irq;
+	int id;				/* Raw id of this channel */
+	u32 __iomem *base;
+	char dev_id[16];		/* unique name per DMAC of channel */
+	int pm_error;
+	enum dmae_pm_state pm_state;
+};
+
+struct sh_dmae_device {
+	struct dma_device common;
+	struct sh_dmae_chan *chan[SH_DMAC_MAX_CHANNELS];
+	struct sh_dmae_pdata *pdata;
+	struct list_head node;
+	u32 __iomem *chan_reg;
+	u16 __iomem *dmars;
+	unsigned int chcr_offset;
+	u32 chcr_ie_bit;
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
+#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
+#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
+#define to_sh_dev(chan) container_of(chan->common.device,\
+				     struct sh_dmae_device, common)
+
+#endif	/* __DMA_SHDMA_H */
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
new file mode 100644
index 00000000..434ad311
--- /dev/null
+++ b/drivers/dma/sirf-dma.c
@@ -0,0 +1,698 @@
+/*
+ * DMA controller driver for CSR SiRFprimaII
+ *
+ * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/sirfsoc_dma.h>
+
+#include "dmaengine.h"
+
+#define SIRFSOC_DMA_DESCRIPTORS                 16
+#define SIRFSOC_DMA_CHANNELS                    16
+
+#define SIRFSOC_DMA_CH_ADDR                     0x00
+#define SIRFSOC_DMA_CH_XLEN                     0x04
+#define SIRFSOC_DMA_CH_YLEN                     0x08
+#define SIRFSOC_DMA_CH_CTRL                     0x0C
+
+#define SIRFSOC_DMA_WIDTH_0                     0x100
+#define SIRFSOC_DMA_CH_VALID                    0x140
+#define SIRFSOC_DMA_CH_INT                      0x144
+#define SIRFSOC_DMA_INT_EN                      0x148
+#define SIRFSOC_DMA_CH_LOOP_CTRL                0x150
+
+#define SIRFSOC_DMA_MODE_CTRL_BIT               4
+#define SIRFSOC_DMA_DIR_CTRL_BIT                5
+
+/* xlen and dma_width register is in 4 bytes boundary */
+#define SIRFSOC_DMA_WORD_LEN			4
+
+struct sirfsoc_dma_desc {
+	struct dma_async_tx_descriptor	desc;
+	struct list_head		node;
+
+	/* SiRFprimaII 2D-DMA parameters */
+
+	int             xlen;           /* DMA xlen */
+	int             ylen;           /* DMA ylen */
+	int             width;          /* DMA width */
+	int             dir;
+	bool            cyclic;         /* is loop DMA? */
+	u32             addr;		/* DMA buffer address */
+};
+
+struct sirfsoc_dma_chan {
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		queued;
+	struct list_head		active;
+	struct list_head		completed;
+	unsigned long			happened_cyclic;
+	unsigned long			completed_cyclic;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+
+	int				mode;
+};
+
+struct sirfsoc_dma {
+	struct dma_device		dma;
+	struct tasklet_struct		tasklet;
+	struct sirfsoc_dma_chan		channels[SIRFSOC_DMA_CHANNELS];
+	void __iomem			*base;
+	int				irq;
+};
+
+#define DRV_NAME	"sirfsoc_dma"
+
+/* Convert struct dma_chan to struct sirfsoc_dma_chan */
+static inline
+struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct sirfsoc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct sirfsoc_dma */
+static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(c);
+	return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]);
+}
+
+/* Execute all queued DMA descriptors */
+static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+
+	/*
+	 * lock has been held by functions calling this, so we don't hold
+	 * lock again
+	 */
+
+	sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc,
+		node);
+	/* Move the first queued descriptor to active list */
+	list_move_tail(&schan->queued, &schan->active);
+
+	/* Start the DMA transfer */
+	writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 +
+		cid * 4);
+	writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+		(sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+		sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 +
+		SIRFSOC_DMA_CH_XLEN);
+	writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 +
+		SIRFSOC_DMA_CH_YLEN);
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) |
+		(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+
+	/*
+	 * writel has an implict memory write barrier to make sure data is
+	 * flushed into memory before starting DMA
+	 */
+	writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+
+	if (sdesc->cyclic) {
+		writel((1 << cid) | 1 << (cid + 16) |
+			readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL),
+			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		schan->happened_cyclic = schan->completed_cyclic = 0;
+	}
+}
+
+/* Interrupt handler */
+static irqreturn_t sirfsoc_dma_irq(int irq, void *data)
+{
+	struct sirfsoc_dma *sdma = data;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	u32 is;
+	int ch;
+
+	is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
+	while ((ch = fls(is) - 1) >= 0) {
+		is &= ~(1 << ch);
+		writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT);
+		schan = &sdma->channels[ch];
+
+		spin_lock(&schan->lock);
+
+		sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+			node);
+		if (!sdesc->cyclic) {
+			/* Execute queued descriptors */
+			list_splice_tail_init(&schan->active, &schan->completed);
+			if (!list_empty(&schan->queued))
+				sirfsoc_dma_execute(schan);
+		} else
+			schan->happened_cyclic++;
+
+		spin_unlock(&schan->lock);
+	}
+
+	/* Schedule tasklet */
+	tasklet_schedule(&sdma->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* process completed descriptors */
+static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
+{
+	dma_cookie_t last_cookie = 0;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long flags;
+	unsigned long happened_cyclic;
+	LIST_HEAD(list);
+	int i;
+
+	for (i = 0; i < sdma->dma.chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&schan->lock, flags);
+		if (!list_empty(&schan->completed)) {
+			list_splice_tail_init(&schan->completed, &list);
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			/* Execute callbacks and run dependencies */
+			list_for_each_entry(sdesc, &list, node) {
+				desc = &sdesc->desc;
+
+				if (desc->callback)
+					desc->callback(desc->callback_param);
+
+				last_cookie = desc->cookie;
+				dma_run_dependencies(desc);
+			}
+
+			/* Free descriptors */
+			spin_lock_irqsave(&schan->lock, flags);
+			list_splice_tail_init(&list, &schan->free);
+			schan->chan.completed_cookie = last_cookie;
+			spin_unlock_irqrestore(&schan->lock, flags);
+		} else {
+			/* for cyclic channel, desc is always in active list */
+			sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+				node);
+
+			if (!sdesc || (sdesc && !sdesc->cyclic)) {
+				/* without active cyclic DMA */
+				spin_unlock_irqrestore(&schan->lock, flags);
+				continue;
+			}
+
+			/* cyclic DMA */
+			happened_cyclic = schan->happened_cyclic;
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			desc = &sdesc->desc;
+			while (happened_cyclic != schan->completed_cyclic) {
+				if (desc->callback)
+					desc->callback(desc->callback_param);
+				schan->completed_cyclic++;
+			}
+		}
+	}
+}
+
+/* DMA Tasklet */
+static void sirfsoc_dma_tasklet(unsigned long data)
+{
+	struct sirfsoc_dma *sdma = (void *)data;
+
+	sirfsoc_dma_process_completed(sdma);
+}
+
+/* Submit descriptor to hardware */
+static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(txd->chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	sdesc = container_of(txd, struct sirfsoc_dma_desc, desc);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Move descriptor to queue */
+	list_move_tail(&sdesc->node, &schan->queued);
+
+	cookie = dma_cookie_assign(txd);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return cookie;
+}
+
+static int sirfsoc_dma_slave_config(struct sirfsoc_dma_chan *schan,
+	struct dma_slave_config *config)
+{
+	unsigned long flags;
+
+	if ((config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+		(config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES))
+		return -EINVAL;
+
+	spin_lock_irqsave(&schan->lock, flags);
+	schan->mode = (config->src_maxburst == 4 ? 1 : 0);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_terminate_all(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	unsigned long flags;
+
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
+		~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+	writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL)
+		& ~((1 << cid) | 1 << (cid + 16)),
+			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+
+	spin_lock_irqsave(&schan->lock, flags);
+	list_splice_tail_init(&schan->active, &schan->free);
+	list_splice_tail_init(&schan->queued, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct dma_slave_config *config;
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		return sirfsoc_dma_terminate_all(schan);
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+		return sirfsoc_dma_slave_config(schan, config);
+
+	default:
+		break;
+	}
+
+	return -ENOSYS;
+}
+
+/* Alloc channel resources */
+static int sirfsoc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	LIST_HEAD(descs);
+	int i;
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
+		sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
+		if (!sdesc) {
+			dev_notice(sdma->dma.dev, "Memory allocation error. "
+				"Allocated only %u descriptors\n", i);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&sdesc->desc, chan);
+		sdesc->desc.flags = DMA_CTRL_ACK;
+		sdesc->desc.tx_submit = sirfsoc_dma_tx_submit;
+
+		list_add_tail(&sdesc->node, &descs);
+	}
+
+	/* Return error only if no descriptors were allocated */
+	if (i == 0)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	list_splice_tail_init(&descs, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return i;
+}
+
+/* Free channel resources */
+static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&schan->prepared));
+	BUG_ON(!list_empty(&schan->queued));
+	BUG_ON(!list_empty(&schan->active));
+	BUG_ON(!list_empty(&schan->completed));
+
+	/* Move data */
+	list_splice_tail_init(&schan->free, &descs);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(sdesc, tmp, &descs, node)
+		kfree(sdesc);
+}
+
+/* Send pending descriptor to hardware */
+static void sirfsoc_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	if (list_empty(&schan->active) && !list_empty(&schan->queued))
+		sirfsoc_dma_execute(schan);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+}
+
+/* Check request completion status */
+static enum dma_status
+sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+	struct dma_tx_state *txstate)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+	enum dma_status ret;
+
+	spin_lock_irqsave(&schan->lock, flags);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return ret;
+}
+
+static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+	int ret;
+
+	if ((xt->dir != DMA_MEM_TO_DEV) || (xt->dir != DMA_DEV_TO_MEM)) {
+		ret = -EINVAL;
+		goto err_dir;
+	}
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc) {
+		/* try to free completed descriptors */
+		sirfsoc_dma_process_completed(sdma);
+		ret = 0;
+		goto no_desc;
+	}
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+
+	/*
+	 * Number of chunks in a frame can only be 1 for prima2
+	 * and ylen (number of frame - 1) must be at least 0
+	 */
+	if ((xt->frame_size == 1) && (xt->numf > 0)) {
+		sdesc->cyclic = 0;
+		sdesc->xlen = xt->sgl[0].size / SIRFSOC_DMA_WORD_LEN;
+		sdesc->width = (xt->sgl[0].size + xt->sgl[0].icg) /
+				SIRFSOC_DMA_WORD_LEN;
+		sdesc->ylen = xt->numf - 1;
+		if (xt->dir == DMA_MEM_TO_DEV) {
+			sdesc->addr = xt->src_start;
+			sdesc->dir = 1;
+		} else {
+			sdesc->addr = xt->dst_start;
+			sdesc->dir = 0;
+		}
+
+		list_add_tail(&sdesc->node, &schan->prepared);
+	} else {
+		pr_err("sirfsoc DMA Invalid xfer\n");
+		ret = -EINVAL;
+		goto err_xfer;
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+err_xfer:
+	spin_unlock_irqrestore(&schan->lock, iflags);
+no_desc:
+err_dir:
+	return ERR_PTR(ret);
+}
+
+static struct dma_async_tx_descriptor *
+sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
+	size_t buf_len, size_t period_len,
+	enum dma_transfer_direction direction, void *context)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+
+	/*
+	 * we only support cycle transfer with 2 period
+	 * If the X-length is set to 0, it would be the loop mode.
+	 * The DMA address keeps increasing until reaching the end of a loop
+	 * area whose size is defined by (DMA_WIDTH x (Y_LENGTH + 1)). Then
+	 * the DMA address goes back to the beginning of this area.
+	 * In loop mode, the DMA data region is divided into two parts, BUFA
+	 * and BUFB. DMA controller generates interrupts twice in each loop:
+	 * when the DMA address reaches the end of BUFA or the end of the
+	 * BUFB
+	 */
+	if (buf_len !=  2 * period_len)
+		return ERR_PTR(-EINVAL);
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc)
+		return 0;
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+	sdesc->addr = addr;
+	sdesc->cyclic = 1;
+	sdesc->xlen = 0;
+	sdesc->ylen = buf_len / SIRFSOC_DMA_WORD_LEN - 1;
+	sdesc->width = 1;
+	list_add_tail(&sdesc->node, &schan->prepared);
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+}
+
+/*
+ * The DMA controller consists of 16 independent DMA channels.
+ * Each channel is allocated to a different function
+ */
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	unsigned int ch_nr = (unsigned int) chan_id;
+
+	if (ch_nr == chan->chan_id +
+		chan->device->dev_id * SIRFSOC_DMA_CHANNELS)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(sirfsoc_dma_filter_id);
+
+static int __devinit sirfsoc_dma_probe(struct platform_device *op)
+{
+	struct device_node *dn = op->dev.of_node;
+	struct device *dev = &op->dev;
+	struct dma_device *dma;
+	struct sirfsoc_dma *sdma;
+	struct sirfsoc_dma_chan *schan;
+	struct resource res;
+	ulong regs_start, regs_size;
+	u32 id;
+	int ret, i;
+
+	sdma = devm_kzalloc(dev, sizeof(*sdma), GFP_KERNEL);
+	if (!sdma) {
+		dev_err(dev, "Memory exhausted!\n");
+		return -ENOMEM;
+	}
+
+	if (of_property_read_u32(dn, "cell-index", &id)) {
+		dev_err(dev, "Fail to get DMAC index\n");
+		ret = -ENODEV;
+		goto free_mem;
+	}
+
+	sdma->irq = irq_of_parse_and_map(dn, 0);
+	if (sdma->irq == NO_IRQ) {
+		dev_err(dev, "Error mapping IRQ!\n");
+		ret = -EINVAL;
+		goto free_mem;
+	}
+
+	ret = of_address_to_resource(dn, 0, &res);
+	if (ret) {
+		dev_err(dev, "Error parsing memory region!\n");
+		goto free_mem;
+	}
+
+	regs_start = res.start;
+	regs_size = resource_size(&res);
+
+	sdma->base = devm_ioremap(dev, regs_start, regs_size);
+	if (!sdma->base) {
+		dev_err(dev, "Error mapping memory region!\n");
+		ret = -ENOMEM;
+		goto irq_dispose;
+	}
+
+	ret = devm_request_irq(dev, sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME,
+		sdma);
+	if (ret) {
+		dev_err(dev, "Error requesting IRQ!\n");
+		ret = -EINVAL;
+		goto unmap_mem;
+	}
+
+	dma = &sdma->dma;
+	dma->dev = dev;
+	dma->chancnt = SIRFSOC_DMA_CHANNELS;
+
+	dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
+	dma->device_issue_pending = sirfsoc_dma_issue_pending;
+	dma->device_control = sirfsoc_dma_control;
+	dma->device_tx_status = sirfsoc_dma_tx_status;
+	dma->device_prep_interleaved_dma = sirfsoc_dma_prep_interleaved;
+	dma->device_prep_dma_cyclic = sirfsoc_dma_prep_cyclic;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dma->cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+	for (i = 0; i < dma->chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		schan->chan.device = dma;
+		dma_cookie_init(&schan->chan);
+
+		INIT_LIST_HEAD(&schan->free);
+		INIT_LIST_HEAD(&schan->prepared);
+		INIT_LIST_HEAD(&schan->queued);
+		INIT_LIST_HEAD(&schan->active);
+		INIT_LIST_HEAD(&schan->completed);
+
+		spin_lock_init(&schan->lock);
+		list_add_tail(&schan->chan.device_node, &dma->channels);
+	}
+
+	tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
+
+	/* Register DMA engine */
+	dev_set_drvdata(dev, sdma);
+	ret = dma_async_device_register(dma);
+	if (ret)
+		goto free_irq;
+
+	dev_info(dev, "initialized SIRFSOC DMAC driver\n");
+
+	return 0;
+
+free_irq:
+	devm_free_irq(dev, sdma->irq, sdma);
+irq_dispose:
+	irq_dispose_mapping(sdma->irq);
+unmap_mem:
+	iounmap(sdma->base);
+free_mem:
+	devm_kfree(dev, sdma);
+	return ret;
+}
+
+static int __devexit sirfsoc_dma_remove(struct platform_device *op)
+{
+	struct device *dev = &op->dev;
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+	dma_async_device_unregister(&sdma->dma);
+	devm_free_irq(dev, sdma->irq, sdma);
+	irq_dispose_mapping(sdma->irq);
+	iounmap(sdma->base);
+	devm_kfree(dev, sdma);
+	return 0;
+}
+
+static struct of_device_id sirfsoc_dma_match[] = {
+	{ .compatible = "sirf,prima2-dmac", },
+	{},
+};
+
+static struct platform_driver sirfsoc_dma_driver = {
+	.probe		= sirfsoc_dma_probe,
+	.remove		= __devexit_p(sirfsoc_dma_remove),
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table	= sirfsoc_dma_match,
+	},
+};
+
+module_platform_driver(sirfsoc_dma_driver);
+
+MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, "
+	"Barry Song <baohua.song@csr.com>");
+MODULE_DESCRIPTION("SIRFSOC DMA control driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
new file mode 100644
index 00000000..2ed1ac35
--- /dev/null
+++ b/drivers/dma/ste_dma40.c
@@ -0,0 +1,3428 @@
+/*
+ * Copyright (C) Ericsson AB 2007-2008
+ * Copyright (C) ST-Ericsson SA 2008-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/amba/bus.h>
+#include <linux/regulator/consumer.h>
+
+#include <plat/ste_dma40.h>
+
+#include "dmaengine.h"
+#include "ste_dma40_ll.h"
+
+#define D40_NAME "dma40"
+
+#define D40_PHY_CHAN -1
+
+/* For masking out/in 2 bit channel positions */
+#define D40_CHAN_POS(chan)  (2 * (chan / 2))
+#define D40_CHAN_POS_MASK(chan) (0x3 << D40_CHAN_POS(chan))
+
+/* Maximum iterations taken before giving up suspending a channel */
+#define D40_SUSPEND_MAX_IT 500
+
+/* Milliseconds */
+#define DMA40_AUTOSUSPEND_DELAY	100
+
+/* Hardware requirement on LCLA alignment */
+#define LCLA_ALIGNMENT 0x40000
+
+/* Max number of links per event group */
+#define D40_LCLA_LINK_PER_EVENT_GRP 128
+#define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP
+
+/* Attempts before giving up to trying to get pages that are aligned */
+#define MAX_LCLA_ALLOC_ATTEMPTS 256
+
+/* Bit markings for allocation map */
+#define D40_ALLOC_FREE		(1 << 31)
+#define D40_ALLOC_PHY		(1 << 30)
+#define D40_ALLOC_LOG_FREE	0
+
+/**
+ * enum 40_command - The different commands and/or statuses.
+ *
+ * @D40_DMA_STOP: DMA channel command STOP or status STOPPED,
+ * @D40_DMA_RUN: The DMA channel is RUNNING of the command RUN.
+ * @D40_DMA_SUSPEND_REQ: Request the DMA to SUSPEND as soon as possible.
+ * @D40_DMA_SUSPENDED: The DMA channel is SUSPENDED.
+ */
+enum d40_command {
+	D40_DMA_STOP		= 0,
+	D40_DMA_RUN		= 1,
+	D40_DMA_SUSPEND_REQ	= 2,
+	D40_DMA_SUSPENDED	= 3
+};
+
+/*
+ * enum d40_events - The different Event Enables for the event lines.
+ *
+ * @D40_DEACTIVATE_EVENTLINE: De-activate Event line, stopping the logical chan.
+ * @D40_ACTIVATE_EVENTLINE: Activate the Event line, to start a logical chan.
+ * @D40_SUSPEND_REQ_EVENTLINE: Requesting for suspending a event line.
+ * @D40_ROUND_EVENTLINE: Status check for event line.
+ */
+
+enum d40_events {
+	D40_DEACTIVATE_EVENTLINE	= 0,
+	D40_ACTIVATE_EVENTLINE		= 1,
+	D40_SUSPEND_REQ_EVENTLINE	= 2,
+	D40_ROUND_EVENTLINE		= 3
+};
+
+/*
+ * These are the registers that has to be saved and later restored
+ * when the DMA hw is powered off.
+ * TODO: Add save/restore of D40_DREG_GCC on dma40 v3 or later, if that works.
+ */
+static u32 d40_backup_regs[] = {
+	D40_DREG_LCPA,
+	D40_DREG_LCLA,
+	D40_DREG_PRMSE,
+	D40_DREG_PRMSO,
+	D40_DREG_PRMOE,
+	D40_DREG_PRMOO,
+};
+
+#define BACKUP_REGS_SZ ARRAY_SIZE(d40_backup_regs)
+
+/* TODO: Check if all these registers have to be saved/restored on dma40 v3 */
+static u32 d40_backup_regs_v3[] = {
+	D40_DREG_PSEG1,
+	D40_DREG_PSEG2,
+	D40_DREG_PSEG3,
+	D40_DREG_PSEG4,
+	D40_DREG_PCEG1,
+	D40_DREG_PCEG2,
+	D40_DREG_PCEG3,
+	D40_DREG_PCEG4,
+	D40_DREG_RSEG1,
+	D40_DREG_RSEG2,
+	D40_DREG_RSEG3,
+	D40_DREG_RSEG4,
+	D40_DREG_RCEG1,
+	D40_DREG_RCEG2,
+	D40_DREG_RCEG3,
+	D40_DREG_RCEG4,
+};
+
+#define BACKUP_REGS_SZ_V3 ARRAY_SIZE(d40_backup_regs_v3)
+
+static u32 d40_backup_regs_chan[] = {
+	D40_CHAN_REG_SSCFG,
+	D40_CHAN_REG_SSELT,
+	D40_CHAN_REG_SSPTR,
+	D40_CHAN_REG_SSLNK,
+	D40_CHAN_REG_SDCFG,
+	D40_CHAN_REG_SDELT,
+	D40_CHAN_REG_SDPTR,
+	D40_CHAN_REG_SDLNK,
+};
+
+/**
+ * struct d40_lli_pool - Structure for keeping LLIs in memory
+ *
+ * @base: Pointer to memory area when the pre_alloc_lli's are not large
+ * enough, IE bigger than the most common case, 1 dst and 1 src. NULL if
+ * pre_alloc_lli is used.
+ * @dma_addr: DMA address, if mapped
+ * @size: The size in bytes of the memory at base or the size of pre_alloc_lli.
+ * @pre_alloc_lli: Pre allocated area for the most common case of transfers,
+ * one buffer to one buffer.
+ */
+struct d40_lli_pool {
+	void	*base;
+	int	 size;
+	dma_addr_t	dma_addr;
+	/* Space for dst and src, plus an extra for padding */
+	u8	 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)];
+};
+
+/**
+ * struct d40_desc - A descriptor is one DMA job.
+ *
+ * @lli_phy: LLI settings for physical channel. Both src and dst=
+ * points into the lli_pool, to base if lli_len > 1 or to pre_alloc_lli if
+ * lli_len equals one.
+ * @lli_log: Same as above but for logical channels.
+ * @lli_pool: The pool with two entries pre-allocated.
+ * @lli_len: Number of llis of current descriptor.
+ * @lli_current: Number of transferred llis.
+ * @lcla_alloc: Number of LCLA entries allocated.
+ * @txd: DMA engine struct. Used for among other things for communication
+ * during a transfer.
+ * @node: List entry.
+ * @is_in_client_list: true if the client owns this descriptor.
+ * @cyclic: true if this is a cyclic job
+ *
+ * This descriptor is used for both logical and physical transfers.
+ */
+struct d40_desc {
+	/* LLI physical */
+	struct d40_phy_lli_bidir	 lli_phy;
+	/* LLI logical */
+	struct d40_log_lli_bidir	 lli_log;
+
+	struct d40_lli_pool		 lli_pool;
+	int				 lli_len;
+	int				 lli_current;
+	int				 lcla_alloc;
+
+	struct dma_async_tx_descriptor	 txd;
+	struct list_head		 node;
+
+	bool				 is_in_client_list;
+	bool				 cyclic;
+};
+
+/**
+ * struct d40_lcla_pool - LCLA pool settings and data.
+ *
+ * @base: The virtual address of LCLA. 18 bit aligned.
+ * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used.
+ * This pointer is only there for clean-up on error.
+ * @pages: The number of pages needed for all physical channels.
+ * Only used later for clean-up on error
+ * @lock: Lock to protect the content in this struct.
+ * @alloc_map: big map over which LCLA entry is own by which job.
+ */
+struct d40_lcla_pool {
+	void		*base;
+	dma_addr_t	dma_addr;
+	void		*base_unaligned;
+	int		 pages;
+	spinlock_t	 lock;
+	struct d40_desc	**alloc_map;
+};
+
+/**
+ * struct d40_phy_res - struct for handling eventlines mapped to physical
+ * channels.
+ *
+ * @lock: A lock protection this entity.
+ * @reserved: True if used by secure world or otherwise.
+ * @num: The physical channel number of this entity.
+ * @allocated_src: Bit mapped to show which src event line's are mapped to
+ * this physical channel. Can also be free or physically allocated.
+ * @allocated_dst: Same as for src but is dst.
+ * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as
+ * event line number.
+ */
+struct d40_phy_res {
+	spinlock_t lock;
+	bool	   reserved;
+	int	   num;
+	u32	   allocated_src;
+	u32	   allocated_dst;
+};
+
+struct d40_base;
+
+/**
+ * struct d40_chan - Struct that describes a channel.
+ *
+ * @lock: A spinlock to protect this struct.
+ * @log_num: The logical number, if any of this channel.
+ * @pending_tx: The number of pending transfers. Used between interrupt handler
+ * and tasklet.
+ * @busy: Set to true when transfer is ongoing on this channel.
+ * @phy_chan: Pointer to physical channel which this instance runs on. If this
+ * point is NULL, then the channel is not allocated.
+ * @chan: DMA engine handle.
+ * @tasklet: Tasklet that gets scheduled from interrupt context to complete a
+ * transfer and call client callback.
+ * @client: Cliented owned descriptor list.
+ * @pending_queue: Submitted jobs, to be issued by issue_pending()
+ * @active: Active descriptor.
+ * @queue: Queued jobs.
+ * @prepare_queue: Prepared jobs.
+ * @dma_cfg: The client configuration of this dma channel.
+ * @configured: whether the dma_cfg configuration is valid
+ * @base: Pointer to the device instance struct.
+ * @src_def_cfg: Default cfg register setting for src.
+ * @dst_def_cfg: Default cfg register setting for dst.
+ * @log_def: Default logical channel settings.
+ * @lcpa: Pointer to dst and src lcpa settings.
+ * @runtime_addr: runtime configured address.
+ * @runtime_direction: runtime configured direction.
+ *
+ * This struct can either "be" a logical or a physical channel.
+ */
+struct d40_chan {
+	spinlock_t			 lock;
+	int				 log_num;
+	int				 pending_tx;
+	bool				 busy;
+	struct d40_phy_res		*phy_chan;
+	struct dma_chan			 chan;
+	struct tasklet_struct		 tasklet;
+	struct list_head		 client;
+	struct list_head		 pending_queue;
+	struct list_head		 active;
+	struct list_head		 queue;
+	struct list_head		 prepare_queue;
+	struct stedma40_chan_cfg	 dma_cfg;
+	bool				 configured;
+	struct d40_base			*base;
+	/* Default register configurations */
+	u32				 src_def_cfg;
+	u32				 dst_def_cfg;
+	struct d40_def_lcsp		 log_def;
+	struct d40_log_lli_full		*lcpa;
+	/* Runtime reconfiguration */
+	dma_addr_t			runtime_addr;
+	enum dma_transfer_direction	runtime_direction;
+};
+
+/**
+ * struct d40_base - The big global struct, one for each probe'd instance.
+ *
+ * @interrupt_lock: Lock used to make sure one interrupt is handle a time.
+ * @execmd_lock: Lock for execute command usage since several channels share
+ * the same physical register.
+ * @dev: The device structure.
+ * @virtbase: The virtual base address of the DMA's register.
+ * @rev: silicon revision detected.
+ * @clk: Pointer to the DMA clock structure.
+ * @phy_start: Physical memory start of the DMA registers.
+ * @phy_size: Size of the DMA register map.
+ * @irq: The IRQ number.
+ * @num_phy_chans: The number of physical channels. Read from HW. This
+ * is the number of available channels for this driver, not counting "Secure
+ * mode" allocated physical channels.
+ * @num_log_chans: The number of logical channels. Calculated from
+ * num_phy_chans.
+ * @dma_both: dma_device channels that can do both memcpy and slave transfers.
+ * @dma_slave: dma_device channels that can do only do slave transfers.
+ * @dma_memcpy: dma_device channels that can do only do memcpy transfers.
+ * @phy_chans: Room for all possible physical channels in system.
+ * @log_chans: Room for all possible logical channels in system.
+ * @lookup_log_chans: Used to map interrupt number to logical channel. Points
+ * to log_chans entries.
+ * @lookup_phy_chans: Used to map interrupt number to physical channel. Points
+ * to phy_chans entries.
+ * @plat_data: Pointer to provided platform_data which is the driver
+ * configuration.
+ * @lcpa_regulator: Pointer to hold the regulator for the esram bank for lcla.
+ * @phy_res: Vector containing all physical channels.
+ * @lcla_pool: lcla pool settings and data.
+ * @lcpa_base: The virtual mapped address of LCPA.
+ * @phy_lcpa: The physical address of the LCPA.
+ * @lcpa_size: The size of the LCPA area.
+ * @desc_slab: cache for descriptors.
+ * @reg_val_backup: Here the values of some hardware registers are stored
+ * before the DMA is powered off. They are restored when the power is back on.
+ * @reg_val_backup_v3: Backup of registers that only exits on dma40 v3 and
+ * later.
+ * @reg_val_backup_chan: Backup data for standard channel parameter registers.
+ * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off.
+ * @initialized: true if the dma has been initialized
+ */
+struct d40_base {
+	spinlock_t			 interrupt_lock;
+	spinlock_t			 execmd_lock;
+	struct device			 *dev;
+	void __iomem			 *virtbase;
+	u8				  rev:4;
+	struct clk			 *clk;
+	phys_addr_t			  phy_start;
+	resource_size_t			  phy_size;
+	int				  irq;
+	int				  num_phy_chans;
+	int				  num_log_chans;
+	struct dma_device		  dma_both;
+	struct dma_device		  dma_slave;
+	struct dma_device		  dma_memcpy;
+	struct d40_chan			 *phy_chans;
+	struct d40_chan			 *log_chans;
+	struct d40_chan			**lookup_log_chans;
+	struct d40_chan			**lookup_phy_chans;
+	struct stedma40_platform_data	 *plat_data;
+	struct regulator		 *lcpa_regulator;
+	/* Physical half channels */
+	struct d40_phy_res		 *phy_res;
+	struct d40_lcla_pool		  lcla_pool;
+	void				 *lcpa_base;
+	dma_addr_t			  phy_lcpa;
+	resource_size_t			  lcpa_size;
+	struct kmem_cache		 *desc_slab;
+	u32				  reg_val_backup[BACKUP_REGS_SZ];
+	u32				  reg_val_backup_v3[BACKUP_REGS_SZ_V3];
+	u32				 *reg_val_backup_chan;
+	u16				  gcc_pwr_off_mask;
+	bool				  initialized;
+};
+
+/**
+ * struct d40_interrupt_lookup - lookup table for interrupt handler
+ *
+ * @src: Interrupt mask register.
+ * @clr: Interrupt clear register.
+ * @is_error: true if this is an error interrupt.
+ * @offset: start delta in the lookup_log_chans in d40_base. If equals to
+ * D40_PHY_CHAN, the lookup_phy_chans shall be used instead.
+ */
+struct d40_interrupt_lookup {
+	u32 src;
+	u32 clr;
+	bool is_error;
+	int offset;
+};
+
+/**
+ * struct d40_reg_val - simple lookup struct
+ *
+ * @reg: The register.
+ * @val: The value that belongs to the register in reg.
+ */
+struct d40_reg_val {
+	unsigned int reg;
+	unsigned int val;
+};
+
+static struct device *chan2dev(struct d40_chan *d40c)
+{
+	return &d40c->chan.dev->device;
+}
+
+static bool chan_is_physical(struct d40_chan *chan)
+{
+	return chan->log_num == D40_PHY_CHAN;
+}
+
+static bool chan_is_logical(struct d40_chan *chan)
+{
+	return !chan_is_physical(chan);
+}
+
+static void __iomem *chan_base(struct d40_chan *chan)
+{
+	return chan->base->virtbase + D40_DREG_PCBASE +
+	       chan->phy_chan->num * D40_DREG_PCDELTA;
+}
+
+#define d40_err(dev, format, arg...)		\
+	dev_err(dev, "[%s] " format, __func__, ## arg)
+
+#define chan_err(d40c, format, arg...)		\
+	d40_err(chan2dev(d40c), format, ## arg)
+
+static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d,
+			      int lli_len)
+{
+	bool is_log = chan_is_logical(d40c);
+	u32 align;
+	void *base;
+
+	if (is_log)
+		align = sizeof(struct d40_log_lli);
+	else
+		align = sizeof(struct d40_phy_lli);
+
+	if (lli_len == 1) {
+		base = d40d->lli_pool.pre_alloc_lli;
+		d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli);
+		d40d->lli_pool.base = NULL;
+	} else {
+		d40d->lli_pool.size = lli_len * 2 * align;
+
+		base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT);
+		d40d->lli_pool.base = base;
+
+		if (d40d->lli_pool.base == NULL)
+			return -ENOMEM;
+	}
+
+	if (is_log) {
+		d40d->lli_log.src = PTR_ALIGN(base, align);
+		d40d->lli_log.dst = d40d->lli_log.src + lli_len;
+
+		d40d->lli_pool.dma_addr = 0;
+	} else {
+		d40d->lli_phy.src = PTR_ALIGN(base, align);
+		d40d->lli_phy.dst = d40d->lli_phy.src + lli_len;
+
+		d40d->lli_pool.dma_addr = dma_map_single(d40c->base->dev,
+							 d40d->lli_phy.src,
+							 d40d->lli_pool.size,
+							 DMA_TO_DEVICE);
+
+		if (dma_mapping_error(d40c->base->dev,
+				      d40d->lli_pool.dma_addr)) {
+			kfree(d40d->lli_pool.base);
+			d40d->lli_pool.base = NULL;
+			d40d->lli_pool.dma_addr = 0;
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void d40_pool_lli_free(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+	if (d40d->lli_pool.dma_addr)
+		dma_unmap_single(d40c->base->dev, d40d->lli_pool.dma_addr,
+				 d40d->lli_pool.size, DMA_TO_DEVICE);
+
+	kfree(d40d->lli_pool.base);
+	d40d->lli_pool.base = NULL;
+	d40d->lli_pool.size = 0;
+	d40d->lli_log.src = NULL;
+	d40d->lli_log.dst = NULL;
+	d40d->lli_phy.src = NULL;
+	d40d->lli_phy.dst = NULL;
+}
+
+static int d40_lcla_alloc_one(struct d40_chan *d40c,
+			      struct d40_desc *d40d)
+{
+	unsigned long flags;
+	int i;
+	int ret = -EINVAL;
+	int p;
+
+	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
+
+	p = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP;
+
+	/*
+	 * Allocate both src and dst at the same time, therefore the half
+	 * start on 1 since 0 can't be used since zero is used as end marker.
+	 */
+	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
+		if (!d40c->base->lcla_pool.alloc_map[p + i]) {
+			d40c->base->lcla_pool.alloc_map[p + i] = d40d;
+			d40d->lcla_alloc++;
+			ret = i;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
+
+	return ret;
+}
+
+static int d40_lcla_free_all(struct d40_chan *d40c,
+			     struct d40_desc *d40d)
+{
+	unsigned long flags;
+	int i;
+	int ret = -EINVAL;
+
+	if (chan_is_physical(d40c))
+		return 0;
+
+	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
+
+	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
+		if (d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num *
+						    D40_LCLA_LINK_PER_EVENT_GRP + i] == d40d) {
+			d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num *
+							D40_LCLA_LINK_PER_EVENT_GRP + i] = NULL;
+			d40d->lcla_alloc--;
+			if (d40d->lcla_alloc == 0) {
+				ret = 0;
+				break;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
+
+	return ret;
+
+}
+
+static void d40_desc_remove(struct d40_desc *d40d)
+{
+	list_del(&d40d->node);
+}
+
+static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
+{
+	struct d40_desc *desc = NULL;
+
+	if (!list_empty(&d40c->client)) {
+		struct d40_desc *d;
+		struct d40_desc *_d;
+
+		list_for_each_entry_safe(d, _d, &d40c->client, node) {
+			if (async_tx_test_ack(&d->txd)) {
+				d40_desc_remove(d);
+				desc = d;
+				memset(desc, 0, sizeof(*desc));
+				break;
+			}
+		}
+	}
+
+	if (!desc)
+		desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT);
+
+	if (desc)
+		INIT_LIST_HEAD(&desc->node);
+
+	return desc;
+}
+
+static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+
+	d40_pool_lli_free(d40c, d40d);
+	d40_lcla_free_all(d40c, d40d);
+	kmem_cache_free(d40c->base->desc_slab, d40d);
+}
+
+static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc)
+{
+	list_add_tail(&desc->node, &d40c->active);
+}
+
+static void d40_phy_lli_load(struct d40_chan *chan, struct d40_desc *desc)
+{
+	struct d40_phy_lli *lli_dst = desc->lli_phy.dst;
+	struct d40_phy_lli *lli_src = desc->lli_phy.src;
+	void __iomem *base = chan_base(chan);
+
+	writel(lli_src->reg_cfg, base + D40_CHAN_REG_SSCFG);
+	writel(lli_src->reg_elt, base + D40_CHAN_REG_SSELT);
+	writel(lli_src->reg_ptr, base + D40_CHAN_REG_SSPTR);
+	writel(lli_src->reg_lnk, base + D40_CHAN_REG_SSLNK);
+
+	writel(lli_dst->reg_cfg, base + D40_CHAN_REG_SDCFG);
+	writel(lli_dst->reg_elt, base + D40_CHAN_REG_SDELT);
+	writel(lli_dst->reg_ptr, base + D40_CHAN_REG_SDPTR);
+	writel(lli_dst->reg_lnk, base + D40_CHAN_REG_SDLNK);
+}
+
+static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc)
+{
+	struct d40_lcla_pool *pool = &chan->base->lcla_pool;
+	struct d40_log_lli_bidir *lli = &desc->lli_log;
+	int lli_current = desc->lli_current;
+	int lli_len = desc->lli_len;
+	bool cyclic = desc->cyclic;
+	int curr_lcla = -EINVAL;
+	int first_lcla = 0;
+	bool use_esram_lcla = chan->base->plat_data->use_esram_lcla;
+	bool linkback;
+
+	/*
+	 * We may have partially running cyclic transfers, in case we did't get
+	 * enough LCLA entries.
+	 */
+	linkback = cyclic && lli_current == 0;
+
+	/*
+	 * For linkback, we need one LCLA even with only one link, because we
+	 * can't link back to the one in LCPA space
+	 */
+	if (linkback || (lli_len - lli_current > 1)) {
+		curr_lcla = d40_lcla_alloc_one(chan, desc);
+		first_lcla = curr_lcla;
+	}
+
+	/*
+	 * For linkback, we normally load the LCPA in the loop since we need to
+	 * link it to the second LCLA and not the first.  However, if we
+	 * couldn't even get a first LCLA, then we have to run in LCPA and
+	 * reload manually.
+	 */
+	if (!linkback || curr_lcla == -EINVAL) {
+		unsigned int flags = 0;
+
+		if (curr_lcla == -EINVAL)
+			flags |= LLI_TERM_INT;
+
+		d40_log_lli_lcpa_write(chan->lcpa,
+				       &lli->dst[lli_current],
+				       &lli->src[lli_current],
+				       curr_lcla,
+				       flags);
+		lli_current++;
+	}
+
+	if (curr_lcla < 0)
+		goto out;
+
+	for (; lli_current < lli_len; lli_current++) {
+		unsigned int lcla_offset = chan->phy_chan->num * 1024 +
+					   8 * curr_lcla * 2;
+		struct d40_log_lli *lcla = pool->base + lcla_offset;
+		unsigned int flags = 0;
+		int next_lcla;
+
+		if (lli_current + 1 < lli_len)
+			next_lcla = d40_lcla_alloc_one(chan, desc);
+		else
+			next_lcla = linkback ? first_lcla : -EINVAL;
+
+		if (cyclic || next_lcla == -EINVAL)
+			flags |= LLI_TERM_INT;
+
+		if (linkback && curr_lcla == first_lcla) {
+			/* First link goes in both LCPA and LCLA */
+			d40_log_lli_lcpa_write(chan->lcpa,
+					       &lli->dst[lli_current],
+					       &lli->src[lli_current],
+					       next_lcla, flags);
+		}
+
+		/*
+		 * One unused LCLA in the cyclic case if the very first
+		 * next_lcla fails...
+		 */
+		d40_log_lli_lcla_write(lcla,
+				       &lli->dst[lli_current],
+				       &lli->src[lli_current],
+				       next_lcla, flags);
+
+		/*
+		 * Cache maintenance is not needed if lcla is
+		 * mapped in esram
+		 */
+		if (!use_esram_lcla) {
+			dma_sync_single_range_for_device(chan->base->dev,
+						pool->dma_addr, lcla_offset,
+						2 * sizeof(struct d40_log_lli),
+						DMA_TO_DEVICE);
+		}
+		curr_lcla = next_lcla;
+
+		if (curr_lcla == -EINVAL || curr_lcla == first_lcla) {
+			lli_current++;
+			break;
+		}
+	}
+
+out:
+	desc->lli_current = lli_current;
+}
+
+static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+	if (chan_is_physical(d40c)) {
+		d40_phy_lli_load(d40c, d40d);
+		d40d->lli_current = d40d->lli_len;
+	} else
+		d40_log_lli_to_lcxa(d40c, d40d);
+}
+
+static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
+{
+	struct d40_desc *d;
+
+	if (list_empty(&d40c->active))
+		return NULL;
+
+	d = list_first_entry(&d40c->active,
+			     struct d40_desc,
+			     node);
+	return d;
+}
+
+/* remove desc from current queue and add it to the pending_queue */
+static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
+{
+	d40_desc_remove(desc);
+	desc->is_in_client_list = false;
+	list_add_tail(&desc->node, &d40c->pending_queue);
+}
+
+static struct d40_desc *d40_first_pending(struct d40_chan *d40c)
+{
+	struct d40_desc *d;
+
+	if (list_empty(&d40c->pending_queue))
+		return NULL;
+
+	d = list_first_entry(&d40c->pending_queue,
+			     struct d40_desc,
+			     node);
+	return d;
+}
+
+static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
+{
+	struct d40_desc *d;
+
+	if (list_empty(&d40c->queue))
+		return NULL;
+
+	d = list_first_entry(&d40c->queue,
+			     struct d40_desc,
+			     node);
+	return d;
+}
+
+static int d40_psize_2_burst_size(bool is_log, int psize)
+{
+	if (is_log) {
+		if (psize == STEDMA40_PSIZE_LOG_1)
+			return 1;
+	} else {
+		if (psize == STEDMA40_PSIZE_PHY_1)
+			return 1;
+	}
+
+	return 2 << psize;
+}
+
+/*
+ * The dma only supports transmitting packages up to
+ * STEDMA40_MAX_SEG_SIZE << data_width. Calculate the total number of
+ * dma elements required to send the entire sg list
+ */
+static int d40_size_2_dmalen(int size, u32 data_width1, u32 data_width2)
+{
+	int dmalen;
+	u32 max_w = max(data_width1, data_width2);
+	u32 min_w = min(data_width1, data_width2);
+	u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w);
+
+	if (seg_max > STEDMA40_MAX_SEG_SIZE)
+		seg_max -= (1 << max_w);
+
+	if (!IS_ALIGNED(size, 1 << max_w))
+		return -EINVAL;
+
+	if (size <= seg_max)
+		dmalen = 1;
+	else {
+		dmalen = size / seg_max;
+		if (dmalen * seg_max < size)
+			dmalen++;
+	}
+	return dmalen;
+}
+
+static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len,
+			   u32 data_width1, u32 data_width2)
+{
+	struct scatterlist *sg;
+	int i;
+	int len = 0;
+	int ret;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		ret = d40_size_2_dmalen(sg_dma_len(sg),
+					data_width1, data_width2);
+		if (ret < 0)
+			return ret;
+		len += ret;
+	}
+	return len;
+}
+
+
+#ifdef CONFIG_PM
+static void dma40_backup(void __iomem *baseaddr, u32 *backup,
+			 u32 *regaddr, int num, bool save)
+{
+	int i;
+
+	for (i = 0; i < num; i++) {
+		void __iomem *addr = baseaddr + regaddr[i];
+
+		if (save)
+			backup[i] = readl_relaxed(addr);
+		else
+			writel_relaxed(backup[i], addr);
+	}
+}
+
+static void d40_save_restore_registers(struct d40_base *base, bool save)
+{
+	int i;
+
+	/* Save/Restore channel specific registers */
+	for (i = 0; i < base->num_phy_chans; i++) {
+		void __iomem *addr;
+		int idx;
+
+		if (base->phy_res[i].reserved)
+			continue;
+
+		addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA;
+		idx = i * ARRAY_SIZE(d40_backup_regs_chan);
+
+		dma40_backup(addr, &base->reg_val_backup_chan[idx],
+			     d40_backup_regs_chan,
+			     ARRAY_SIZE(d40_backup_regs_chan),
+			     save);
+	}
+
+	/* Save/Restore global registers */
+	dma40_backup(base->virtbase, base->reg_val_backup,
+		     d40_backup_regs, ARRAY_SIZE(d40_backup_regs),
+		     save);
+
+	/* Save/Restore registers only existing on dma40 v3 and later */
+	if (base->rev >= 3)
+		dma40_backup(base->virtbase, base->reg_val_backup_v3,
+			     d40_backup_regs_v3,
+			     ARRAY_SIZE(d40_backup_regs_v3),
+			     save);
+}
+#else
+static void d40_save_restore_registers(struct d40_base *base, bool save)
+{
+}
+#endif
+
+static int __d40_execute_command_phy(struct d40_chan *d40c,
+				     enum d40_command command)
+{
+	u32 status;
+	int i;
+	void __iomem *active_reg;
+	int ret = 0;
+	unsigned long flags;
+	u32 wmask;
+
+	if (command == D40_DMA_STOP) {
+		ret = __d40_execute_command_phy(d40c, D40_DMA_SUSPEND_REQ);
+		if (ret)
+			return ret;
+	}
+
+	spin_lock_irqsave(&d40c->base->execmd_lock, flags);
+
+	if (d40c->phy_chan->num % 2 == 0)
+		active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+	else
+		active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+	if (command == D40_DMA_SUSPEND_REQ) {
+		status = (readl(active_reg) &
+			  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+			D40_CHAN_POS(d40c->phy_chan->num);
+
+		if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
+			goto done;
+	}
+
+	wmask = 0xffffffff & ~(D40_CHAN_POS_MASK(d40c->phy_chan->num));
+	writel(wmask | (command << D40_CHAN_POS(d40c->phy_chan->num)),
+	       active_reg);
+
+	if (command == D40_DMA_SUSPEND_REQ) {
+
+		for (i = 0 ; i < D40_SUSPEND_MAX_IT; i++) {
+			status = (readl(active_reg) &
+				  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+				D40_CHAN_POS(d40c->phy_chan->num);
+
+			cpu_relax();
+			/*
+			 * Reduce the number of bus accesses while
+			 * waiting for the DMA to suspend.
+			 */
+			udelay(3);
+
+			if (status == D40_DMA_STOP ||
+			    status == D40_DMA_SUSPENDED)
+				break;
+		}
+
+		if (i == D40_SUSPEND_MAX_IT) {
+			chan_err(d40c,
+				"unable to suspend the chl %d (log: %d) status %x\n",
+				d40c->phy_chan->num, d40c->log_num,
+				status);
+			dump_stack();
+			ret = -EBUSY;
+		}
+
+	}
+done:
+	spin_unlock_irqrestore(&d40c->base->execmd_lock, flags);
+	return ret;
+}
+
+static void d40_term_all(struct d40_chan *d40c)
+{
+	struct d40_desc *d40d;
+	struct d40_desc *_d;
+
+	/* Release active descriptors */
+	while ((d40d = d40_first_active_get(d40c))) {
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
+	}
+
+	/* Release queued descriptors waiting for transfer */
+	while ((d40d = d40_first_queued(d40c))) {
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
+	}
+
+	/* Release pending descriptors */
+	while ((d40d = d40_first_pending(d40c))) {
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
+	}
+
+	/* Release client owned descriptors */
+	if (!list_empty(&d40c->client))
+		list_for_each_entry_safe(d40d, _d, &d40c->client, node) {
+			d40_desc_remove(d40d);
+			d40_desc_free(d40c, d40d);
+		}
+
+	/* Release descriptors in prepare queue */
+	if (!list_empty(&d40c->prepare_queue))
+		list_for_each_entry_safe(d40d, _d,
+					 &d40c->prepare_queue, node) {
+			d40_desc_remove(d40d);
+			d40_desc_free(d40c, d40d);
+		}
+
+	d40c->pending_tx = 0;
+}
+
+static void __d40_config_set_event(struct d40_chan *d40c,
+				   enum d40_events event_type, u32 event,
+				   int reg)
+{
+	void __iomem *addr = chan_base(d40c) + reg;
+	int tries;
+	u32 status;
+
+	switch (event_type) {
+
+	case D40_DEACTIVATE_EVENTLINE:
+
+		writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event))
+		       | ~D40_EVENTLINE_MASK(event), addr);
+		break;
+
+	case D40_SUSPEND_REQ_EVENTLINE:
+		status = (readl(addr) & D40_EVENTLINE_MASK(event)) >>
+			  D40_EVENTLINE_POS(event);
+
+		if (status == D40_DEACTIVATE_EVENTLINE ||
+		    status == D40_SUSPEND_REQ_EVENTLINE)
+			break;
+
+		writel((D40_SUSPEND_REQ_EVENTLINE << D40_EVENTLINE_POS(event))
+		       | ~D40_EVENTLINE_MASK(event), addr);
+
+		for (tries = 0 ; tries < D40_SUSPEND_MAX_IT; tries++) {
+
+			status = (readl(addr) & D40_EVENTLINE_MASK(event)) >>
+				  D40_EVENTLINE_POS(event);
+
+			cpu_relax();
+			/*
+			 * Reduce the number of bus accesses while
+			 * waiting for the DMA to suspend.
+			 */
+			udelay(3);
+
+			if (status == D40_DEACTIVATE_EVENTLINE)
+				break;
+		}
+
+		if (tries == D40_SUSPEND_MAX_IT) {
+			chan_err(d40c,
+				"unable to stop the event_line chl %d (log: %d)"
+				"status %x\n", d40c->phy_chan->num,
+				 d40c->log_num, status);
+		}
+		break;
+
+	case D40_ACTIVATE_EVENTLINE:
+	/*
+	 * The hardware sometimes doesn't register the enable when src and dst
+	 * event lines are active on the same logical channel.  Retry to ensure
+	 * it does.  Usually only one retry is sufficient.
+	 */
+		tries = 100;
+		while (--tries) {
+			writel((D40_ACTIVATE_EVENTLINE <<
+				D40_EVENTLINE_POS(event)) |
+				~D40_EVENTLINE_MASK(event), addr);
+
+			if (readl(addr) & D40_EVENTLINE_MASK(event))
+				break;
+		}
+
+		if (tries != 99)
+			dev_dbg(chan2dev(d40c),
+				"[%s] workaround enable S%cLNK (%d tries)\n",
+				__func__, reg == D40_CHAN_REG_SSLNK ? 'S' : 'D',
+				100 - tries);
+
+		WARN_ON(!tries);
+		break;
+
+	case D40_ROUND_EVENTLINE:
+		BUG();
+		break;
+
+	}
+}
+
+static void d40_config_set_event(struct d40_chan *d40c,
+				 enum d40_events event_type)
+{
+	/* Enable event line connected to device (or memcpy) */
+	if ((d40c->dma_cfg.dir ==  STEDMA40_PERIPH_TO_MEM) ||
+	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) {
+		u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
+
+		__d40_config_set_event(d40c, event_type, event,
+				       D40_CHAN_REG_SSLNK);
+	}
+
+	if (d40c->dma_cfg.dir !=  STEDMA40_PERIPH_TO_MEM) {
+		u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
+
+		__d40_config_set_event(d40c, event_type, event,
+				       D40_CHAN_REG_SDLNK);
+	}
+}
+
+static u32 d40_chan_has_events(struct d40_chan *d40c)
+{
+	void __iomem *chanbase = chan_base(d40c);
+	u32 val;
+
+	val = readl(chanbase + D40_CHAN_REG_SSLNK);
+	val |= readl(chanbase + D40_CHAN_REG_SDLNK);
+
+	return val;
+}
+
+static int
+__d40_execute_command_log(struct d40_chan *d40c, enum d40_command command)
+{
+	unsigned long flags;
+	int ret = 0;
+	u32 active_status;
+	void __iomem *active_reg;
+
+	if (d40c->phy_chan->num % 2 == 0)
+		active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+	else
+		active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+
+	spin_lock_irqsave(&d40c->phy_chan->lock, flags);
+
+	switch (command) {
+	case D40_DMA_STOP:
+	case D40_DMA_SUSPEND_REQ:
+
+		active_status = (readl(active_reg) &
+				 D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+				 D40_CHAN_POS(d40c->phy_chan->num);
+
+		if (active_status == D40_DMA_RUN)
+			d40_config_set_event(d40c, D40_SUSPEND_REQ_EVENTLINE);
+		else
+			d40_config_set_event(d40c, D40_DEACTIVATE_EVENTLINE);
+
+		if (!d40_chan_has_events(d40c) && (command == D40_DMA_STOP))
+			ret = __d40_execute_command_phy(d40c, command);
+
+		break;
+
+	case D40_DMA_RUN:
+
+		d40_config_set_event(d40c, D40_ACTIVATE_EVENTLINE);
+		ret = __d40_execute_command_phy(d40c, command);
+		break;
+
+	case D40_DMA_SUSPENDED:
+		BUG();
+		break;
+	}
+
+	spin_unlock_irqrestore(&d40c->phy_chan->lock, flags);
+	return ret;
+}
+
+static int d40_channel_execute_command(struct d40_chan *d40c,
+				       enum d40_command command)
+{
+	if (chan_is_logical(d40c))
+		return __d40_execute_command_log(d40c, command);
+	else
+		return __d40_execute_command_phy(d40c, command);
+}
+
+static u32 d40_get_prmo(struct d40_chan *d40c)
+{
+	static const unsigned int phy_map[] = {
+		[STEDMA40_PCHAN_BASIC_MODE]
+			= D40_DREG_PRMO_PCHAN_BASIC,
+		[STEDMA40_PCHAN_MODULO_MODE]
+			= D40_DREG_PRMO_PCHAN_MODULO,
+		[STEDMA40_PCHAN_DOUBLE_DST_MODE]
+			= D40_DREG_PRMO_PCHAN_DOUBLE_DST,
+	};
+	static const unsigned int log_map[] = {
+		[STEDMA40_LCHAN_SRC_PHY_DST_LOG]
+			= D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG,
+		[STEDMA40_LCHAN_SRC_LOG_DST_PHY]
+			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY,
+		[STEDMA40_LCHAN_SRC_LOG_DST_LOG]
+			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG,
+	};
+
+	if (chan_is_physical(d40c))
+		return phy_map[d40c->dma_cfg.mode_opt];
+	else
+		return log_map[d40c->dma_cfg.mode_opt];
+}
+
+static void d40_config_write(struct d40_chan *d40c)
+{
+	u32 addr_base;
+	u32 var;
+
+	/* Odd addresses are even addresses + 4 */
+	addr_base = (d40c->phy_chan->num % 2) * 4;
+	/* Setup channel mode to logical or physical */
+	var = ((u32)(chan_is_logical(d40c)) + 1) <<
+		D40_CHAN_POS(d40c->phy_chan->num);
+	writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base);
+
+	/* Setup operational mode option register */
+	var = d40_get_prmo(d40c) << D40_CHAN_POS(d40c->phy_chan->num);
+
+	writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base);
+
+	if (chan_is_logical(d40c)) {
+		int lidx = (d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS)
+			   & D40_SREG_ELEM_LOG_LIDX_MASK;
+		void __iomem *chanbase = chan_base(d40c);
+
+		/* Set default config for CFG reg */
+		writel(d40c->src_def_cfg, chanbase + D40_CHAN_REG_SSCFG);
+		writel(d40c->dst_def_cfg, chanbase + D40_CHAN_REG_SDCFG);
+
+		/* Set LIDX for lcla */
+		writel(lidx, chanbase + D40_CHAN_REG_SSELT);
+		writel(lidx, chanbase + D40_CHAN_REG_SDELT);
+
+		/* Clear LNK which will be used by d40_chan_has_events() */
+		writel(0, chanbase + D40_CHAN_REG_SSLNK);
+		writel(0, chanbase + D40_CHAN_REG_SDLNK);
+	}
+}
+
+static u32 d40_residue(struct d40_chan *d40c)
+{
+	u32 num_elt;
+
+	if (chan_is_logical(d40c))
+		num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
+			>> D40_MEM_LCSP2_ECNT_POS;
+	else {
+		u32 val = readl(chan_base(d40c) + D40_CHAN_REG_SDELT);
+		num_elt = (val & D40_SREG_ELEM_PHY_ECNT_MASK)
+			  >> D40_SREG_ELEM_PHY_ECNT_POS;
+	}
+
+	return num_elt * (1 << d40c->dma_cfg.dst_info.data_width);
+}
+
+static bool d40_tx_is_linked(struct d40_chan *d40c)
+{
+	bool is_link;
+
+	if (chan_is_logical(d40c))
+		is_link = readl(&d40c->lcpa->lcsp3) &  D40_MEM_LCSP3_DLOS_MASK;
+	else
+		is_link = readl(chan_base(d40c) + D40_CHAN_REG_SDLNK)
+			  & D40_SREG_LNK_PHYS_LNK_MASK;
+
+	return is_link;
+}
+
+static int d40_pause(struct d40_chan *d40c)
+{
+	int res = 0;
+	unsigned long flags;
+
+	if (!d40c->busy)
+		return 0;
+
+	pm_runtime_get_sync(d40c->base->dev);
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
+
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return res;
+}
+
+static int d40_resume(struct d40_chan *d40c)
+{
+	int res = 0;
+	unsigned long flags;
+
+	if (!d40c->busy)
+		return 0;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+	pm_runtime_get_sync(d40c->base->dev);
+
+	/* If bytes left to transfer or linked tx resume job */
+	if (d40_residue(d40c) || d40_tx_is_linked(d40c))
+		res = d40_channel_execute_command(d40c, D40_DMA_RUN);
+
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return res;
+}
+
+static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct d40_chan *d40c = container_of(tx->chan,
+					     struct d40_chan,
+					     chan);
+	struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+	cookie = dma_cookie_assign(tx);
+	d40_desc_queue(d40c, d40d);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+
+	return cookie;
+}
+
+static int d40_start(struct d40_chan *d40c)
+{
+	return d40_channel_execute_command(d40c, D40_DMA_RUN);
+}
+
+static struct d40_desc *d40_queue_start(struct d40_chan *d40c)
+{
+	struct d40_desc *d40d;
+	int err;
+
+	/* Start queued jobs, if any */
+	d40d = d40_first_queued(d40c);
+
+	if (d40d != NULL) {
+		if (!d40c->busy) {
+			d40c->busy = true;
+			pm_runtime_get_sync(d40c->base->dev);
+		}
+
+		/* Remove from queue */
+		d40_desc_remove(d40d);
+
+		/* Add to active queue */
+		d40_desc_submit(d40c, d40d);
+
+		/* Initiate DMA job */
+		d40_desc_load(d40c, d40d);
+
+		/* Start dma job */
+		err = d40_start(d40c);
+
+		if (err)
+			return NULL;
+	}
+
+	return d40d;
+}
+
+/* called from interrupt context */
+static void dma_tc_handle(struct d40_chan *d40c)
+{
+	struct d40_desc *d40d;
+
+	/* Get first active entry from list */
+	d40d = d40_first_active_get(d40c);
+
+	if (d40d == NULL)
+		return;
+
+	if (d40d->cyclic) {
+		/*
+		 * If this was a paritially loaded list, we need to reloaded
+		 * it, and only when the list is completed.  We need to check
+		 * for done because the interrupt will hit for every link, and
+		 * not just the last one.
+		 */
+		if (d40d->lli_current < d40d->lli_len
+		    && !d40_tx_is_linked(d40c)
+		    && !d40_residue(d40c)) {
+			d40_lcla_free_all(d40c, d40d);
+			d40_desc_load(d40c, d40d);
+			(void) d40_start(d40c);
+
+			if (d40d->lli_current == d40d->lli_len)
+				d40d->lli_current = 0;
+		}
+	} else {
+		d40_lcla_free_all(d40c, d40d);
+
+		if (d40d->lli_current < d40d->lli_len) {
+			d40_desc_load(d40c, d40d);
+			/* Start dma job */
+			(void) d40_start(d40c);
+			return;
+		}
+
+		if (d40_queue_start(d40c) == NULL)
+			d40c->busy = false;
+		pm_runtime_mark_last_busy(d40c->base->dev);
+		pm_runtime_put_autosuspend(d40c->base->dev);
+	}
+
+	d40c->pending_tx++;
+	tasklet_schedule(&d40c->tasklet);
+
+}
+
+static void dma_tasklet(unsigned long data)
+{
+	struct d40_chan *d40c = (struct d40_chan *) data;
+	struct d40_desc *d40d;
+	unsigned long flags;
+	dma_async_tx_callback callback;
+	void *callback_param;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	/* Get first active entry from list */
+	d40d = d40_first_active_get(d40c);
+	if (d40d == NULL)
+		goto err;
+
+	if (!d40d->cyclic)
+		dma_cookie_complete(&d40d->txd);
+
+	/*
+	 * If terminating a channel pending_tx is set to zero.
+	 * This prevents any finished active jobs to return to the client.
+	 */
+	if (d40c->pending_tx == 0) {
+		spin_unlock_irqrestore(&d40c->lock, flags);
+		return;
+	}
+
+	/* Callback to client */
+	callback = d40d->txd.callback;
+	callback_param = d40d->txd.callback_param;
+
+	if (!d40d->cyclic) {
+		if (async_tx_test_ack(&d40d->txd)) {
+			d40_desc_remove(d40d);
+			d40_desc_free(d40c, d40d);
+		} else {
+			if (!d40d->is_in_client_list) {
+				d40_desc_remove(d40d);
+				d40_lcla_free_all(d40c, d40d);
+				list_add_tail(&d40d->node, &d40c->client);
+				d40d->is_in_client_list = true;
+			}
+		}
+	}
+
+	d40c->pending_tx--;
+
+	if (d40c->pending_tx)
+		tasklet_schedule(&d40c->tasklet);
+
+	spin_unlock_irqrestore(&d40c->lock, flags);
+
+	if (callback && (d40d->txd.flags & DMA_PREP_INTERRUPT))
+		callback(callback_param);
+
+	return;
+
+err:
+	/* Rescue manouver if receiving double interrupts */
+	if (d40c->pending_tx > 0)
+		d40c->pending_tx--;
+	spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static irqreturn_t d40_handle_interrupt(int irq, void *data)
+{
+	static const struct d40_interrupt_lookup il[] = {
+		{D40_DREG_LCTIS0, D40_DREG_LCICR0, false,  0},
+		{D40_DREG_LCTIS1, D40_DREG_LCICR1, false, 32},
+		{D40_DREG_LCTIS2, D40_DREG_LCICR2, false, 64},
+		{D40_DREG_LCTIS3, D40_DREG_LCICR3, false, 96},
+		{D40_DREG_LCEIS0, D40_DREG_LCICR0, true,   0},
+		{D40_DREG_LCEIS1, D40_DREG_LCICR1, true,  32},
+		{D40_DREG_LCEIS2, D40_DREG_LCICR2, true,  64},
+		{D40_DREG_LCEIS3, D40_DREG_LCICR3, true,  96},
+		{D40_DREG_PCTIS,  D40_DREG_PCICR,  false, D40_PHY_CHAN},
+		{D40_DREG_PCEIS,  D40_DREG_PCICR,  true,  D40_PHY_CHAN},
+	};
+
+	int i;
+	u32 regs[ARRAY_SIZE(il)];
+	u32 idx;
+	u32 row;
+	long chan = -1;
+	struct d40_chan *d40c;
+	unsigned long flags;
+	struct d40_base *base = data;
+
+	spin_lock_irqsave(&base->interrupt_lock, flags);
+
+	/* Read interrupt status of both logical and physical channels */
+	for (i = 0; i < ARRAY_SIZE(il); i++)
+		regs[i] = readl(base->virtbase + il[i].src);
+
+	for (;;) {
+
+		chan = find_next_bit((unsigned long *)regs,
+				     BITS_PER_LONG * ARRAY_SIZE(il), chan + 1);
+
+		/* No more set bits found? */
+		if (chan == BITS_PER_LONG * ARRAY_SIZE(il))
+			break;
+
+		row = chan / BITS_PER_LONG;
+		idx = chan & (BITS_PER_LONG - 1);
+
+		/* ACK interrupt */
+		writel(1 << idx, base->virtbase + il[row].clr);
+
+		if (il[row].offset == D40_PHY_CHAN)
+			d40c = base->lookup_phy_chans[idx];
+		else
+			d40c = base->lookup_log_chans[il[row].offset + idx];
+		spin_lock(&d40c->lock);
+
+		if (!il[row].is_error)
+			dma_tc_handle(d40c);
+		else
+			d40_err(base->dev, "IRQ chan: %ld offset %d idx %d\n",
+				chan, il[row].offset, idx);
+
+		spin_unlock(&d40c->lock);
+	}
+
+	spin_unlock_irqrestore(&base->interrupt_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static int d40_validate_conf(struct d40_chan *d40c,
+			     struct stedma40_chan_cfg *conf)
+{
+	int res = 0;
+	u32 dst_event_group = D40_TYPE_TO_GROUP(conf->dst_dev_type);
+	u32 src_event_group = D40_TYPE_TO_GROUP(conf->src_dev_type);
+	bool is_log = conf->mode == STEDMA40_MODE_LOGICAL;
+
+	if (!conf->dir) {
+		chan_err(d40c, "Invalid direction.\n");
+		res = -EINVAL;
+	}
+
+	if (conf->dst_dev_type != STEDMA40_DEV_DST_MEMORY &&
+	    d40c->base->plat_data->dev_tx[conf->dst_dev_type] == 0 &&
+	    d40c->runtime_addr == 0) {
+
+		chan_err(d40c, "Invalid TX channel address (%d)\n",
+			 conf->dst_dev_type);
+		res = -EINVAL;
+	}
+
+	if (conf->src_dev_type != STEDMA40_DEV_SRC_MEMORY &&
+	    d40c->base->plat_data->dev_rx[conf->src_dev_type] == 0 &&
+	    d40c->runtime_addr == 0) {
+		chan_err(d40c, "Invalid RX channel address (%d)\n",
+			conf->src_dev_type);
+		res = -EINVAL;
+	}
+
+	if (conf->dir == STEDMA40_MEM_TO_PERIPH &&
+	    dst_event_group == STEDMA40_DEV_DST_MEMORY) {
+		chan_err(d40c, "Invalid dst\n");
+		res = -EINVAL;
+	}
+
+	if (conf->dir == STEDMA40_PERIPH_TO_MEM &&
+	    src_event_group == STEDMA40_DEV_SRC_MEMORY) {
+		chan_err(d40c, "Invalid src\n");
+		res = -EINVAL;
+	}
+
+	if (src_event_group == STEDMA40_DEV_SRC_MEMORY &&
+	    dst_event_group == STEDMA40_DEV_DST_MEMORY && is_log) {
+		chan_err(d40c, "No event line\n");
+		res = -EINVAL;
+	}
+
+	if (conf->dir == STEDMA40_PERIPH_TO_PERIPH &&
+	    (src_event_group != dst_event_group)) {
+		chan_err(d40c, "Invalid event group\n");
+		res = -EINVAL;
+	}
+
+	if (conf->dir == STEDMA40_PERIPH_TO_PERIPH) {
+		/*
+		 * DMAC HW supports it. Will be added to this driver,
+		 * in case any dma client requires it.
+		 */
+		chan_err(d40c, "periph to periph not supported\n");
+		res = -EINVAL;
+	}
+
+	if (d40_psize_2_burst_size(is_log, conf->src_info.psize) *
+	    (1 << conf->src_info.data_width) !=
+	    d40_psize_2_burst_size(is_log, conf->dst_info.psize) *
+	    (1 << conf->dst_info.data_width)) {
+		/*
+		 * The DMAC hardware only supports
+		 * src (burst x width) == dst (burst x width)
+		 */
+
+		chan_err(d40c, "src (burst x width) != dst (burst x width)\n");
+		res = -EINVAL;
+	}
+
+	return res;
+}
+
+static bool d40_alloc_mask_set(struct d40_phy_res *phy,
+			       bool is_src, int log_event_line, bool is_log,
+			       bool *first_user)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&phy->lock, flags);
+
+	*first_user = ((phy->allocated_src | phy->allocated_dst)
+			== D40_ALLOC_FREE);
+
+	if (!is_log) {
+		/* Physical interrupts are masked per physical full channel */
+		if (phy->allocated_src == D40_ALLOC_FREE &&
+		    phy->allocated_dst == D40_ALLOC_FREE) {
+			phy->allocated_dst = D40_ALLOC_PHY;
+			phy->allocated_src = D40_ALLOC_PHY;
+			goto found;
+		} else
+			goto not_found;
+	}
+
+	/* Logical channel */
+	if (is_src) {
+		if (phy->allocated_src == D40_ALLOC_PHY)
+			goto not_found;
+
+		if (phy->allocated_src == D40_ALLOC_FREE)
+			phy->allocated_src = D40_ALLOC_LOG_FREE;
+
+		if (!(phy->allocated_src & (1 << log_event_line))) {
+			phy->allocated_src |= 1 << log_event_line;
+			goto found;
+		} else
+			goto not_found;
+	} else {
+		if (phy->allocated_dst == D40_ALLOC_PHY)
+			goto not_found;
+
+		if (phy->allocated_dst == D40_ALLOC_FREE)
+			phy->allocated_dst = D40_ALLOC_LOG_FREE;
+
+		if (!(phy->allocated_dst & (1 << log_event_line))) {
+			phy->allocated_dst |= 1 << log_event_line;
+			goto found;
+		} else
+			goto not_found;
+	}
+
+not_found:
+	spin_unlock_irqrestore(&phy->lock, flags);
+	return false;
+found:
+	spin_unlock_irqrestore(&phy->lock, flags);
+	return true;
+}
+
+static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src,
+			       int log_event_line)
+{
+	unsigned long flags;
+	bool is_free = false;
+
+	spin_lock_irqsave(&phy->lock, flags);
+	if (!log_event_line) {
+		phy->allocated_dst = D40_ALLOC_FREE;
+		phy->allocated_src = D40_ALLOC_FREE;
+		is_free = true;
+		goto out;
+	}
+
+	/* Logical channel */
+	if (is_src) {
+		phy->allocated_src &= ~(1 << log_event_line);
+		if (phy->allocated_src == D40_ALLOC_LOG_FREE)
+			phy->allocated_src = D40_ALLOC_FREE;
+	} else {
+		phy->allocated_dst &= ~(1 << log_event_line);
+		if (phy->allocated_dst == D40_ALLOC_LOG_FREE)
+			phy->allocated_dst = D40_ALLOC_FREE;
+	}
+
+	is_free = ((phy->allocated_src | phy->allocated_dst) ==
+		   D40_ALLOC_FREE);
+
+out:
+	spin_unlock_irqrestore(&phy->lock, flags);
+
+	return is_free;
+}
+
+static int d40_allocate_channel(struct d40_chan *d40c, bool *first_phy_user)
+{
+	int dev_type;
+	int event_group;
+	int event_line;
+	struct d40_phy_res *phys;
+	int i;
+	int j;
+	int log_num;
+	bool is_src;
+	bool is_log = d40c->dma_cfg.mode == STEDMA40_MODE_LOGICAL;
+
+	phys = d40c->base->phy_res;
+
+	if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
+		dev_type = d40c->dma_cfg.src_dev_type;
+		log_num = 2 * dev_type;
+		is_src = true;
+	} else if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
+		   d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
+		/* dst event lines are used for logical memcpy */
+		dev_type = d40c->dma_cfg.dst_dev_type;
+		log_num = 2 * dev_type + 1;
+		is_src = false;
+	} else
+		return -EINVAL;
+
+	event_group = D40_TYPE_TO_GROUP(dev_type);
+	event_line = D40_TYPE_TO_EVENT(dev_type);
+
+	if (!is_log) {
+		if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
+			/* Find physical half channel */
+			for (i = 0; i < d40c->base->num_phy_chans; i++) {
+
+				if (d40_alloc_mask_set(&phys[i], is_src,
+						       0, is_log,
+						       first_phy_user))
+					goto found_phy;
+			}
+		} else
+			for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
+				int phy_num = j  + event_group * 2;
+				for (i = phy_num; i < phy_num + 2; i++) {
+					if (d40_alloc_mask_set(&phys[i],
+							       is_src,
+							       0,
+							       is_log,
+							       first_phy_user))
+						goto found_phy;
+				}
+			}
+		return -EINVAL;
+found_phy:
+		d40c->phy_chan = &phys[i];
+		d40c->log_num = D40_PHY_CHAN;
+		goto out;
+	}
+	if (dev_type == -1)
+		return -EINVAL;
+
+	/* Find logical channel */
+	for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
+		int phy_num = j + event_group * 2;
+
+		if (d40c->dma_cfg.use_fixed_channel) {
+			i = d40c->dma_cfg.phy_channel;
+
+			if ((i != phy_num) && (i != phy_num + 1)) {
+				dev_err(chan2dev(d40c),
+					"invalid fixed phy channel %d\n", i);
+				return -EINVAL;
+			}
+
+			if (d40_alloc_mask_set(&phys[i], is_src, event_line,
+					       is_log, first_phy_user))
+				goto found_log;
+
+			dev_err(chan2dev(d40c),
+				"could not allocate fixed phy channel %d\n", i);
+			return -EINVAL;
+		}
+
+		/*
+		 * Spread logical channels across all available physical rather
+		 * than pack every logical channel at the first available phy
+		 * channels.
+		 */
+		if (is_src) {
+			for (i = phy_num; i < phy_num + 2; i++) {
+				if (d40_alloc_mask_set(&phys[i], is_src,
+						       event_line, is_log,
+						       first_phy_user))
+					goto found_log;
+			}
+		} else {
+			for (i = phy_num + 1; i >= phy_num; i--) {
+				if (d40_alloc_mask_set(&phys[i], is_src,
+						       event_line, is_log,
+						       first_phy_user))
+					goto found_log;
+			}
+		}
+	}
+	return -EINVAL;
+
+found_log:
+	d40c->phy_chan = &phys[i];
+	d40c->log_num = log_num;
+out:
+
+	if (is_log)
+		d40c->base->lookup_log_chans[d40c->log_num] = d40c;
+	else
+		d40c->base->lookup_phy_chans[d40c->phy_chan->num] = d40c;
+
+	return 0;
+
+}
+
+static int d40_config_memcpy(struct d40_chan *d40c)
+{
+	dma_cap_mask_t cap = d40c->chan.device->cap_mask;
+
+	if (dma_has_cap(DMA_MEMCPY, cap) && !dma_has_cap(DMA_SLAVE, cap)) {
+		d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_log;
+		d40c->dma_cfg.src_dev_type = STEDMA40_DEV_SRC_MEMORY;
+		d40c->dma_cfg.dst_dev_type = d40c->base->plat_data->
+			memcpy[d40c->chan.chan_id];
+
+	} else if (dma_has_cap(DMA_MEMCPY, cap) &&
+		   dma_has_cap(DMA_SLAVE, cap)) {
+		d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_phy;
+	} else {
+		chan_err(d40c, "No memcpy\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int d40_free_dma(struct d40_chan *d40c)
+{
+
+	int res = 0;
+	u32 event;
+	struct d40_phy_res *phy = d40c->phy_chan;
+	bool is_src;
+
+	/* Terminate all queued and active transfers */
+	d40_term_all(d40c);
+
+	if (phy == NULL) {
+		chan_err(d40c, "phy == null\n");
+		return -EINVAL;
+	}
+
+	if (phy->allocated_src == D40_ALLOC_FREE &&
+	    phy->allocated_dst == D40_ALLOC_FREE) {
+		chan_err(d40c, "channel already free\n");
+		return -EINVAL;
+	}
+
+	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
+	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
+		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
+		is_src = false;
+	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
+		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
+		is_src = true;
+	} else {
+		chan_err(d40c, "Unknown direction\n");
+		return -EINVAL;
+	}
+
+	pm_runtime_get_sync(d40c->base->dev);
+	res = d40_channel_execute_command(d40c, D40_DMA_STOP);
+	if (res) {
+		chan_err(d40c, "stop failed\n");
+		goto out;
+	}
+
+	d40_alloc_mask_free(phy, is_src, chan_is_logical(d40c) ? event : 0);
+
+	if (chan_is_logical(d40c))
+		d40c->base->lookup_log_chans[d40c->log_num] = NULL;
+	else
+		d40c->base->lookup_phy_chans[phy->num] = NULL;
+
+	if (d40c->busy) {
+		pm_runtime_mark_last_busy(d40c->base->dev);
+		pm_runtime_put_autosuspend(d40c->base->dev);
+	}
+
+	d40c->busy = false;
+	d40c->phy_chan = NULL;
+	d40c->configured = false;
+out:
+
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	return res;
+}
+
+static bool d40_is_paused(struct d40_chan *d40c)
+{
+	void __iomem *chanbase = chan_base(d40c);
+	bool is_paused = false;
+	unsigned long flags;
+	void __iomem *active_reg;
+	u32 status;
+	u32 event;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	if (chan_is_physical(d40c)) {
+		if (d40c->phy_chan->num % 2 == 0)
+			active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+		else
+			active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+		status = (readl(active_reg) &
+			  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+			D40_CHAN_POS(d40c->phy_chan->num);
+		if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
+			is_paused = true;
+
+		goto _exit;
+	}
+
+	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
+	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
+		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
+		status = readl(chanbase + D40_CHAN_REG_SDLNK);
+	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
+		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
+		status = readl(chanbase + D40_CHAN_REG_SSLNK);
+	} else {
+		chan_err(d40c, "Unknown direction\n");
+		goto _exit;
+	}
+
+	status = (status & D40_EVENTLINE_MASK(event)) >>
+		D40_EVENTLINE_POS(event);
+
+	if (status != D40_DMA_RUN)
+		is_paused = true;
+_exit:
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return is_paused;
+
+}
+
+
+static u32 stedma40_residue(struct dma_chan *chan)
+{
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	u32 bytes_left;
+	unsigned long flags;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+	bytes_left = d40_residue(d40c);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+
+	return bytes_left;
+}
+
+static int
+d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc,
+		struct scatterlist *sg_src, struct scatterlist *sg_dst,
+		unsigned int sg_len, dma_addr_t src_dev_addr,
+		dma_addr_t dst_dev_addr)
+{
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	struct stedma40_half_channel_info *src_info = &cfg->src_info;
+	struct stedma40_half_channel_info *dst_info = &cfg->dst_info;
+	int ret;
+
+	ret = d40_log_sg_to_lli(sg_src, sg_len,
+				src_dev_addr,
+				desc->lli_log.src,
+				chan->log_def.lcsp1,
+				src_info->data_width,
+				dst_info->data_width);
+
+	ret = d40_log_sg_to_lli(sg_dst, sg_len,
+				dst_dev_addr,
+				desc->lli_log.dst,
+				chan->log_def.lcsp3,
+				dst_info->data_width,
+				src_info->data_width);
+
+	return ret < 0 ? ret : 0;
+}
+
+static int
+d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc,
+		struct scatterlist *sg_src, struct scatterlist *sg_dst,
+		unsigned int sg_len, dma_addr_t src_dev_addr,
+		dma_addr_t dst_dev_addr)
+{
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	struct stedma40_half_channel_info *src_info = &cfg->src_info;
+	struct stedma40_half_channel_info *dst_info = &cfg->dst_info;
+	unsigned long flags = 0;
+	int ret;
+
+	if (desc->cyclic)
+		flags |= LLI_CYCLIC | LLI_TERM_INT;
+
+	ret = d40_phy_sg_to_lli(sg_src, sg_len, src_dev_addr,
+				desc->lli_phy.src,
+				virt_to_phys(desc->lli_phy.src),
+				chan->src_def_cfg,
+				src_info, dst_info, flags);
+
+	ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr,
+				desc->lli_phy.dst,
+				virt_to_phys(desc->lli_phy.dst),
+				chan->dst_def_cfg,
+				dst_info, src_info, flags);
+
+	dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr,
+				   desc->lli_pool.size, DMA_TO_DEVICE);
+
+	return ret < 0 ? ret : 0;
+}
+
+
+static struct d40_desc *
+d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg,
+	      unsigned int sg_len, unsigned long dma_flags)
+{
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	struct d40_desc *desc;
+	int ret;
+
+	desc = d40_desc_get(chan);
+	if (!desc)
+		return NULL;
+
+	desc->lli_len = d40_sg_2_dmalen(sg, sg_len, cfg->src_info.data_width,
+					cfg->dst_info.data_width);
+	if (desc->lli_len < 0) {
+		chan_err(chan, "Unaligned size\n");
+		goto err;
+	}
+
+	ret = d40_pool_lli_alloc(chan, desc, desc->lli_len);
+	if (ret < 0) {
+		chan_err(chan, "Could not allocate lli\n");
+		goto err;
+	}
+
+
+	desc->lli_current = 0;
+	desc->txd.flags = dma_flags;
+	desc->txd.tx_submit = d40_tx_submit;
+
+	dma_async_tx_descriptor_init(&desc->txd, &chan->chan);
+
+	return desc;
+
+err:
+	d40_desc_free(chan, desc);
+	return NULL;
+}
+
+static dma_addr_t
+d40_get_dev_addr(struct d40_chan *chan, enum dma_transfer_direction direction)
+{
+	struct stedma40_platform_data *plat = chan->base->plat_data;
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	dma_addr_t addr = 0;
+
+	if (chan->runtime_addr)
+		return chan->runtime_addr;
+
+	if (direction == DMA_DEV_TO_MEM)
+		addr = plat->dev_rx[cfg->src_dev_type];
+	else if (direction == DMA_MEM_TO_DEV)
+		addr = plat->dev_tx[cfg->dst_dev_type];
+
+	return addr;
+}
+
+static struct dma_async_tx_descriptor *
+d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
+	    struct scatterlist *sg_dst, unsigned int sg_len,
+	    enum dma_transfer_direction direction, unsigned long dma_flags)
+{
+	struct d40_chan *chan = container_of(dchan, struct d40_chan, chan);
+	dma_addr_t src_dev_addr = 0;
+	dma_addr_t dst_dev_addr = 0;
+	struct d40_desc *desc;
+	unsigned long flags;
+	int ret;
+
+	if (!chan->phy_chan) {
+		chan_err(chan, "Cannot prepare unallocated channel\n");
+		return NULL;
+	}
+
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	desc = d40_prep_desc(chan, sg_src, sg_len, dma_flags);
+	if (desc == NULL)
+		goto err;
+
+	if (sg_next(&sg_src[sg_len - 1]) == sg_src)
+		desc->cyclic = true;
+
+	if (direction != DMA_TRANS_NONE) {
+		dma_addr_t dev_addr = d40_get_dev_addr(chan, direction);
+
+		if (direction == DMA_DEV_TO_MEM)
+			src_dev_addr = dev_addr;
+		else if (direction == DMA_MEM_TO_DEV)
+			dst_dev_addr = dev_addr;
+	}
+
+	if (chan_is_logical(chan))
+		ret = d40_prep_sg_log(chan, desc, sg_src, sg_dst,
+				      sg_len, src_dev_addr, dst_dev_addr);
+	else
+		ret = d40_prep_sg_phy(chan, desc, sg_src, sg_dst,
+				      sg_len, src_dev_addr, dst_dev_addr);
+
+	if (ret) {
+		chan_err(chan, "Failed to prepare %s sg job: %d\n",
+			 chan_is_logical(chan) ? "log" : "phy", ret);
+		goto err;
+	}
+
+	/*
+	 * add descriptor to the prepare queue in order to be able
+	 * to free them later in terminate_all
+	 */
+	list_add_tail(&desc->node, &chan->prepare_queue);
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return &desc->txd;
+
+err:
+	if (desc)
+		d40_desc_free(chan, desc);
+	spin_unlock_irqrestore(&chan->lock, flags);
+	return NULL;
+}
+
+bool stedma40_filter(struct dma_chan *chan, void *data)
+{
+	struct stedma40_chan_cfg *info = data;
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	int err;
+
+	if (data) {
+		err = d40_validate_conf(d40c, info);
+		if (!err)
+			d40c->dma_cfg = *info;
+	} else
+		err = d40_config_memcpy(d40c);
+
+	if (!err)
+		d40c->configured = true;
+
+	return err == 0;
+}
+EXPORT_SYMBOL(stedma40_filter);
+
+static void __d40_set_prio_rt(struct d40_chan *d40c, int dev_type, bool src)
+{
+	bool realtime = d40c->dma_cfg.realtime;
+	bool highprio = d40c->dma_cfg.high_priority;
+	u32 prioreg = highprio ? D40_DREG_PSEG1 : D40_DREG_PCEG1;
+	u32 rtreg = realtime ? D40_DREG_RSEG1 : D40_DREG_RCEG1;
+	u32 event = D40_TYPE_TO_EVENT(dev_type);
+	u32 group = D40_TYPE_TO_GROUP(dev_type);
+	u32 bit = 1 << event;
+
+	/* Destination event lines are stored in the upper halfword */
+	if (!src)
+		bit <<= 16;
+
+	writel(bit, d40c->base->virtbase + prioreg + group * 4);
+	writel(bit, d40c->base->virtbase + rtreg + group * 4);
+}
+
+static void d40_set_prio_realtime(struct d40_chan *d40c)
+{
+	if (d40c->base->rev < 3)
+		return;
+
+	if ((d40c->dma_cfg.dir ==  STEDMA40_PERIPH_TO_MEM) ||
+	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH))
+		__d40_set_prio_rt(d40c, d40c->dma_cfg.src_dev_type, true);
+
+	if ((d40c->dma_cfg.dir ==  STEDMA40_MEM_TO_PERIPH) ||
+	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH))
+		__d40_set_prio_rt(d40c, d40c->dma_cfg.dst_dev_type, false);
+}
+
+/* DMA ENGINE functions */
+static int d40_alloc_chan_resources(struct dma_chan *chan)
+{
+	int err;
+	unsigned long flags;
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	bool is_free_phy;
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	dma_cookie_init(chan);
+
+	/* If no dma configuration is set use default configuration (memcpy) */
+	if (!d40c->configured) {
+		err = d40_config_memcpy(d40c);
+		if (err) {
+			chan_err(d40c, "Failed to configure memcpy channel\n");
+			goto fail;
+		}
+	}
+
+	err = d40_allocate_channel(d40c, &is_free_phy);
+	if (err) {
+		chan_err(d40c, "Failed to allocate channel\n");
+		d40c->configured = false;
+		goto fail;
+	}
+
+	pm_runtime_get_sync(d40c->base->dev);
+	/* Fill in basic CFG register values */
+	d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg,
+		    &d40c->dst_def_cfg, chan_is_logical(d40c));
+
+	d40_set_prio_realtime(d40c);
+
+	if (chan_is_logical(d40c)) {
+		d40_log_cfg(&d40c->dma_cfg,
+			    &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
+
+		if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM)
+			d40c->lcpa = d40c->base->lcpa_base +
+			  d40c->dma_cfg.src_dev_type * D40_LCPA_CHAN_SIZE;
+		else
+			d40c->lcpa = d40c->base->lcpa_base +
+			  d40c->dma_cfg.dst_dev_type *
+			  D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA;
+	}
+
+	dev_dbg(chan2dev(d40c), "allocated %s channel (phy %d%s)\n",
+		 chan_is_logical(d40c) ? "logical" : "physical",
+		 d40c->phy_chan->num,
+		 d40c->dma_cfg.use_fixed_channel ? ", fixed" : "");
+
+
+	/*
+	 * Only write channel configuration to the DMA if the physical
+	 * resource is free. In case of multiple logical channels
+	 * on the same physical resource, only the first write is necessary.
+	 */
+	if (is_free_phy)
+		d40_config_write(d40c);
+fail:
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return err;
+}
+
+static void d40_free_chan_resources(struct dma_chan *chan)
+{
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	int err;
+	unsigned long flags;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Cannot free unallocated channel\n");
+		return;
+	}
+
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	err = d40_free_dma(d40c);
+
+	if (err)
+		chan_err(d40c, "Failed to free channel\n");
+	spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
+						       dma_addr_t dst,
+						       dma_addr_t src,
+						       size_t size,
+						       unsigned long dma_flags)
+{
+	struct scatterlist dst_sg;
+	struct scatterlist src_sg;
+
+	sg_init_table(&dst_sg, 1);
+	sg_init_table(&src_sg, 1);
+
+	sg_dma_address(&dst_sg) = dst;
+	sg_dma_address(&src_sg) = src;
+
+	sg_dma_len(&dst_sg) = size;
+	sg_dma_len(&src_sg) = size;
+
+	return d40_prep_sg(chan, &src_sg, &dst_sg, 1, DMA_NONE, dma_flags);
+}
+
+static struct dma_async_tx_descriptor *
+d40_prep_memcpy_sg(struct dma_chan *chan,
+		   struct scatterlist *dst_sg, unsigned int dst_nents,
+		   struct scatterlist *src_sg, unsigned int src_nents,
+		   unsigned long dma_flags)
+{
+	if (dst_nents != src_nents)
+		return NULL;
+
+	return d40_prep_sg(chan, src_sg, dst_sg, src_nents, DMA_NONE, dma_flags);
+}
+
+static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
+							 struct scatterlist *sgl,
+							 unsigned int sg_len,
+							 enum dma_transfer_direction direction,
+							 unsigned long dma_flags,
+							 void *context)
+{
+	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV)
+		return NULL;
+
+	return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags);
+}
+
+static struct dma_async_tx_descriptor *
+dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
+		     size_t buf_len, size_t period_len,
+		     enum dma_transfer_direction direction, void *context)
+{
+	unsigned int periods = buf_len / period_len;
+	struct dma_async_tx_descriptor *txd;
+	struct scatterlist *sg;
+	int i;
+
+	sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT);
+	for (i = 0; i < periods; i++) {
+		sg_dma_address(&sg[i]) = dma_addr;
+		sg_dma_len(&sg[i]) = period_len;
+		dma_addr += period_len;
+	}
+
+	sg[periods].offset = 0;
+	sg[periods].length = 0;
+	sg[periods].page_link =
+		((unsigned long)sg | 0x01) & ~0x02;
+
+	txd = d40_prep_sg(chan, sg, sg, periods, direction,
+			  DMA_PREP_INTERRUPT);
+
+	kfree(sg);
+
+	return txd;
+}
+
+static enum dma_status d40_tx_status(struct dma_chan *chan,
+				     dma_cookie_t cookie,
+				     struct dma_tx_state *txstate)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	enum dma_status ret;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Cannot read status of unallocated channel\n");
+		return -EINVAL;
+	}
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_SUCCESS)
+		dma_set_residue(txstate, stedma40_residue(chan));
+
+	if (d40_is_paused(d40c))
+		ret = DMA_PAUSED;
+
+	return ret;
+}
+
+static void d40_issue_pending(struct dma_chan *chan)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	unsigned long flags;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Channel is not allocated!\n");
+		return;
+	}
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	list_splice_tail_init(&d40c->pending_queue, &d40c->queue);
+
+	/* Busy means that queued jobs are already being processed */
+	if (!d40c->busy)
+		(void) d40_queue_start(d40c);
+
+	spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static void d40_terminate_all(struct dma_chan *chan)
+{
+	unsigned long flags;
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	int ret;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	pm_runtime_get_sync(d40c->base->dev);
+	ret = d40_channel_execute_command(d40c, D40_DMA_STOP);
+	if (ret)
+		chan_err(d40c, "Failed to stop channel\n");
+
+	d40_term_all(d40c);
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	if (d40c->busy) {
+		pm_runtime_mark_last_busy(d40c->base->dev);
+		pm_runtime_put_autosuspend(d40c->base->dev);
+	}
+	d40c->busy = false;
+
+	spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static int
+dma40_config_to_halfchannel(struct d40_chan *d40c,
+			    struct stedma40_half_channel_info *info,
+			    enum dma_slave_buswidth width,
+			    u32 maxburst)
+{
+	enum stedma40_periph_data_width addr_width;
+	int psize;
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		addr_width = STEDMA40_BYTE_WIDTH;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		addr_width = STEDMA40_HALFWORD_WIDTH;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		addr_width = STEDMA40_WORD_WIDTH;
+		break;
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		addr_width = STEDMA40_DOUBLEWORD_WIDTH;
+		break;
+	default:
+		dev_err(d40c->base->dev,
+			"illegal peripheral address width "
+			"requested (%d)\n",
+			width);
+		return -EINVAL;
+	}
+
+	if (chan_is_logical(d40c)) {
+		if (maxburst >= 16)
+			psize = STEDMA40_PSIZE_LOG_16;
+		else if (maxburst >= 8)
+			psize = STEDMA40_PSIZE_LOG_8;
+		else if (maxburst >= 4)
+			psize = STEDMA40_PSIZE_LOG_4;
+		else
+			psize = STEDMA40_PSIZE_LOG_1;
+	} else {
+		if (maxburst >= 16)
+			psize = STEDMA40_PSIZE_PHY_16;
+		else if (maxburst >= 8)
+			psize = STEDMA40_PSIZE_PHY_8;
+		else if (maxburst >= 4)
+			psize = STEDMA40_PSIZE_PHY_4;
+		else
+			psize = STEDMA40_PSIZE_PHY_1;
+	}
+
+	info->data_width = addr_width;
+	info->psize = psize;
+	info->flow_ctrl = STEDMA40_NO_FLOW_CTRL;
+
+	return 0;
+}
+
+/* Runtime reconfiguration extension */
+static int d40_set_runtime_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	struct stedma40_chan_cfg *cfg = &d40c->dma_cfg;
+	enum dma_slave_buswidth src_addr_width, dst_addr_width;
+	dma_addr_t config_addr;
+	u32 src_maxburst, dst_maxburst;
+	int ret;
+
+	src_addr_width = config->src_addr_width;
+	src_maxburst = config->src_maxburst;
+	dst_addr_width = config->dst_addr_width;
+	dst_maxburst = config->dst_maxburst;
+
+	if (config->direction == DMA_DEV_TO_MEM) {
+		dma_addr_t dev_addr_rx =
+			d40c->base->plat_data->dev_rx[cfg->src_dev_type];
+
+		config_addr = config->src_addr;
+		if (dev_addr_rx)
+			dev_dbg(d40c->base->dev,
+				"channel has a pre-wired RX address %08x "
+				"overriding with %08x\n",
+				dev_addr_rx, config_addr);
+		if (cfg->dir != STEDMA40_PERIPH_TO_MEM)
+			dev_dbg(d40c->base->dev,
+				"channel was not configured for peripheral "
+				"to memory transfer (%d) overriding\n",
+				cfg->dir);
+		cfg->dir = STEDMA40_PERIPH_TO_MEM;
+
+		/* Configure the memory side */
+		if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			dst_addr_width = src_addr_width;
+		if (dst_maxburst == 0)
+			dst_maxburst = src_maxburst;
+
+	} else if (config->direction == DMA_MEM_TO_DEV) {
+		dma_addr_t dev_addr_tx =
+			d40c->base->plat_data->dev_tx[cfg->dst_dev_type];
+
+		config_addr = config->dst_addr;
+		if (dev_addr_tx)
+			dev_dbg(d40c->base->dev,
+				"channel has a pre-wired TX address %08x "
+				"overriding with %08x\n",
+				dev_addr_tx, config_addr);
+		if (cfg->dir != STEDMA40_MEM_TO_PERIPH)
+			dev_dbg(d40c->base->dev,
+				"channel was not configured for memory "
+				"to peripheral transfer (%d) overriding\n",
+				cfg->dir);
+		cfg->dir = STEDMA40_MEM_TO_PERIPH;
+
+		/* Configure the memory side */
+		if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			src_addr_width = dst_addr_width;
+		if (src_maxburst == 0)
+			src_maxburst = dst_maxburst;
+	} else {
+		dev_err(d40c->base->dev,
+			"unrecognized channel direction %d\n",
+			config->direction);
+		return -EINVAL;
+	}
+
+	if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) {
+		dev_err(d40c->base->dev,
+			"src/dst width/maxburst mismatch: %d*%d != %d*%d\n",
+			src_maxburst,
+			src_addr_width,
+			dst_maxburst,
+			dst_addr_width);
+		return -EINVAL;
+	}
+
+	ret = dma40_config_to_halfchannel(d40c, &cfg->src_info,
+					  src_addr_width,
+					  src_maxburst);
+	if (ret)
+		return ret;
+
+	ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info,
+					  dst_addr_width,
+					  dst_maxburst);
+	if (ret)
+		return ret;
+
+	/* Fill in register values */
+	if (chan_is_logical(d40c))
+		d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
+	else
+		d40_phy_cfg(cfg, &d40c->src_def_cfg,
+			    &d40c->dst_def_cfg, false);
+
+	/* These settings will take precedence later */
+	d40c->runtime_addr = config_addr;
+	d40c->runtime_direction = config->direction;
+	dev_dbg(d40c->base->dev,
+		"configured channel %s for %s, data width %d/%d, "
+		"maxburst %d/%d elements, LE, no flow control\n",
+		dma_chan_name(chan),
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
+		src_addr_width, dst_addr_width,
+		src_maxburst, dst_maxburst);
+
+	return 0;
+}
+
+static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		       unsigned long arg)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Channel is not allocated!\n");
+		return -EINVAL;
+	}
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		d40_terminate_all(chan);
+		return 0;
+	case DMA_PAUSE:
+		return d40_pause(d40c);
+	case DMA_RESUME:
+		return d40_resume(d40c);
+	case DMA_SLAVE_CONFIG:
+		return d40_set_runtime_config(chan,
+			(struct dma_slave_config *) arg);
+	default:
+		break;
+	}
+
+	/* Other commands are unimplemented */
+	return -ENXIO;
+}
+
+/* Initialization functions */
+
+static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
+				 struct d40_chan *chans, int offset,
+				 int num_chans)
+{
+	int i = 0;
+	struct d40_chan *d40c;
+
+	INIT_LIST_HEAD(&dma->channels);
+
+	for (i = offset; i < offset + num_chans; i++) {
+		d40c = &chans[i];
+		d40c->base = base;
+		d40c->chan.device = dma;
+
+		spin_lock_init(&d40c->lock);
+
+		d40c->log_num = D40_PHY_CHAN;
+
+		INIT_LIST_HEAD(&d40c->active);
+		INIT_LIST_HEAD(&d40c->queue);
+		INIT_LIST_HEAD(&d40c->pending_queue);
+		INIT_LIST_HEAD(&d40c->client);
+		INIT_LIST_HEAD(&d40c->prepare_queue);
+
+		tasklet_init(&d40c->tasklet, dma_tasklet,
+			     (unsigned long) d40c);
+
+		list_add_tail(&d40c->chan.device_node,
+			      &dma->channels);
+	}
+}
+
+static void d40_ops_init(struct d40_base *base, struct dma_device *dev)
+{
+	if (dma_has_cap(DMA_SLAVE, dev->cap_mask))
+		dev->device_prep_slave_sg = d40_prep_slave_sg;
+
+	if (dma_has_cap(DMA_MEMCPY, dev->cap_mask)) {
+		dev->device_prep_dma_memcpy = d40_prep_memcpy;
+
+		/*
+		 * This controller can only access address at even
+		 * 32bit boundaries, i.e. 2^2
+		 */
+		dev->copy_align = 2;
+	}
+
+	if (dma_has_cap(DMA_SG, dev->cap_mask))
+		dev->device_prep_dma_sg = d40_prep_memcpy_sg;
+
+	if (dma_has_cap(DMA_CYCLIC, dev->cap_mask))
+		dev->device_prep_dma_cyclic = dma40_prep_dma_cyclic;
+
+	dev->device_alloc_chan_resources = d40_alloc_chan_resources;
+	dev->device_free_chan_resources = d40_free_chan_resources;
+	dev->device_issue_pending = d40_issue_pending;
+	dev->device_tx_status = d40_tx_status;
+	dev->device_control = d40_control;
+	dev->dev = base->dev;
+}
+
+static int __init d40_dmaengine_init(struct d40_base *base,
+				     int num_reserved_chans)
+{
+	int err ;
+
+	d40_chan_init(base, &base->dma_slave, base->log_chans,
+		      0, base->num_log_chans);
+
+	dma_cap_zero(base->dma_slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask);
+
+	d40_ops_init(base, &base->dma_slave);
+
+	err = dma_async_device_register(&base->dma_slave);
+
+	if (err) {
+		d40_err(base->dev, "Failed to register slave channels\n");
+		goto failure1;
+	}
+
+	d40_chan_init(base, &base->dma_memcpy, base->log_chans,
+		      base->num_log_chans, base->plat_data->memcpy_len);
+
+	dma_cap_zero(base->dma_memcpy.cap_mask);
+	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+	dma_cap_set(DMA_SG, base->dma_memcpy.cap_mask);
+
+	d40_ops_init(base, &base->dma_memcpy);
+
+	err = dma_async_device_register(&base->dma_memcpy);
+
+	if (err) {
+		d40_err(base->dev,
+			"Failed to regsiter memcpy only channels\n");
+		goto failure2;
+	}
+
+	d40_chan_init(base, &base->dma_both, base->phy_chans,
+		      0, num_reserved_chans);
+
+	dma_cap_zero(base->dma_both.cap_mask);
+	dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
+	dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
+	dma_cap_set(DMA_SG, base->dma_both.cap_mask);
+	dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask);
+
+	d40_ops_init(base, &base->dma_both);
+	err = dma_async_device_register(&base->dma_both);
+
+	if (err) {
+		d40_err(base->dev,
+			"Failed to register logical and physical capable channels\n");
+		goto failure3;
+	}
+	return 0;
+failure3:
+	dma_async_device_unregister(&base->dma_memcpy);
+failure2:
+	dma_async_device_unregister(&base->dma_slave);
+failure1:
+	return err;
+}
+
+/* Suspend resume functionality */
+#ifdef CONFIG_PM
+static int dma40_pm_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct d40_base *base = platform_get_drvdata(pdev);
+	int ret = 0;
+	if (!pm_runtime_suspended(dev))
+		return -EBUSY;
+
+	if (base->lcpa_regulator)
+		ret = regulator_disable(base->lcpa_regulator);
+	return ret;
+}
+
+static int dma40_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct d40_base *base = platform_get_drvdata(pdev);
+
+	d40_save_restore_registers(base, true);
+
+	/* Don't disable/enable clocks for v1 due to HW bugs */
+	if (base->rev != 1)
+		writel_relaxed(base->gcc_pwr_off_mask,
+			       base->virtbase + D40_DREG_GCC);
+
+	return 0;
+}
+
+static int dma40_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct d40_base *base = platform_get_drvdata(pdev);
+
+	if (base->initialized)
+		d40_save_restore_registers(base, false);
+
+	writel_relaxed(D40_DREG_GCC_ENABLE_ALL,
+		       base->virtbase + D40_DREG_GCC);
+	return 0;
+}
+
+static int dma40_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct d40_base *base = platform_get_drvdata(pdev);
+	int ret = 0;
+
+	if (base->lcpa_regulator)
+		ret = regulator_enable(base->lcpa_regulator);
+
+	return ret;
+}
+
+static const struct dev_pm_ops dma40_pm_ops = {
+	.suspend		= dma40_pm_suspend,
+	.runtime_suspend	= dma40_runtime_suspend,
+	.runtime_resume		= dma40_runtime_resume,
+	.resume			= dma40_resume,
+};
+#define DMA40_PM_OPS	(&dma40_pm_ops)
+#else
+#define DMA40_PM_OPS	NULL
+#endif
+
+/* Initialization functions. */
+
+static int __init d40_phy_res_init(struct d40_base *base)
+{
+	int i;
+	int num_phy_chans_avail = 0;
+	u32 val[2];
+	int odd_even_bit = -2;
+	int gcc = D40_DREG_GCC_ENA;
+
+	val[0] = readl(base->virtbase + D40_DREG_PRSME);
+	val[1] = readl(base->virtbase + D40_DREG_PRSMO);
+
+	for (i = 0; i < base->num_phy_chans; i++) {
+		base->phy_res[i].num = i;
+		odd_even_bit += 2 * ((i % 2) == 0);
+		if (((val[i % 2] >> odd_even_bit) & 3) == 1) {
+			/* Mark security only channels as occupied */
+			base->phy_res[i].allocated_src = D40_ALLOC_PHY;
+			base->phy_res[i].allocated_dst = D40_ALLOC_PHY;
+			base->phy_res[i].reserved = true;
+			gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i),
+						       D40_DREG_GCC_SRC);
+			gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i),
+						       D40_DREG_GCC_DST);
+
+
+		} else {
+			base->phy_res[i].allocated_src = D40_ALLOC_FREE;
+			base->phy_res[i].allocated_dst = D40_ALLOC_FREE;
+			base->phy_res[i].reserved = false;
+			num_phy_chans_avail++;
+		}
+		spin_lock_init(&base->phy_res[i].lock);
+	}
+
+	/* Mark disabled channels as occupied */
+	for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) {
+		int chan = base->plat_data->disabled_channels[i];
+
+		base->phy_res[chan].allocated_src = D40_ALLOC_PHY;
+		base->phy_res[chan].allocated_dst = D40_ALLOC_PHY;
+		base->phy_res[chan].reserved = true;
+		gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan),
+					       D40_DREG_GCC_SRC);
+		gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan),
+					       D40_DREG_GCC_DST);
+		num_phy_chans_avail--;
+	}
+
+	dev_info(base->dev, "%d of %d physical DMA channels available\n",
+		 num_phy_chans_avail, base->num_phy_chans);
+
+	/* Verify settings extended vs standard */
+	val[0] = readl(base->virtbase + D40_DREG_PRTYP);
+
+	for (i = 0; i < base->num_phy_chans; i++) {
+
+		if (base->phy_res[i].allocated_src == D40_ALLOC_FREE &&
+		    (val[0] & 0x3) != 1)
+			dev_info(base->dev,
+				 "[%s] INFO: channel %d is misconfigured (%d)\n",
+				 __func__, i, val[0] & 0x3);
+
+		val[0] = val[0] >> 2;
+	}
+
+	/*
+	 * To keep things simple, Enable all clocks initially.
+	 * The clocks will get managed later post channel allocation.
+	 * The clocks for the event lines on which reserved channels exists
+	 * are not managed here.
+	 */
+	writel(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC);
+	base->gcc_pwr_off_mask = gcc;
+
+	return num_phy_chans_avail;
+}
+
+static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
+{
+	struct stedma40_platform_data *plat_data;
+	struct clk *clk = NULL;
+	void __iomem *virtbase = NULL;
+	struct resource *res = NULL;
+	struct d40_base *base = NULL;
+	int num_log_chans = 0;
+	int num_phy_chans;
+	int i;
+	u32 pid;
+	u32 cid;
+	u8 rev;
+
+	clk = clk_get(&pdev->dev, NULL);
+
+	if (IS_ERR(clk)) {
+		d40_err(&pdev->dev, "No matching clock found\n");
+		goto failure;
+	}
+
+	clk_enable(clk);
+
+	/* Get IO for DMAC base address */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base");
+	if (!res)
+		goto failure;
+
+	if (request_mem_region(res->start, resource_size(res),
+			       D40_NAME " I/O base") == NULL)
+		goto failure;
+
+	virtbase = ioremap(res->start, resource_size(res));
+	if (!virtbase)
+		goto failure;
+
+	/* This is just a regular AMBA PrimeCell ID actually */
+	for (pid = 0, i = 0; i < 4; i++)
+		pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i)
+			& 255) << (i * 8);
+	for (cid = 0, i = 0; i < 4; i++)
+		cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i)
+			& 255) << (i * 8);
+
+	if (cid != AMBA_CID) {
+		d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n");
+		goto failure;
+	}
+	if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) {
+		d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n",
+			AMBA_MANF_BITS(pid),
+			AMBA_VENDOR_ST);
+		goto failure;
+	}
+	/*
+	 * HW revision:
+	 * DB8500ed has revision 0
+	 * ? has revision 1
+	 * DB8500v1 has revision 2
+	 * DB8500v2 has revision 3
+	 */
+	rev = AMBA_REV_BITS(pid);
+
+	/* The number of physical channels on this HW */
+	num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
+
+	dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n",
+		 rev, res->start);
+
+	if (rev < 2) {
+		d40_err(&pdev->dev, "hardware revision: %d is not supported",
+			rev);
+		goto failure;
+	}
+
+	plat_data = pdev->dev.platform_data;
+
+	/* Count the number of logical channels in use */
+	for (i = 0; i < plat_data->dev_len; i++)
+		if (plat_data->dev_rx[i] != 0)
+			num_log_chans++;
+
+	for (i = 0; i < plat_data->dev_len; i++)
+		if (plat_data->dev_tx[i] != 0)
+			num_log_chans++;
+
+	base = kzalloc(ALIGN(sizeof(struct d40_base), 4) +
+		       (num_phy_chans + num_log_chans + plat_data->memcpy_len) *
+		       sizeof(struct d40_chan), GFP_KERNEL);
+
+	if (base == NULL) {
+		d40_err(&pdev->dev, "Out of memory\n");
+		goto failure;
+	}
+
+	base->rev = rev;
+	base->clk = clk;
+	base->num_phy_chans = num_phy_chans;
+	base->num_log_chans = num_log_chans;
+	base->phy_start = res->start;
+	base->phy_size = resource_size(res);
+	base->virtbase = virtbase;
+	base->plat_data = plat_data;
+	base->dev = &pdev->dev;
+	base->phy_chans = ((void *)base) + ALIGN(sizeof(struct d40_base), 4);
+	base->log_chans = &base->phy_chans[num_phy_chans];
+
+	base->phy_res = kzalloc(num_phy_chans * sizeof(struct d40_phy_res),
+				GFP_KERNEL);
+	if (!base->phy_res)
+		goto failure;
+
+	base->lookup_phy_chans = kzalloc(num_phy_chans *
+					 sizeof(struct d40_chan *),
+					 GFP_KERNEL);
+	if (!base->lookup_phy_chans)
+		goto failure;
+
+	if (num_log_chans + plat_data->memcpy_len) {
+		/*
+		 * The max number of logical channels are event lines for all
+		 * src devices and dst devices
+		 */
+		base->lookup_log_chans = kzalloc(plat_data->dev_len * 2 *
+						 sizeof(struct d40_chan *),
+						 GFP_KERNEL);
+		if (!base->lookup_log_chans)
+			goto failure;
+	}
+
+	base->reg_val_backup_chan = kmalloc(base->num_phy_chans *
+					    sizeof(d40_backup_regs_chan),
+					    GFP_KERNEL);
+	if (!base->reg_val_backup_chan)
+		goto failure;
+
+	base->lcla_pool.alloc_map =
+		kzalloc(num_phy_chans * sizeof(struct d40_desc *)
+			* D40_LCLA_LINK_PER_EVENT_GRP, GFP_KERNEL);
+	if (!base->lcla_pool.alloc_map)
+		goto failure;
+
+	base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc),
+					    0, SLAB_HWCACHE_ALIGN,
+					    NULL);
+	if (base->desc_slab == NULL)
+		goto failure;
+
+	return base;
+
+failure:
+	if (!IS_ERR(clk)) {
+		clk_disable(clk);
+		clk_put(clk);
+	}
+	if (virtbase)
+		iounmap(virtbase);
+	if (res)
+		release_mem_region(res->start,
+				   resource_size(res));
+	if (virtbase)
+		iounmap(virtbase);
+
+	if (base) {
+		kfree(base->lcla_pool.alloc_map);
+		kfree(base->reg_val_backup_chan);
+		kfree(base->lookup_log_chans);
+		kfree(base->lookup_phy_chans);
+		kfree(base->phy_res);
+		kfree(base);
+	}
+
+	return NULL;
+}
+
+static void __init d40_hw_init(struct d40_base *base)
+{
+
+	static struct d40_reg_val dma_init_reg[] = {
+		/* Clock every part of the DMA block from start */
+		{ .reg = D40_DREG_GCC,    .val = D40_DREG_GCC_ENABLE_ALL},
+
+		/* Interrupts on all logical channels */
+		{ .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCMIS1, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCMIS2, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCMIS3, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCICR0, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCICR1, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCICR2, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCICR3, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCTIS0, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCTIS1, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCTIS2, .val = 0xFFFFFFFF},
+		{ .reg = D40_DREG_LCTIS3, .val = 0xFFFFFFFF}
+	};
+	int i;
+	u32 prmseo[2] = {0, 0};
+	u32 activeo[2] = {0xFFFFFFFF, 0xFFFFFFFF};
+	u32 pcmis = 0;
+	u32 pcicr = 0;
+
+	for (i = 0; i < ARRAY_SIZE(dma_init_reg); i++)
+		writel(dma_init_reg[i].val,
+		       base->virtbase + dma_init_reg[i].reg);
+
+	/* Configure all our dma channels to default settings */
+	for (i = 0; i < base->num_phy_chans; i++) {
+
+		activeo[i % 2] = activeo[i % 2] << 2;
+
+		if (base->phy_res[base->num_phy_chans - i - 1].allocated_src
+		    == D40_ALLOC_PHY) {
+			activeo[i % 2] |= 3;
+			continue;
+		}
+
+		/* Enable interrupt # */
+		pcmis = (pcmis << 1) | 1;
+
+		/* Clear interrupt # */
+		pcicr = (pcicr << 1) | 1;
+
+		/* Set channel to physical mode */
+		prmseo[i % 2] = prmseo[i % 2] << 2;
+		prmseo[i % 2] |= 1;
+
+	}
+
+	writel(prmseo[1], base->virtbase + D40_DREG_PRMSE);
+	writel(prmseo[0], base->virtbase + D40_DREG_PRMSO);
+	writel(activeo[1], base->virtbase + D40_DREG_ACTIVE);
+	writel(activeo[0], base->virtbase + D40_DREG_ACTIVO);
+
+	/* Write which interrupt to enable */
+	writel(pcmis, base->virtbase + D40_DREG_PCMIS);
+
+	/* Write which interrupt to clear */
+	writel(pcicr, base->virtbase + D40_DREG_PCICR);
+
+}
+
+static int __init d40_lcla_allocate(struct d40_base *base)
+{
+	struct d40_lcla_pool *pool = &base->lcla_pool;
+	unsigned long *page_list;
+	int i, j;
+	int ret = 0;
+
+	/*
+	 * This is somewhat ugly. We need 8192 bytes that are 18 bit aligned,
+	 * To full fill this hardware requirement without wasting 256 kb
+	 * we allocate pages until we get an aligned one.
+	 */
+	page_list = kmalloc(sizeof(unsigned long) * MAX_LCLA_ALLOC_ATTEMPTS,
+			    GFP_KERNEL);
+
+	if (!page_list) {
+		ret = -ENOMEM;
+		goto failure;
+	}
+
+	/* Calculating how many pages that are required */
+	base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE;
+
+	for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) {
+		page_list[i] = __get_free_pages(GFP_KERNEL,
+						base->lcla_pool.pages);
+		if (!page_list[i]) {
+
+			d40_err(base->dev, "Failed to allocate %d pages.\n",
+				base->lcla_pool.pages);
+
+			for (j = 0; j < i; j++)
+				free_pages(page_list[j], base->lcla_pool.pages);
+			goto failure;
+		}
+
+		if ((virt_to_phys((void *)page_list[i]) &
+		     (LCLA_ALIGNMENT - 1)) == 0)
+			break;
+	}
+
+	for (j = 0; j < i; j++)
+		free_pages(page_list[j], base->lcla_pool.pages);
+
+	if (i < MAX_LCLA_ALLOC_ATTEMPTS) {
+		base->lcla_pool.base = (void *)page_list[i];
+	} else {
+		/*
+		 * After many attempts and no succees with finding the correct
+		 * alignment, try with allocating a big buffer.
+		 */
+		dev_warn(base->dev,
+			 "[%s] Failed to get %d pages @ 18 bit align.\n",
+			 __func__, base->lcla_pool.pages);
+		base->lcla_pool.base_unaligned = kmalloc(SZ_1K *
+							 base->num_phy_chans +
+							 LCLA_ALIGNMENT,
+							 GFP_KERNEL);
+		if (!base->lcla_pool.base_unaligned) {
+			ret = -ENOMEM;
+			goto failure;
+		}
+
+		base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned,
+						 LCLA_ALIGNMENT);
+	}
+
+	pool->dma_addr = dma_map_single(base->dev, pool->base,
+					SZ_1K * base->num_phy_chans,
+					DMA_TO_DEVICE);
+	if (dma_mapping_error(base->dev, pool->dma_addr)) {
+		pool->dma_addr = 0;
+		ret = -ENOMEM;
+		goto failure;
+	}
+
+	writel(virt_to_phys(base->lcla_pool.base),
+	       base->virtbase + D40_DREG_LCLA);
+failure:
+	kfree(page_list);
+	return ret;
+}
+
+static int __init d40_probe(struct platform_device *pdev)
+{
+	int err;
+	int ret = -ENOENT;
+	struct d40_base *base;
+	struct resource *res = NULL;
+	int num_reserved_chans;
+	u32 val;
+
+	base = d40_hw_detect_init(pdev);
+
+	if (!base)
+		goto failure;
+
+	num_reserved_chans = d40_phy_res_init(base);
+
+	platform_set_drvdata(pdev, base);
+
+	spin_lock_init(&base->interrupt_lock);
+	spin_lock_init(&base->execmd_lock);
+
+	/* Get IO for logical channel parameter address */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa");
+	if (!res) {
+		ret = -ENOENT;
+		d40_err(&pdev->dev, "No \"lcpa\" memory resource\n");
+		goto failure;
+	}
+	base->lcpa_size = resource_size(res);
+	base->phy_lcpa = res->start;
+
+	if (request_mem_region(res->start, resource_size(res),
+			       D40_NAME " I/O lcpa") == NULL) {
+		ret = -EBUSY;
+		d40_err(&pdev->dev,
+			"Failed to request LCPA region 0x%x-0x%x\n",
+			res->start, res->end);
+		goto failure;
+	}
+
+	/* We make use of ESRAM memory for this. */
+	val = readl(base->virtbase + D40_DREG_LCPA);
+	if (res->start != val && val != 0) {
+		dev_warn(&pdev->dev,
+			 "[%s] Mismatch LCPA dma 0x%x, def 0x%x\n",
+			 __func__, val, res->start);
+	} else
+		writel(res->start, base->virtbase + D40_DREG_LCPA);
+
+	base->lcpa_base = ioremap(res->start, resource_size(res));
+	if (!base->lcpa_base) {
+		ret = -ENOMEM;
+		d40_err(&pdev->dev, "Failed to ioremap LCPA region\n");
+		goto failure;
+	}
+	/* If lcla has to be located in ESRAM we don't need to allocate */
+	if (base->plat_data->use_esram_lcla) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+							"lcla_esram");
+		if (!res) {
+			ret = -ENOENT;
+			d40_err(&pdev->dev,
+				"No \"lcla_esram\" memory resource\n");
+			goto failure;
+		}
+		base->lcla_pool.base = ioremap(res->start,
+						resource_size(res));
+		if (!base->lcla_pool.base) {
+			ret = -ENOMEM;
+			d40_err(&pdev->dev, "Failed to ioremap LCLA region\n");
+			goto failure;
+		}
+		writel(res->start, base->virtbase + D40_DREG_LCLA);
+
+	} else {
+		ret = d40_lcla_allocate(base);
+		if (ret) {
+			d40_err(&pdev->dev, "Failed to allocate LCLA area\n");
+			goto failure;
+		}
+	}
+
+	spin_lock_init(&base->lcla_pool.lock);
+
+	base->irq = platform_get_irq(pdev, 0);
+
+	ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base);
+	if (ret) {
+		d40_err(&pdev->dev, "No IRQ defined\n");
+		goto failure;
+	}
+
+	pm_runtime_irq_safe(base->dev);
+	pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(base->dev);
+	pm_runtime_enable(base->dev);
+	pm_runtime_resume(base->dev);
+
+	if (base->plat_data->use_esram_lcla) {
+
+		base->lcpa_regulator = regulator_get(base->dev, "lcla_esram");
+		if (IS_ERR(base->lcpa_regulator)) {
+			d40_err(&pdev->dev, "Failed to get lcpa_regulator\n");
+			base->lcpa_regulator = NULL;
+			goto failure;
+		}
+
+		ret = regulator_enable(base->lcpa_regulator);
+		if (ret) {
+			d40_err(&pdev->dev,
+				"Failed to enable lcpa_regulator\n");
+			regulator_put(base->lcpa_regulator);
+			base->lcpa_regulator = NULL;
+			goto failure;
+		}
+	}
+
+	base->initialized = true;
+	err = d40_dmaengine_init(base, num_reserved_chans);
+	if (err)
+		goto failure;
+
+	d40_hw_init(base);
+
+	dev_info(base->dev, "initialized\n");
+	return 0;
+
+failure:
+	if (base) {
+		if (base->desc_slab)
+			kmem_cache_destroy(base->desc_slab);
+		if (base->virtbase)
+			iounmap(base->virtbase);
+
+		if (base->lcla_pool.base && base->plat_data->use_esram_lcla) {
+			iounmap(base->lcla_pool.base);
+			base->lcla_pool.base = NULL;
+		}
+
+		if (base->lcla_pool.dma_addr)
+			dma_unmap_single(base->dev, base->lcla_pool.dma_addr,
+					 SZ_1K * base->num_phy_chans,
+					 DMA_TO_DEVICE);
+
+		if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
+			free_pages((unsigned long)base->lcla_pool.base,
+				   base->lcla_pool.pages);
+
+		kfree(base->lcla_pool.base_unaligned);
+
+		if (base->phy_lcpa)
+			release_mem_region(base->phy_lcpa,
+					   base->lcpa_size);
+		if (base->phy_start)
+			release_mem_region(base->phy_start,
+					   base->phy_size);
+		if (base->clk) {
+			clk_disable(base->clk);
+			clk_put(base->clk);
+		}
+
+		if (base->lcpa_regulator) {
+			regulator_disable(base->lcpa_regulator);
+			regulator_put(base->lcpa_regulator);
+		}
+
+		kfree(base->lcla_pool.alloc_map);
+		kfree(base->lookup_log_chans);
+		kfree(base->lookup_phy_chans);
+		kfree(base->phy_res);
+		kfree(base);
+	}
+
+	d40_err(&pdev->dev, "probe failed\n");
+	return ret;
+}
+
+static struct platform_driver d40_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name  = D40_NAME,
+		.pm = DMA40_PM_OPS,
+	},
+};
+
+static int __init stedma40_init(void)
+{
+	return platform_driver_probe(&d40_driver, d40_probe);
+}
+subsys_initcall(stedma40_init);
diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c
new file mode 100644
index 00000000..cad9e1da
--- /dev/null
+++ b/drivers/dma/ste_dma40_ll.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/kernel.h>
+#include <plat/ste_dma40.h>
+
+#include "ste_dma40_ll.h"
+
+/* Sets up proper LCSP1 and LCSP3 register for a logical channel */
+void d40_log_cfg(struct stedma40_chan_cfg *cfg,
+		 u32 *lcsp1, u32 *lcsp3)
+{
+	u32 l3 = 0; /* dst */
+	u32 l1 = 0; /* src */
+
+	/* src is mem? -> increase address pos */
+	if (cfg->dir ==  STEDMA40_MEM_TO_PERIPH ||
+	    cfg->dir ==  STEDMA40_MEM_TO_MEM)
+		l1 |= 1 << D40_MEM_LCSP1_SCFG_INCR_POS;
+
+	/* dst is mem? -> increase address pos */
+	if (cfg->dir ==  STEDMA40_PERIPH_TO_MEM ||
+	    cfg->dir ==  STEDMA40_MEM_TO_MEM)
+		l3 |= 1 << D40_MEM_LCSP3_DCFG_INCR_POS;
+
+	/* src is hw? -> master port 1 */
+	if (cfg->dir ==  STEDMA40_PERIPH_TO_MEM ||
+	    cfg->dir ==  STEDMA40_PERIPH_TO_PERIPH)
+		l1 |= 1 << D40_MEM_LCSP1_SCFG_MST_POS;
+
+	/* dst is hw? -> master port 1 */
+	if (cfg->dir ==  STEDMA40_MEM_TO_PERIPH ||
+	    cfg->dir ==  STEDMA40_PERIPH_TO_PERIPH)
+		l3 |= 1 << D40_MEM_LCSP3_DCFG_MST_POS;
+
+	l3 |= 1 << D40_MEM_LCSP3_DCFG_EIM_POS;
+	l3 |= cfg->dst_info.psize << D40_MEM_LCSP3_DCFG_PSIZE_POS;
+	l3 |= cfg->dst_info.data_width << D40_MEM_LCSP3_DCFG_ESIZE_POS;
+
+	l1 |= 1 << D40_MEM_LCSP1_SCFG_EIM_POS;
+	l1 |= cfg->src_info.psize << D40_MEM_LCSP1_SCFG_PSIZE_POS;
+	l1 |= cfg->src_info.data_width << D40_MEM_LCSP1_SCFG_ESIZE_POS;
+
+	*lcsp1 = l1;
+	*lcsp3 = l3;
+
+}
+
+/* Sets up SRC and DST CFG register for both logical and physical channels */
+void d40_phy_cfg(struct stedma40_chan_cfg *cfg,
+		 u32 *src_cfg, u32 *dst_cfg, bool is_log)
+{
+	u32 src = 0;
+	u32 dst = 0;
+
+	if (!is_log) {
+		/* Physical channel */
+		if ((cfg->dir ==  STEDMA40_PERIPH_TO_MEM) ||
+		    (cfg->dir == STEDMA40_PERIPH_TO_PERIPH)) {
+			/* Set master port to 1 */
+			src |= 1 << D40_SREG_CFG_MST_POS;
+			src |= D40_TYPE_TO_EVENT(cfg->src_dev_type);
+
+			if (cfg->src_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL)
+				src |= 1 << D40_SREG_CFG_PHY_TM_POS;
+			else
+				src |= 3 << D40_SREG_CFG_PHY_TM_POS;
+		}
+		if ((cfg->dir ==  STEDMA40_MEM_TO_PERIPH) ||
+		    (cfg->dir == STEDMA40_PERIPH_TO_PERIPH)) {
+			/* Set master port to 1 */
+			dst |= 1 << D40_SREG_CFG_MST_POS;
+			dst |= D40_TYPE_TO_EVENT(cfg->dst_dev_type);
+
+			if (cfg->dst_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL)
+				dst |= 1 << D40_SREG_CFG_PHY_TM_POS;
+			else
+				dst |= 3 << D40_SREG_CFG_PHY_TM_POS;
+		}
+		/* Interrupt on end of transfer for destination */
+		dst |= 1 << D40_SREG_CFG_TIM_POS;
+
+		/* Generate interrupt on error */
+		src |= 1 << D40_SREG_CFG_EIM_POS;
+		dst |= 1 << D40_SREG_CFG_EIM_POS;
+
+		/* PSIZE */
+		if (cfg->src_info.psize != STEDMA40_PSIZE_PHY_1) {
+			src |= 1 << D40_SREG_CFG_PHY_PEN_POS;
+			src |= cfg->src_info.psize << D40_SREG_CFG_PSIZE_POS;
+		}
+		if (cfg->dst_info.psize != STEDMA40_PSIZE_PHY_1) {
+			dst |= 1 << D40_SREG_CFG_PHY_PEN_POS;
+			dst |= cfg->dst_info.psize << D40_SREG_CFG_PSIZE_POS;
+		}
+
+		/* Element size */
+		src |= cfg->src_info.data_width << D40_SREG_CFG_ESIZE_POS;
+		dst |= cfg->dst_info.data_width << D40_SREG_CFG_ESIZE_POS;
+
+	} else {
+		/* Logical channel */
+		dst |= 1 << D40_SREG_CFG_LOG_GIM_POS;
+		src |= 1 << D40_SREG_CFG_LOG_GIM_POS;
+	}
+
+	if (cfg->high_priority) {
+		src |= 1 << D40_SREG_CFG_PRI_POS;
+		dst |= 1 << D40_SREG_CFG_PRI_POS;
+	}
+
+	if (cfg->src_info.big_endian)
+		src |= 1 << D40_SREG_CFG_LBE_POS;
+	if (cfg->dst_info.big_endian)
+		dst |= 1 << D40_SREG_CFG_LBE_POS;
+
+	*src_cfg = src;
+	*dst_cfg = dst;
+}
+
+static int d40_phy_fill_lli(struct d40_phy_lli *lli,
+			    dma_addr_t data,
+			    u32 data_size,
+			    dma_addr_t next_lli,
+			    u32 reg_cfg,
+			    struct stedma40_half_channel_info *info,
+			    unsigned int flags)
+{
+	bool addr_inc = flags & LLI_ADDR_INC;
+	bool term_int = flags & LLI_TERM_INT;
+	unsigned int data_width = info->data_width;
+	int psize = info->psize;
+	int num_elems;
+
+	if (psize == STEDMA40_PSIZE_PHY_1)
+		num_elems = 1;
+	else
+		num_elems = 2 << psize;
+
+	/* Must be aligned */
+	if (!IS_ALIGNED(data, 0x1 << data_width))
+		return -EINVAL;
+
+	/* Transfer size can't be smaller than (num_elms * elem_size) */
+	if (data_size < num_elems * (0x1 << data_width))
+		return -EINVAL;
+
+	/* The number of elements. IE now many chunks */
+	lli->reg_elt = (data_size >> data_width) << D40_SREG_ELEM_PHY_ECNT_POS;
+
+	/*
+	 * Distance to next element sized entry.
+	 * Usually the size of the element unless you want gaps.
+	 */
+	if (addr_inc)
+		lli->reg_elt |= (0x1 << data_width) <<
+			D40_SREG_ELEM_PHY_EIDX_POS;
+
+	/* Where the data is */
+	lli->reg_ptr = data;
+	lli->reg_cfg = reg_cfg;
+
+	/* If this scatter list entry is the last one, no next link */
+	if (next_lli == 0)
+		lli->reg_lnk = 0x1 << D40_SREG_LNK_PHY_TCP_POS;
+	else
+		lli->reg_lnk = next_lli;
+
+	/* Set/clear interrupt generation on this link item.*/
+	if (term_int)
+		lli->reg_cfg |= 0x1 << D40_SREG_CFG_TIM_POS;
+	else
+		lli->reg_cfg &= ~(0x1 << D40_SREG_CFG_TIM_POS);
+
+	/* Post link */
+	lli->reg_lnk |= 0 << D40_SREG_LNK_PHY_PRE_POS;
+
+	return 0;
+}
+
+static int d40_seg_size(int size, int data_width1, int data_width2)
+{
+	u32 max_w = max(data_width1, data_width2);
+	u32 min_w = min(data_width1, data_width2);
+	u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w);
+
+	if (seg_max > STEDMA40_MAX_SEG_SIZE)
+		seg_max -= (1 << max_w);
+
+	if (size <= seg_max)
+		return size;
+
+	if (size <= 2 * seg_max)
+		return ALIGN(size / 2, 1 << max_w);
+
+	return seg_max;
+}
+
+static struct d40_phy_lli *
+d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size,
+		   dma_addr_t lli_phys, dma_addr_t first_phys, u32 reg_cfg,
+		   struct stedma40_half_channel_info *info,
+		   struct stedma40_half_channel_info *otherinfo,
+		   unsigned long flags)
+{
+	bool lastlink = flags & LLI_LAST_LINK;
+	bool addr_inc = flags & LLI_ADDR_INC;
+	bool term_int = flags & LLI_TERM_INT;
+	bool cyclic = flags & LLI_CYCLIC;
+	int err;
+	dma_addr_t next = lli_phys;
+	int size_rest = size;
+	int size_seg = 0;
+
+	/*
+	 * This piece may be split up based on d40_seg_size(); we only want the
+	 * term int on the last part.
+	 */
+	if (term_int)
+		flags &= ~LLI_TERM_INT;
+
+	do {
+		size_seg = d40_seg_size(size_rest, info->data_width,
+					otherinfo->data_width);
+		size_rest -= size_seg;
+
+		if (size_rest == 0 && term_int)
+			flags |= LLI_TERM_INT;
+
+		if (size_rest == 0 && lastlink)
+			next = cyclic ? first_phys : 0;
+		else
+			next = ALIGN(next + sizeof(struct d40_phy_lli),
+				     D40_LLI_ALIGN);
+
+		err = d40_phy_fill_lli(lli, addr, size_seg, next,
+				       reg_cfg, info, flags);
+
+		if (err)
+			goto err;
+
+		lli++;
+		if (addr_inc)
+			addr += size_seg;
+	} while (size_rest);
+
+	return lli;
+
+ err:
+	return NULL;
+}
+
+int d40_phy_sg_to_lli(struct scatterlist *sg,
+		      int sg_len,
+		      dma_addr_t target,
+		      struct d40_phy_lli *lli_sg,
+		      dma_addr_t lli_phys,
+		      u32 reg_cfg,
+		      struct stedma40_half_channel_info *info,
+		      struct stedma40_half_channel_info *otherinfo,
+		      unsigned long flags)
+{
+	int total_size = 0;
+	int i;
+	struct scatterlist *current_sg = sg;
+	struct d40_phy_lli *lli = lli_sg;
+	dma_addr_t l_phys = lli_phys;
+
+	if (!target)
+		flags |= LLI_ADDR_INC;
+
+	for_each_sg(sg, current_sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(current_sg);
+		unsigned int len = sg_dma_len(current_sg);
+		dma_addr_t dst = target ?: sg_addr;
+
+		total_size += sg_dma_len(current_sg);
+
+		if (i == sg_len - 1)
+			flags |= LLI_TERM_INT | LLI_LAST_LINK;
+
+		l_phys = ALIGN(lli_phys + (lli - lli_sg) *
+			       sizeof(struct d40_phy_lli), D40_LLI_ALIGN);
+
+		lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, lli_phys,
+					 reg_cfg, info, otherinfo, flags);
+
+		if (lli == NULL)
+			return -EINVAL;
+	}
+
+	return total_size;
+}
+
+
+/* DMA logical lli operations */
+
+static void d40_log_lli_link(struct d40_log_lli *lli_dst,
+			     struct d40_log_lli *lli_src,
+			     int next, unsigned int flags)
+{
+	bool interrupt = flags & LLI_TERM_INT;
+	u32 slos = 0;
+	u32 dlos = 0;
+
+	if (next != -EINVAL) {
+		slos = next * 2;
+		dlos = next * 2 + 1;
+	}
+
+	if (interrupt) {
+		lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK;
+		lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK;
+	}
+
+	lli_src->lcsp13 = (lli_src->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) |
+		(slos << D40_MEM_LCSP1_SLOS_POS);
+
+	lli_dst->lcsp13 = (lli_dst->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) |
+		(dlos << D40_MEM_LCSP1_SLOS_POS);
+}
+
+void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
+			   struct d40_log_lli *lli_dst,
+			   struct d40_log_lli *lli_src,
+			   int next, unsigned int flags)
+{
+	d40_log_lli_link(lli_dst, lli_src, next, flags);
+
+	writel(lli_src->lcsp02, &lcpa[0].lcsp0);
+	writel(lli_src->lcsp13, &lcpa[0].lcsp1);
+	writel(lli_dst->lcsp02, &lcpa[0].lcsp2);
+	writel(lli_dst->lcsp13, &lcpa[0].lcsp3);
+}
+
+void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
+			   struct d40_log_lli *lli_dst,
+			   struct d40_log_lli *lli_src,
+			   int next, unsigned int flags)
+{
+	d40_log_lli_link(lli_dst, lli_src, next, flags);
+
+	writel(lli_src->lcsp02, &lcla[0].lcsp02);
+	writel(lli_src->lcsp13, &lcla[0].lcsp13);
+	writel(lli_dst->lcsp02, &lcla[1].lcsp02);
+	writel(lli_dst->lcsp13, &lcla[1].lcsp13);
+}
+
+static void d40_log_fill_lli(struct d40_log_lli *lli,
+			     dma_addr_t data, u32 data_size,
+			     u32 reg_cfg,
+			     u32 data_width,
+			     unsigned int flags)
+{
+	bool addr_inc = flags & LLI_ADDR_INC;
+
+	lli->lcsp13 = reg_cfg;
+
+	/* The number of elements to transfer */
+	lli->lcsp02 = ((data_size >> data_width) <<
+		       D40_MEM_LCSP0_ECNT_POS) & D40_MEM_LCSP0_ECNT_MASK;
+
+	BUG_ON((data_size >> data_width) > STEDMA40_MAX_SEG_SIZE);
+
+	/* 16 LSBs address of the current element */
+	lli->lcsp02 |= data & D40_MEM_LCSP0_SPTR_MASK;
+	/* 16 MSBs address of the current element */
+	lli->lcsp13 |= data & D40_MEM_LCSP1_SPTR_MASK;
+
+	if (addr_inc)
+		lli->lcsp13 |= D40_MEM_LCSP1_SCFG_INCR_MASK;
+
+}
+
+static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
+				       dma_addr_t addr,
+				       int size,
+				       u32 lcsp13, /* src or dst*/
+				       u32 data_width1,
+				       u32 data_width2,
+				       unsigned int flags)
+{
+	bool addr_inc = flags & LLI_ADDR_INC;
+	struct d40_log_lli *lli = lli_sg;
+	int size_rest = size;
+	int size_seg = 0;
+
+	do {
+		size_seg = d40_seg_size(size_rest, data_width1, data_width2);
+		size_rest -= size_seg;
+
+		d40_log_fill_lli(lli,
+				 addr,
+				 size_seg,
+				 lcsp13, data_width1,
+				 flags);
+		if (addr_inc)
+			addr += size_seg;
+		lli++;
+	} while (size_rest);
+
+	return lli;
+}
+
+int d40_log_sg_to_lli(struct scatterlist *sg,
+		      int sg_len,
+		      dma_addr_t dev_addr,
+		      struct d40_log_lli *lli_sg,
+		      u32 lcsp13, /* src or dst*/
+		      u32 data_width1, u32 data_width2)
+{
+	int total_size = 0;
+	struct scatterlist *current_sg = sg;
+	int i;
+	struct d40_log_lli *lli = lli_sg;
+	unsigned long flags = 0;
+
+	if (!dev_addr)
+		flags |= LLI_ADDR_INC;
+
+	for_each_sg(sg, current_sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(current_sg);
+		unsigned int len = sg_dma_len(current_sg);
+		dma_addr_t addr = dev_addr ?: sg_addr;
+
+		total_size += sg_dma_len(current_sg);
+
+		lli = d40_log_buf_to_lli(lli, addr, len,
+					 lcsp13,
+					 data_width1,
+					 data_width2,
+					 flags);
+	}
+
+	return total_size;
+}
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h
new file mode 100644
index 00000000..51e8e539
--- /dev/null
+++ b/drivers/dma/ste_dma40_ll.h
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson SA
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson SA
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#ifndef STE_DMA40_LL_H
+#define STE_DMA40_LL_H
+
+#define D40_DREG_PCBASE		0x400
+#define D40_DREG_PCDELTA	(8 * 4)
+#define D40_LLI_ALIGN		16 /* LLI alignment must be 16 bytes. */
+
+#define D40_LCPA_CHAN_SIZE 32
+#define D40_LCPA_CHAN_DST_DELTA 16
+
+#define D40_TYPE_TO_GROUP(type) (type / 16)
+#define D40_TYPE_TO_EVENT(type) (type % 16)
+#define D40_GROUP_SIZE 8
+#define D40_PHYS_TO_GROUP(phys) ((phys & (D40_GROUP_SIZE - 1)) / 2)
+
+/* Most bits of the CFG register are the same in log as in phy mode */
+#define D40_SREG_CFG_MST_POS		15
+#define D40_SREG_CFG_TIM_POS		14
+#define D40_SREG_CFG_EIM_POS		13
+#define D40_SREG_CFG_LOG_INCR_POS	12
+#define D40_SREG_CFG_PHY_PEN_POS	12
+#define D40_SREG_CFG_PSIZE_POS		10
+#define D40_SREG_CFG_ESIZE_POS		 8
+#define D40_SREG_CFG_PRI_POS		 7
+#define D40_SREG_CFG_LBE_POS		 6
+#define D40_SREG_CFG_LOG_GIM_POS	 5
+#define D40_SREG_CFG_LOG_MFU_POS	 4
+#define D40_SREG_CFG_PHY_TM_POS		 4
+#define D40_SREG_CFG_PHY_EVTL_POS	 0
+
+
+/* Standard channel parameters - basic mode (element register) */
+#define D40_SREG_ELEM_PHY_ECNT_POS	16
+#define D40_SREG_ELEM_PHY_EIDX_POS	 0
+
+#define D40_SREG_ELEM_PHY_ECNT_MASK	(0xFFFF << D40_SREG_ELEM_PHY_ECNT_POS)
+
+/* Standard channel parameters - basic mode (Link register) */
+#define D40_SREG_LNK_PHY_TCP_POS	0
+#define D40_SREG_LNK_PHY_LMP_POS	1
+#define D40_SREG_LNK_PHY_PRE_POS	2
+/*
+ * Source  destination link address. Contains the
+ * 29-bit byte word aligned address of the reload area.
+ */
+#define D40_SREG_LNK_PHYS_LNK_MASK	0xFFFFFFF8UL
+
+/* Standard basic channel logical mode */
+
+/* Element register */
+#define D40_SREG_ELEM_LOG_ECNT_POS	16
+#define D40_SREG_ELEM_LOG_LIDX_POS	 8
+#define D40_SREG_ELEM_LOG_LOS_POS	 1
+#define D40_SREG_ELEM_LOG_TCP_POS	 0
+
+#define D40_SREG_ELEM_LOG_LIDX_MASK	(0xFF << D40_SREG_ELEM_LOG_LIDX_POS)
+
+/* Link register */
+#define D40_EVENTLINE_POS(i)		(2 * i)
+#define D40_EVENTLINE_MASK(i)		(0x3 << D40_EVENTLINE_POS(i))
+
+/* Standard basic channel logical params in memory */
+
+/* LCSP0 */
+#define D40_MEM_LCSP0_ECNT_POS		16
+#define D40_MEM_LCSP0_SPTR_POS		 0
+
+#define D40_MEM_LCSP0_ECNT_MASK		(0xFFFF << D40_MEM_LCSP0_ECNT_POS)
+#define D40_MEM_LCSP0_SPTR_MASK		(0xFFFF << D40_MEM_LCSP0_SPTR_POS)
+
+/* LCSP1 */
+#define D40_MEM_LCSP1_SPTR_POS		16
+#define D40_MEM_LCSP1_SCFG_MST_POS	15
+#define D40_MEM_LCSP1_SCFG_TIM_POS	14
+#define D40_MEM_LCSP1_SCFG_EIM_POS	13
+#define D40_MEM_LCSP1_SCFG_INCR_POS	12
+#define D40_MEM_LCSP1_SCFG_PSIZE_POS	10
+#define D40_MEM_LCSP1_SCFG_ESIZE_POS	 8
+#define D40_MEM_LCSP1_SLOS_POS		 1
+#define D40_MEM_LCSP1_STCP_POS		 0
+
+#define D40_MEM_LCSP1_SPTR_MASK		(0xFFFF << D40_MEM_LCSP1_SPTR_POS)
+#define D40_MEM_LCSP1_SCFG_TIM_MASK	(0x1 << D40_MEM_LCSP1_SCFG_TIM_POS)
+#define D40_MEM_LCSP1_SCFG_INCR_MASK	(0x1 << D40_MEM_LCSP1_SCFG_INCR_POS)
+#define D40_MEM_LCSP1_SCFG_PSIZE_MASK	(0x3 << D40_MEM_LCSP1_SCFG_PSIZE_POS)
+#define D40_MEM_LCSP1_SLOS_MASK		(0x7F << D40_MEM_LCSP1_SLOS_POS)
+#define D40_MEM_LCSP1_STCP_MASK		(0x1 << D40_MEM_LCSP1_STCP_POS)
+
+/* LCSP2 */
+#define D40_MEM_LCSP2_ECNT_POS		16
+
+#define D40_MEM_LCSP2_ECNT_MASK		(0xFFFF << D40_MEM_LCSP2_ECNT_POS)
+
+/* LCSP3 */
+#define D40_MEM_LCSP3_DCFG_MST_POS	15
+#define D40_MEM_LCSP3_DCFG_TIM_POS	14
+#define D40_MEM_LCSP3_DCFG_EIM_POS	13
+#define D40_MEM_LCSP3_DCFG_INCR_POS	12
+#define D40_MEM_LCSP3_DCFG_PSIZE_POS	10
+#define D40_MEM_LCSP3_DCFG_ESIZE_POS	 8
+#define D40_MEM_LCSP3_DLOS_POS		 1
+#define D40_MEM_LCSP3_DTCP_POS		 0
+
+#define D40_MEM_LCSP3_DLOS_MASK		(0x7F << D40_MEM_LCSP3_DLOS_POS)
+#define D40_MEM_LCSP3_DTCP_MASK		(0x1 << D40_MEM_LCSP3_DTCP_POS)
+
+
+/* Standard channel parameter register offsets */
+#define D40_CHAN_REG_SSCFG	0x00
+#define D40_CHAN_REG_SSELT	0x04
+#define D40_CHAN_REG_SSPTR	0x08
+#define D40_CHAN_REG_SSLNK	0x0C
+#define D40_CHAN_REG_SDCFG	0x10
+#define D40_CHAN_REG_SDELT	0x14
+#define D40_CHAN_REG_SDPTR	0x18
+#define D40_CHAN_REG_SDLNK	0x1C
+
+/* DMA Register Offsets */
+#define D40_DREG_GCC		0x000
+#define D40_DREG_GCC_ENA	0x1
+/* This assumes that there are only 4 event groups */
+#define D40_DREG_GCC_ENABLE_ALL	0xff01
+#define D40_DREG_GCC_EVTGRP_POS 8
+#define D40_DREG_GCC_SRC 0
+#define D40_DREG_GCC_DST 1
+#define D40_DREG_GCC_EVTGRP_ENA(x, y) \
+	(1 << (D40_DREG_GCC_EVTGRP_POS + 2 * x + y))
+
+#define D40_DREG_PRTYP		0x004
+#define D40_DREG_PRSME		0x008
+#define D40_DREG_PRSMO		0x00C
+#define D40_DREG_PRMSE		0x010
+#define D40_DREG_PRMSO		0x014
+#define D40_DREG_PRMOE		0x018
+#define D40_DREG_PRMOO		0x01C
+#define D40_DREG_PRMO_PCHAN_BASIC		0x1
+#define D40_DREG_PRMO_PCHAN_MODULO		0x2
+#define D40_DREG_PRMO_PCHAN_DOUBLE_DST		0x3
+#define D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG	0x1
+#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY	0x2
+#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG	0x3
+
+#define D40_DREG_LCPA		0x020
+#define D40_DREG_LCLA		0x024
+#define D40_DREG_ACTIVE		0x050
+#define D40_DREG_ACTIVO		0x054
+#define D40_DREG_FSEB1		0x058
+#define D40_DREG_FSEB2		0x05C
+#define D40_DREG_PCMIS		0x060
+#define D40_DREG_PCICR		0x064
+#define D40_DREG_PCTIS		0x068
+#define D40_DREG_PCEIS		0x06C
+#define D40_DREG_LCMIS0		0x080
+#define D40_DREG_LCMIS1		0x084
+#define D40_DREG_LCMIS2		0x088
+#define D40_DREG_LCMIS3		0x08C
+#define D40_DREG_LCICR0		0x090
+#define D40_DREG_LCICR1		0x094
+#define D40_DREG_LCICR2		0x098
+#define D40_DREG_LCICR3		0x09C
+#define D40_DREG_LCTIS0		0x0A0
+#define D40_DREG_LCTIS1		0x0A4
+#define D40_DREG_LCTIS2		0x0A8
+#define D40_DREG_LCTIS3		0x0AC
+#define D40_DREG_LCEIS0		0x0B0
+#define D40_DREG_LCEIS1		0x0B4
+#define D40_DREG_LCEIS2		0x0B8
+#define D40_DREG_LCEIS3		0x0BC
+#define D40_DREG_PSEG1		0x110
+#define D40_DREG_PSEG2		0x114
+#define D40_DREG_PSEG3		0x118
+#define D40_DREG_PSEG4		0x11C
+#define D40_DREG_PCEG1		0x120
+#define D40_DREG_PCEG2		0x124
+#define D40_DREG_PCEG3		0x128
+#define D40_DREG_PCEG4		0x12C
+#define D40_DREG_RSEG1		0x130
+#define D40_DREG_RSEG2		0x134
+#define D40_DREG_RSEG3		0x138
+#define D40_DREG_RSEG4		0x13C
+#define D40_DREG_RCEG1		0x140
+#define D40_DREG_RCEG2		0x144
+#define D40_DREG_RCEG3		0x148
+#define D40_DREG_RCEG4		0x14C
+#define D40_DREG_STFU		0xFC8
+#define D40_DREG_ICFG		0xFCC
+#define D40_DREG_PERIPHID0	0xFE0
+#define D40_DREG_PERIPHID1	0xFE4
+#define D40_DREG_PERIPHID2	0xFE8
+#define D40_DREG_PERIPHID3	0xFEC
+#define D40_DREG_CELLID0	0xFF0
+#define D40_DREG_CELLID1	0xFF4
+#define D40_DREG_CELLID2	0xFF8
+#define D40_DREG_CELLID3	0xFFC
+
+/* LLI related structures */
+
+/**
+ * struct d40_phy_lli - The basic configration register for each physical
+ * channel.
+ *
+ * @reg_cfg: The configuration register.
+ * @reg_elt: The element register.
+ * @reg_ptr: The pointer register.
+ * @reg_lnk: The link register.
+ *
+ * These registers are set up for both physical and logical transfers
+ * Note that the bit in each register means differently in logical and
+ * physical(standard) mode.
+ *
+ * This struct must be 16 bytes aligned, and only contain physical registers
+ * since it will be directly accessed by the DMA.
+ */
+struct d40_phy_lli {
+	u32 reg_cfg;
+	u32 reg_elt;
+	u32 reg_ptr;
+	u32 reg_lnk;
+};
+
+/**
+ * struct d40_phy_lli_bidir - struct for a transfer.
+ *
+ * @src: Register settings for src channel.
+ * @dst: Register settings for dst channel.
+ *
+ * All DMA transfers have a source and a destination.
+ */
+
+struct d40_phy_lli_bidir {
+	struct d40_phy_lli	*src;
+	struct d40_phy_lli	*dst;
+};
+
+
+/**
+ * struct d40_log_lli - logical lli configuration
+ *
+ * @lcsp02: Either maps to register lcsp0 if src or lcsp2 if dst.
+ * @lcsp13: Either maps to register lcsp1 if src or lcsp3 if dst.
+ *
+ * This struct must be 8 bytes aligned since it will be accessed directy by
+ * the DMA. Never add any none hw mapped registers to this struct.
+ */
+
+struct d40_log_lli {
+	u32 lcsp02;
+	u32 lcsp13;
+};
+
+/**
+ * struct d40_log_lli_bidir - For both src and dst
+ *
+ * @src: pointer to src lli configuration.
+ * @dst: pointer to dst lli configuration.
+ *
+ * You always have a src and a dst when doing DMA transfers.
+ */
+
+struct d40_log_lli_bidir {
+	struct d40_log_lli *src;
+	struct d40_log_lli *dst;
+};
+
+/**
+ * struct d40_log_lli_full - LCPA layout
+ *
+ * @lcsp0: Logical Channel Standard Param 0 - Src.
+ * @lcsp1: Logical Channel Standard Param 1 - Src.
+ * @lcsp2: Logical Channel Standard Param 2 - Dst.
+ * @lcsp3: Logical Channel Standard Param 3 - Dst.
+ *
+ * This struct maps to LCPA physical memory layout. Must map to
+ * the hw.
+ */
+struct d40_log_lli_full {
+	u32 lcsp0;
+	u32 lcsp1;
+	u32 lcsp2;
+	u32 lcsp3;
+};
+
+/**
+ * struct d40_def_lcsp - Default LCSP1 and LCSP3 settings
+ *
+ * @lcsp3: The default configuration for dst.
+ * @lcsp1: The default configuration for src.
+ */
+struct d40_def_lcsp {
+	u32 lcsp3;
+	u32 lcsp1;
+};
+
+/* Physical channels */
+
+enum d40_lli_flags {
+	LLI_ADDR_INC	= 1 << 0,
+	LLI_TERM_INT	= 1 << 1,
+	LLI_CYCLIC	= 1 << 2,
+	LLI_LAST_LINK	= 1 << 3,
+};
+
+void d40_phy_cfg(struct stedma40_chan_cfg *cfg,
+		 u32 *src_cfg,
+		 u32 *dst_cfg,
+		 bool is_log);
+
+void d40_log_cfg(struct stedma40_chan_cfg *cfg,
+		 u32 *lcsp1,
+		 u32 *lcsp2);
+
+int d40_phy_sg_to_lli(struct scatterlist *sg,
+		      int sg_len,
+		      dma_addr_t target,
+		      struct d40_phy_lli *lli,
+		      dma_addr_t lli_phys,
+		      u32 reg_cfg,
+		      struct stedma40_half_channel_info *info,
+		      struct stedma40_half_channel_info *otherinfo,
+		      unsigned long flags);
+
+/* Logical channels */
+
+int d40_log_sg_to_lli(struct scatterlist *sg,
+		      int sg_len,
+		      dma_addr_t dev_addr,
+		      struct d40_log_lli *lli_sg,
+		      u32 lcsp13, /* src or dst*/
+		      u32 data_width1, u32 data_width2);
+
+void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
+			    struct d40_log_lli *lli_dst,
+			    struct d40_log_lli *lli_src,
+			    int next, unsigned int flags);
+
+void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
+			    struct d40_log_lli *lli_dst,
+			    struct d40_log_lli *lli_src,
+			    int next, unsigned int flags);
+
+#endif /* STE_DMA40_LLI_H */
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
new file mode 100644
index 00000000..4e0dff59
--- /dev/null
+++ b/drivers/dma/timb_dma.c
@@ -0,0 +1,834 @@
+/*
+ * timb_dma.c timberdale FPGA DMA driver
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA DMA engine
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <linux/timb_dma.h>
+
+#include "dmaengine.h"
+
+#define DRIVER_NAME "timb-dma"
+
+/* Global DMA registers */
+#define TIMBDMA_ACR		0x34
+#define TIMBDMA_32BIT_ADDR	0x01
+
+#define TIMBDMA_ISR		0x080000
+#define TIMBDMA_IPR		0x080004
+#define TIMBDMA_IER		0x080008
+
+/* Channel specific registers */
+/* RX instances base addresses are 0x00, 0x40, 0x80 ...
+ * TX instances base addresses are 0x18, 0x58, 0x98 ...
+ */
+#define TIMBDMA_INSTANCE_OFFSET		0x40
+#define TIMBDMA_INSTANCE_TX_OFFSET	0x18
+
+/* RX registers, relative the instance base */
+#define TIMBDMA_OFFS_RX_DHAR	0x00
+#define TIMBDMA_OFFS_RX_DLAR	0x04
+#define TIMBDMA_OFFS_RX_LR	0x0C
+#define TIMBDMA_OFFS_RX_BLR	0x10
+#define TIMBDMA_OFFS_RX_ER	0x14
+#define TIMBDMA_RX_EN		0x01
+/* bytes per Row, video specific register
+ * which is placed after the TX registers...
+ */
+#define TIMBDMA_OFFS_RX_BPRR	0x30
+
+/* TX registers, relative the instance base */
+#define TIMBDMA_OFFS_TX_DHAR	0x00
+#define TIMBDMA_OFFS_TX_DLAR	0x04
+#define TIMBDMA_OFFS_TX_BLR	0x0C
+#define TIMBDMA_OFFS_TX_LR	0x14
+
+
+#define TIMB_DMA_DESC_SIZE	8
+
+struct timb_dma_desc {
+	struct list_head		desc_node;
+	struct dma_async_tx_descriptor	txd;
+	u8				*desc_list;
+	unsigned int			desc_list_len;
+	bool				interrupt;
+};
+
+struct timb_dma_chan {
+	struct dma_chan		chan;
+	void __iomem		*membase;
+	spinlock_t		lock; /* Used to protect data structures,
+					especially the lists and descriptors,
+					from races between the tasklet and calls
+					from above */
+	bool			ongoing;
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	unsigned int		bytes_per_line;
+	enum dma_transfer_direction	direction;
+	unsigned int		descs; /* Descriptors to allocate */
+	unsigned int		desc_elems; /* number of elems per descriptor */
+};
+
+struct timb_dma {
+	struct dma_device	dma;
+	void __iomem		*membase;
+	struct tasklet_struct	tasklet;
+	struct timb_dma_chan	channels[0];
+};
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+static struct device *chan2dmadev(struct dma_chan *chan)
+{
+	return chan2dev(chan)->parent->parent;
+}
+
+static struct timb_dma *tdchantotd(struct timb_dma_chan *td_chan)
+{
+	int id = td_chan->chan.chan_id;
+	return (struct timb_dma *)((u8 *)td_chan -
+		id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma));
+}
+
+/* Must be called with the spinlock held */
+static void __td_enable_chan_irq(struct timb_dma_chan *td_chan)
+{
+	int id = td_chan->chan.chan_id;
+	struct timb_dma *td = tdchantotd(td_chan);
+	u32 ier;
+
+	/* enable interrupt for this channel */
+	ier = ioread32(td->membase + TIMBDMA_IER);
+	ier |= 1 << id;
+	dev_dbg(chan2dev(&td_chan->chan), "Enabling irq: %d, IER: 0x%x\n", id,
+		ier);
+	iowrite32(ier, td->membase + TIMBDMA_IER);
+}
+
+/* Should be called with the spinlock held */
+static bool __td_dma_done_ack(struct timb_dma_chan *td_chan)
+{
+	int id = td_chan->chan.chan_id;
+	struct timb_dma *td = (struct timb_dma *)((u8 *)td_chan -
+		id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma));
+	u32 isr;
+	bool done = false;
+
+	dev_dbg(chan2dev(&td_chan->chan), "Checking irq: %d, td: %p\n", id, td);
+
+	isr = ioread32(td->membase + TIMBDMA_ISR) & (1 << id);
+	if (isr) {
+		iowrite32(isr, td->membase + TIMBDMA_ISR);
+		done = true;
+	}
+
+	return done;
+}
+
+static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc,
+	bool single)
+{
+	dma_addr_t addr;
+	int len;
+
+	addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) |
+		dma_desc[4];
+
+	len = (dma_desc[3] << 8) | dma_desc[2];
+
+	if (single)
+		dma_unmap_single(chan2dev(&td_chan->chan), addr, len,
+			DMA_TO_DEVICE);
+	else
+		dma_unmap_page(chan2dev(&td_chan->chan), addr, len,
+			DMA_TO_DEVICE);
+}
+
+static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
+{
+	struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan,
+		struct timb_dma_chan, chan);
+	u8 *descs;
+
+	for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) {
+		__td_unmap_desc(td_chan, descs, single);
+		if (descs[0] & 0x02)
+			break;
+	}
+}
+
+static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
+	struct scatterlist *sg, bool last)
+{
+	if (sg_dma_len(sg) > USHRT_MAX) {
+		dev_err(chan2dev(&td_chan->chan), "Too big sg element\n");
+		return -EINVAL;
+	}
+
+	/* length must be word aligned */
+	if (sg_dma_len(sg) % sizeof(u32)) {
+		dev_err(chan2dev(&td_chan->chan), "Incorrect length: %d\n",
+			sg_dma_len(sg));
+		return -EINVAL;
+	}
+
+	dev_dbg(chan2dev(&td_chan->chan), "desc: %p, addr: 0x%llx\n",
+		dma_desc, (unsigned long long)sg_dma_address(sg));
+
+	dma_desc[7] = (sg_dma_address(sg) >> 24) & 0xff;
+	dma_desc[6] = (sg_dma_address(sg) >> 16) & 0xff;
+	dma_desc[5] = (sg_dma_address(sg) >> 8) & 0xff;
+	dma_desc[4] = (sg_dma_address(sg) >> 0) & 0xff;
+
+	dma_desc[3] = (sg_dma_len(sg) >> 8) & 0xff;
+	dma_desc[2] = (sg_dma_len(sg) >> 0) & 0xff;
+
+	dma_desc[1] = 0x00;
+	dma_desc[0] = 0x21 | (last ? 0x02 : 0); /* tran, valid */
+
+	return 0;
+}
+
+/* Must be called with the spinlock held */
+static void __td_start_dma(struct timb_dma_chan *td_chan)
+{
+	struct timb_dma_desc *td_desc;
+
+	if (td_chan->ongoing) {
+		dev_err(chan2dev(&td_chan->chan),
+			"Transfer already ongoing\n");
+		return;
+	}
+
+	td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc,
+		desc_node);
+
+	dev_dbg(chan2dev(&td_chan->chan),
+		"td_chan: %p, chan: %d, membase: %p\n",
+		td_chan, td_chan->chan.chan_id, td_chan->membase);
+
+	if (td_chan->direction == DMA_DEV_TO_MEM) {
+
+		/* descriptor address */
+		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR);
+		iowrite32(td_desc->txd.phys, td_chan->membase +
+			TIMBDMA_OFFS_RX_DLAR);
+		/* Bytes per line */
+		iowrite32(td_chan->bytes_per_line, td_chan->membase +
+			TIMBDMA_OFFS_RX_BPRR);
+		/* enable RX */
+		iowrite32(TIMBDMA_RX_EN, td_chan->membase + TIMBDMA_OFFS_RX_ER);
+	} else {
+		/* address high */
+		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DHAR);
+		iowrite32(td_desc->txd.phys, td_chan->membase +
+			TIMBDMA_OFFS_TX_DLAR);
+	}
+
+	td_chan->ongoing = true;
+
+	if (td_desc->interrupt)
+		__td_enable_chan_irq(td_chan);
+}
+
+static void __td_finish(struct timb_dma_chan *td_chan)
+{
+	dma_async_tx_callback		callback;
+	void				*param;
+	struct dma_async_tx_descriptor	*txd;
+	struct timb_dma_desc		*td_desc;
+
+	/* can happen if the descriptor is canceled */
+	if (list_empty(&td_chan->active_list))
+		return;
+
+	td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc,
+		desc_node);
+	txd = &td_desc->txd;
+
+	dev_dbg(chan2dev(&td_chan->chan), "descriptor %u complete\n",
+		txd->cookie);
+
+	/* make sure to stop the transfer */
+	if (td_chan->direction == DMA_DEV_TO_MEM)
+		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER);
+/* Currently no support for stopping DMA transfers
+	else
+		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DLAR);
+*/
+	dma_cookie_complete(txd);
+	td_chan->ongoing = false;
+
+	callback = txd->callback;
+	param = txd->callback_param;
+
+	list_move(&td_desc->desc_node, &td_chan->free_list);
+
+	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
+		__td_unmap_descs(td_desc,
+			txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE);
+
+	/*
+	 * The API requires that no submissions are done from a
+	 * callback, so we don't need to drop the lock here
+	 */
+	if (callback)
+		callback(param);
+}
+
+static u32 __td_ier_mask(struct timb_dma *td)
+{
+	int i;
+	u32 ret = 0;
+
+	for (i = 0; i < td->dma.chancnt; i++) {
+		struct timb_dma_chan *td_chan = td->channels + i;
+		if (td_chan->ongoing) {
+			struct timb_dma_desc *td_desc =
+				list_entry(td_chan->active_list.next,
+				struct timb_dma_desc, desc_node);
+			if (td_desc->interrupt)
+				ret |= 1 << i;
+		}
+	}
+
+	return ret;
+}
+
+static void __td_start_next(struct timb_dma_chan *td_chan)
+{
+	struct timb_dma_desc *td_desc;
+
+	BUG_ON(list_empty(&td_chan->queue));
+	BUG_ON(td_chan->ongoing);
+
+	td_desc = list_entry(td_chan->queue.next, struct timb_dma_desc,
+		desc_node);
+
+	dev_dbg(chan2dev(&td_chan->chan), "%s: started %u\n",
+		__func__, td_desc->txd.cookie);
+
+	list_move(&td_desc->desc_node, &td_chan->active_list);
+	__td_start_dma(td_chan);
+}
+
+static dma_cookie_t td_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct timb_dma_desc *td_desc = container_of(txd, struct timb_dma_desc,
+		txd);
+	struct timb_dma_chan *td_chan = container_of(txd->chan,
+		struct timb_dma_chan, chan);
+	dma_cookie_t cookie;
+
+	spin_lock_bh(&td_chan->lock);
+	cookie = dma_cookie_assign(txd);
+
+	if (list_empty(&td_chan->active_list)) {
+		dev_dbg(chan2dev(txd->chan), "%s: started %u\n", __func__,
+			txd->cookie);
+		list_add_tail(&td_desc->desc_node, &td_chan->active_list);
+		__td_start_dma(td_chan);
+	} else {
+		dev_dbg(chan2dev(txd->chan), "tx_submit: queued %u\n",
+			txd->cookie);
+
+		list_add_tail(&td_desc->desc_node, &td_chan->queue);
+	}
+
+	spin_unlock_bh(&td_chan->lock);
+
+	return cookie;
+}
+
+static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan)
+{
+	struct dma_chan *chan = &td_chan->chan;
+	struct timb_dma_desc *td_desc;
+	int err;
+
+	td_desc = kzalloc(sizeof(struct timb_dma_desc), GFP_KERNEL);
+	if (!td_desc) {
+		dev_err(chan2dev(chan), "Failed to alloc descriptor\n");
+		goto out;
+	}
+
+	td_desc->desc_list_len = td_chan->desc_elems * TIMB_DMA_DESC_SIZE;
+
+	td_desc->desc_list = kzalloc(td_desc->desc_list_len, GFP_KERNEL);
+	if (!td_desc->desc_list) {
+		dev_err(chan2dev(chan), "Failed to alloc descriptor\n");
+		goto err;
+	}
+
+	dma_async_tx_descriptor_init(&td_desc->txd, chan);
+	td_desc->txd.tx_submit = td_tx_submit;
+	td_desc->txd.flags = DMA_CTRL_ACK;
+
+	td_desc->txd.phys = dma_map_single(chan2dmadev(chan),
+		td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE);
+
+	err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys);
+	if (err) {
+		dev_err(chan2dev(chan), "DMA mapping error: %d\n", err);
+		goto err;
+	}
+
+	return td_desc;
+err:
+	kfree(td_desc->desc_list);
+	kfree(td_desc);
+out:
+	return NULL;
+
+}
+
+static void td_free_desc(struct timb_dma_desc *td_desc)
+{
+	dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc);
+	dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys,
+		td_desc->desc_list_len, DMA_TO_DEVICE);
+
+	kfree(td_desc->desc_list);
+	kfree(td_desc);
+}
+
+static void td_desc_put(struct timb_dma_chan *td_chan,
+	struct timb_dma_desc *td_desc)
+{
+	dev_dbg(chan2dev(&td_chan->chan), "Putting desc: %p\n", td_desc);
+
+	spin_lock_bh(&td_chan->lock);
+	list_add(&td_desc->desc_node, &td_chan->free_list);
+	spin_unlock_bh(&td_chan->lock);
+}
+
+static struct timb_dma_desc *td_desc_get(struct timb_dma_chan *td_chan)
+{
+	struct timb_dma_desc *td_desc, *_td_desc;
+	struct timb_dma_desc *ret = NULL;
+
+	spin_lock_bh(&td_chan->lock);
+	list_for_each_entry_safe(td_desc, _td_desc, &td_chan->free_list,
+		desc_node) {
+		if (async_tx_test_ack(&td_desc->txd)) {
+			list_del(&td_desc->desc_node);
+			ret = td_desc;
+			break;
+		}
+		dev_dbg(chan2dev(&td_chan->chan), "desc %p not ACKed\n",
+			td_desc);
+	}
+	spin_unlock_bh(&td_chan->lock);
+
+	return ret;
+}
+
+static int td_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+	int i;
+
+	dev_dbg(chan2dev(chan), "%s: entry\n", __func__);
+
+	BUG_ON(!list_empty(&td_chan->free_list));
+	for (i = 0; i < td_chan->descs; i++) {
+		struct timb_dma_desc *td_desc = td_alloc_init_desc(td_chan);
+		if (!td_desc) {
+			if (i)
+				break;
+			else {
+				dev_err(chan2dev(chan),
+					"Couldnt allocate any descriptors\n");
+				return -ENOMEM;
+			}
+		}
+
+		td_desc_put(td_chan, td_desc);
+	}
+
+	spin_lock_bh(&td_chan->lock);
+	dma_cookie_init(chan);
+	spin_unlock_bh(&td_chan->lock);
+
+	return 0;
+}
+
+static void td_free_chan_resources(struct dma_chan *chan)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+	struct timb_dma_desc *td_desc, *_td_desc;
+	LIST_HEAD(list);
+
+	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+	/* check that all descriptors are free */
+	BUG_ON(!list_empty(&td_chan->active_list));
+	BUG_ON(!list_empty(&td_chan->queue));
+
+	spin_lock_bh(&td_chan->lock);
+	list_splice_init(&td_chan->free_list, &list);
+	spin_unlock_bh(&td_chan->lock);
+
+	list_for_each_entry_safe(td_desc, _td_desc, &list, desc_node) {
+		dev_dbg(chan2dev(chan), "%s: Freeing desc: %p\n", __func__,
+			td_desc);
+		td_free_desc(td_desc);
+	}
+}
+
+static enum dma_status td_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+				    struct dma_tx_state *txstate)
+{
+	enum dma_status ret;
+
+	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	dev_dbg(chan2dev(chan), "%s: exit, ret: %d\n", 	__func__, ret);
+
+	return ret;
+}
+
+static void td_issue_pending(struct dma_chan *chan)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+
+	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+	spin_lock_bh(&td_chan->lock);
+
+	if (!list_empty(&td_chan->active_list))
+		/* transfer ongoing */
+		if (__td_dma_done_ack(td_chan))
+			__td_finish(td_chan);
+
+	if (list_empty(&td_chan->active_list) && !list_empty(&td_chan->queue))
+		__td_start_next(td_chan);
+
+	spin_unlock_bh(&td_chan->lock);
+}
+
+static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
+	struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+	struct timb_dma_desc *td_desc;
+	struct scatterlist *sg;
+	unsigned int i;
+	unsigned int desc_usage = 0;
+
+	if (!sgl || !sg_len) {
+		dev_err(chan2dev(chan), "%s: No SG list\n", __func__);
+		return NULL;
+	}
+
+	/* even channels are for RX, odd for TX */
+	if (td_chan->direction != direction) {
+		dev_err(chan2dev(chan),
+			"Requesting channel in wrong direction\n");
+		return NULL;
+	}
+
+	td_desc = td_desc_get(td_chan);
+	if (!td_desc) {
+		dev_err(chan2dev(chan), "Not enough descriptors available\n");
+		return NULL;
+	}
+
+	td_desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		int err;
+		if (desc_usage > td_desc->desc_list_len) {
+			dev_err(chan2dev(chan), "No descriptor space\n");
+			return NULL;
+		}
+
+		err = td_fill_desc(td_chan, td_desc->desc_list + desc_usage, sg,
+			i == (sg_len - 1));
+		if (err) {
+			dev_err(chan2dev(chan), "Failed to update desc: %d\n",
+				err);
+			td_desc_put(td_chan, td_desc);
+			return NULL;
+		}
+		desc_usage += TIMB_DMA_DESC_SIZE;
+	}
+
+	dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys,
+		td_desc->desc_list_len, DMA_MEM_TO_DEV);
+
+	return &td_desc->txd;
+}
+
+static int td_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		      unsigned long arg)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+	struct timb_dma_desc *td_desc, *_td_desc;
+
+	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+	if (cmd != DMA_TERMINATE_ALL)
+		return -ENXIO;
+
+	/* first the easy part, put the queue into the free list */
+	spin_lock_bh(&td_chan->lock);
+	list_for_each_entry_safe(td_desc, _td_desc, &td_chan->queue,
+		desc_node)
+		list_move(&td_desc->desc_node, &td_chan->free_list);
+
+	/* now tear down the running */
+	__td_finish(td_chan);
+	spin_unlock_bh(&td_chan->lock);
+
+	return 0;
+}
+
+static void td_tasklet(unsigned long data)
+{
+	struct timb_dma *td = (struct timb_dma *)data;
+	u32 isr;
+	u32 ipr;
+	u32 ier;
+	int i;
+
+	isr = ioread32(td->membase + TIMBDMA_ISR);
+	ipr = isr & __td_ier_mask(td);
+
+	/* ack the interrupts */
+	iowrite32(ipr, td->membase + TIMBDMA_ISR);
+
+	for (i = 0; i < td->dma.chancnt; i++)
+		if (ipr & (1 << i)) {
+			struct timb_dma_chan *td_chan = td->channels + i;
+			spin_lock(&td_chan->lock);
+			__td_finish(td_chan);
+			if (!list_empty(&td_chan->queue))
+				__td_start_next(td_chan);
+			spin_unlock(&td_chan->lock);
+		}
+
+	ier = __td_ier_mask(td);
+	iowrite32(ier, td->membase + TIMBDMA_IER);
+}
+
+
+static irqreturn_t td_irq(int irq, void *devid)
+{
+	struct timb_dma *td = devid;
+	u32 ipr = ioread32(td->membase + TIMBDMA_IPR);
+
+	if (ipr) {
+		/* disable interrupts, will be re-enabled in tasklet */
+		iowrite32(0, td->membase + TIMBDMA_IER);
+
+		tasklet_schedule(&td->tasklet);
+
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+
+static int __devinit td_probe(struct platform_device *pdev)
+{
+	struct timb_dma_platform_data *pdata = pdev->dev.platform_data;
+	struct timb_dma *td;
+	struct resource *iomem;
+	int irq;
+	int err;
+	int i;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iomem)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	if (!request_mem_region(iomem->start, resource_size(iomem),
+		DRIVER_NAME))
+		return -EBUSY;
+
+	td  = kzalloc(sizeof(struct timb_dma) +
+		sizeof(struct timb_dma_chan) * pdata->nr_channels, GFP_KERNEL);
+	if (!td) {
+		err = -ENOMEM;
+		goto err_release_region;
+	}
+
+	dev_dbg(&pdev->dev, "Allocated TD: %p\n", td);
+
+	td->membase = ioremap(iomem->start, resource_size(iomem));
+	if (!td->membase) {
+		dev_err(&pdev->dev, "Failed to remap I/O memory\n");
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	/* 32bit addressing */
+	iowrite32(TIMBDMA_32BIT_ADDR, td->membase + TIMBDMA_ACR);
+
+	/* disable and clear any interrupts */
+	iowrite32(0x0, td->membase + TIMBDMA_IER);
+	iowrite32(0xFFFFFFFF, td->membase + TIMBDMA_ISR);
+
+	tasklet_init(&td->tasklet, td_tasklet, (unsigned long)td);
+
+	err = request_irq(irq, td_irq, IRQF_SHARED, DRIVER_NAME, td);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to request IRQ\n");
+		goto err_tasklet_kill;
+	}
+
+	td->dma.device_alloc_chan_resources	= td_alloc_chan_resources;
+	td->dma.device_free_chan_resources	= td_free_chan_resources;
+	td->dma.device_tx_status		= td_tx_status;
+	td->dma.device_issue_pending		= td_issue_pending;
+
+	dma_cap_set(DMA_SLAVE, td->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, td->dma.cap_mask);
+	td->dma.device_prep_slave_sg = td_prep_slave_sg;
+	td->dma.device_control = td_control;
+
+	td->dma.dev = &pdev->dev;
+
+	INIT_LIST_HEAD(&td->dma.channels);
+
+	for (i = 0; i < pdata->nr_channels; i++) {
+		struct timb_dma_chan *td_chan = &td->channels[i];
+		struct timb_dma_platform_data_channel *pchan =
+			pdata->channels + i;
+
+		/* even channels are RX, odd are TX */
+		if ((i % 2) == pchan->rx) {
+			dev_err(&pdev->dev, "Wrong channel configuration\n");
+			err = -EINVAL;
+			goto err_free_irq;
+		}
+
+		td_chan->chan.device = &td->dma;
+		dma_cookie_init(&td_chan->chan);
+		spin_lock_init(&td_chan->lock);
+		INIT_LIST_HEAD(&td_chan->active_list);
+		INIT_LIST_HEAD(&td_chan->queue);
+		INIT_LIST_HEAD(&td_chan->free_list);
+
+		td_chan->descs = pchan->descriptors;
+		td_chan->desc_elems = pchan->descriptor_elements;
+		td_chan->bytes_per_line = pchan->bytes_per_line;
+		td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM :
+			DMA_MEM_TO_DEV;
+
+		td_chan->membase = td->membase +
+			(i / 2) * TIMBDMA_INSTANCE_OFFSET +
+			(pchan->rx ? 0 : TIMBDMA_INSTANCE_TX_OFFSET);
+
+		dev_dbg(&pdev->dev, "Chan: %d, membase: %p\n",
+			i, td_chan->membase);
+
+		list_add_tail(&td_chan->chan.device_node, &td->dma.channels);
+	}
+
+	err = dma_async_device_register(&td->dma);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to register async device\n");
+		goto err_free_irq;
+	}
+
+	platform_set_drvdata(pdev, td);
+
+	dev_dbg(&pdev->dev, "Probe result: %d\n", err);
+	return err;
+
+err_free_irq:
+	free_irq(irq, td);
+err_tasklet_kill:
+	tasklet_kill(&td->tasklet);
+	iounmap(td->membase);
+err_free_mem:
+	kfree(td);
+err_release_region:
+	release_mem_region(iomem->start, resource_size(iomem));
+
+	return err;
+
+}
+
+static int __devexit td_remove(struct platform_device *pdev)
+{
+	struct timb_dma *td = platform_get_drvdata(pdev);
+	struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int irq = platform_get_irq(pdev, 0);
+
+	dma_async_device_unregister(&td->dma);
+	free_irq(irq, td);
+	tasklet_kill(&td->tasklet);
+	iounmap(td->membase);
+	kfree(td);
+	release_mem_region(iomem->start, resource_size(iomem));
+
+	platform_set_drvdata(pdev, NULL);
+
+	dev_dbg(&pdev->dev, "Removed...\n");
+	return 0;
+}
+
+static struct platform_driver td_driver = {
+	.driver = {
+		.name	= DRIVER_NAME,
+		.owner  = THIS_MODULE,
+	},
+	.probe	= td_probe,
+	.remove	= __exit_p(td_remove),
+};
+
+module_platform_driver(td_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Timberdale DMA controller driver");
+MODULE_AUTHOR("Pelagicore AB <info@pelagicore.com>");
+MODULE_ALIAS("platform:"DRIVER_NAME);
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
new file mode 100644
index 00000000..913f55c7
--- /dev/null
+++ b/drivers/dma/txx9dmac.c
@@ -0,0 +1,1343 @@
+/*
+ * Driver for the TXx9 SoC DMA Controller
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+
+#include "dmaengine.h"
+#include "txx9dmac.h"
+
+static struct txx9dmac_chan *to_txx9dmac_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct txx9dmac_chan, chan);
+}
+
+static struct txx9dmac_cregs __iomem *__dma_regs(const struct txx9dmac_chan *dc)
+{
+	return dc->ch_regs;
+}
+
+static struct txx9dmac_cregs32 __iomem *__dma_regs32(
+	const struct txx9dmac_chan *dc)
+{
+	return dc->ch_regs;
+}
+
+#define channel64_readq(dc, name) \
+	__raw_readq(&(__dma_regs(dc)->name))
+#define channel64_writeq(dc, name, val) \
+	__raw_writeq((val), &(__dma_regs(dc)->name))
+#define channel64_readl(dc, name) \
+	__raw_readl(&(__dma_regs(dc)->name))
+#define channel64_writel(dc, name, val) \
+	__raw_writel((val), &(__dma_regs(dc)->name))
+
+#define channel32_readl(dc, name) \
+	__raw_readl(&(__dma_regs32(dc)->name))
+#define channel32_writel(dc, name, val) \
+	__raw_writel((val), &(__dma_regs32(dc)->name))
+
+#define channel_readq(dc, name) channel64_readq(dc, name)
+#define channel_writeq(dc, name, val) channel64_writeq(dc, name, val)
+#define channel_readl(dc, name) \
+	(is_dmac64(dc) ? \
+	 channel64_readl(dc, name) : channel32_readl(dc, name))
+#define channel_writel(dc, name, val) \
+	(is_dmac64(dc) ? \
+	 channel64_writel(dc, name, val) : channel32_writel(dc, name, val))
+
+static dma_addr_t channel64_read_CHAR(const struct txx9dmac_chan *dc)
+{
+	if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64))
+		return channel64_readq(dc, CHAR);
+	else
+		return channel64_readl(dc, CHAR);
+}
+
+static void channel64_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val)
+{
+	if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64))
+		channel64_writeq(dc, CHAR, val);
+	else
+		channel64_writel(dc, CHAR, val);
+}
+
+static void channel64_clear_CHAR(const struct txx9dmac_chan *dc)
+{
+#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR)
+	channel64_writel(dc, CHAR, 0);
+	channel64_writel(dc, __pad_CHAR, 0);
+#else
+	channel64_writeq(dc, CHAR, 0);
+#endif
+}
+
+static dma_addr_t channel_read_CHAR(const struct txx9dmac_chan *dc)
+{
+	if (is_dmac64(dc))
+		return channel64_read_CHAR(dc);
+	else
+		return channel32_readl(dc, CHAR);
+}
+
+static void channel_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val)
+{
+	if (is_dmac64(dc))
+		channel64_write_CHAR(dc, val);
+	else
+		channel32_writel(dc, CHAR, val);
+}
+
+static struct txx9dmac_regs __iomem *__txx9dmac_regs(
+	const struct txx9dmac_dev *ddev)
+{
+	return ddev->regs;
+}
+
+static struct txx9dmac_regs32 __iomem *__txx9dmac_regs32(
+	const struct txx9dmac_dev *ddev)
+{
+	return ddev->regs;
+}
+
+#define dma64_readl(ddev, name) \
+	__raw_readl(&(__txx9dmac_regs(ddev)->name))
+#define dma64_writel(ddev, name, val) \
+	__raw_writel((val), &(__txx9dmac_regs(ddev)->name))
+
+#define dma32_readl(ddev, name) \
+	__raw_readl(&(__txx9dmac_regs32(ddev)->name))
+#define dma32_writel(ddev, name, val) \
+	__raw_writel((val), &(__txx9dmac_regs32(ddev)->name))
+
+#define dma_readl(ddev, name) \
+	(__is_dmac64(ddev) ? \
+	dma64_readl(ddev, name) : dma32_readl(ddev, name))
+#define dma_writel(ddev, name, val) \
+	(__is_dmac64(ddev) ? \
+	dma64_writel(ddev, name, val) : dma32_writel(ddev, name, val))
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+static struct device *chan2parent(struct dma_chan *chan)
+{
+	return chan->dev->device.parent;
+}
+
+static struct txx9dmac_desc *
+txd_to_txx9dmac_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct txx9dmac_desc, txd);
+}
+
+static dma_addr_t desc_read_CHAR(const struct txx9dmac_chan *dc,
+				 const struct txx9dmac_desc *desc)
+{
+	return is_dmac64(dc) ? desc->hwdesc.CHAR : desc->hwdesc32.CHAR;
+}
+
+static void desc_write_CHAR(const struct txx9dmac_chan *dc,
+			    struct txx9dmac_desc *desc, dma_addr_t val)
+{
+	if (is_dmac64(dc))
+		desc->hwdesc.CHAR = val;
+	else
+		desc->hwdesc32.CHAR = val;
+}
+
+#define TXX9_DMA_MAX_COUNT	0x04000000
+
+#define TXX9_DMA_INITIAL_DESC_COUNT	64
+
+static struct txx9dmac_desc *txx9dmac_first_active(struct txx9dmac_chan *dc)
+{
+	return list_entry(dc->active_list.next,
+			  struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_last_active(struct txx9dmac_chan *dc)
+{
+	return list_entry(dc->active_list.prev,
+			  struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
+{
+	return list_entry(dc->queue.next, struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
+{
+	if (!list_empty(&desc->tx_list))
+		desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
+	return desc;
+}
+
+static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx);
+
+static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
+						 gfp_t flags)
+{
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc;
+
+	desc = kzalloc(sizeof(*desc), flags);
+	if (!desc)
+		return NULL;
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
+	desc->txd.tx_submit = txx9dmac_tx_submit;
+	/* txd.flags will be overwritten in prep funcs */
+	desc->txd.flags = DMA_CTRL_ACK;
+	desc->txd.phys = dma_map_single(chan2parent(&dc->chan), &desc->hwdesc,
+					ddev->descsize, DMA_TO_DEVICE);
+	return desc;
+}
+
+static struct txx9dmac_desc *txx9dmac_desc_get(struct txx9dmac_chan *dc)
+{
+	struct txx9dmac_desc *desc, *_desc;
+	struct txx9dmac_desc *ret = NULL;
+	unsigned int i = 0;
+
+	spin_lock_bh(&dc->lock);
+	list_for_each_entry_safe(desc, _desc, &dc->free_list, desc_node) {
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+		dev_dbg(chan2dev(&dc->chan), "desc %p not ACKed\n", desc);
+		i++;
+	}
+	spin_unlock_bh(&dc->lock);
+
+	dev_vdbg(chan2dev(&dc->chan), "scanned %u descriptors on freelist\n",
+		 i);
+	if (!ret) {
+		ret = txx9dmac_desc_alloc(dc, GFP_ATOMIC);
+		if (ret) {
+			spin_lock_bh(&dc->lock);
+			dc->descs_allocated++;
+			spin_unlock_bh(&dc->lock);
+		} else
+			dev_err(chan2dev(&dc->chan),
+				"not enough descriptors available\n");
+	}
+	return ret;
+}
+
+static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
+				       struct txx9dmac_desc *desc)
+{
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *child;
+
+	list_for_each_entry(child, &desc->tx_list, desc_node)
+		dma_sync_single_for_cpu(chan2parent(&dc->chan),
+				child->txd.phys, ddev->descsize,
+				DMA_TO_DEVICE);
+	dma_sync_single_for_cpu(chan2parent(&dc->chan),
+			desc->txd.phys, ddev->descsize,
+			DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
+			      struct txx9dmac_desc *desc)
+{
+	if (desc) {
+		struct txx9dmac_desc *child;
+
+		txx9dmac_sync_desc_for_cpu(dc, desc);
+
+		spin_lock_bh(&dc->lock);
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			dev_vdbg(chan2dev(&dc->chan),
+				 "moving child desc %p to freelist\n",
+				 child);
+		list_splice_init(&desc->tx_list, &dc->free_list);
+		dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
+			 desc);
+		list_add(&desc->desc_node, &dc->free_list);
+		spin_unlock_bh(&dc->lock);
+	}
+}
+
+/*----------------------------------------------------------------------*/
+
+static void txx9dmac_dump_regs(struct txx9dmac_chan *dc)
+{
+	if (is_dmac64(dc))
+		dev_err(chan2dev(&dc->chan),
+			"  CHAR: %#llx SAR: %#llx DAR: %#llx CNTR: %#x"
+			" SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n",
+			(u64)channel64_read_CHAR(dc),
+			channel64_readq(dc, SAR),
+			channel64_readq(dc, DAR),
+			channel64_readl(dc, CNTR),
+			channel64_readl(dc, SAIR),
+			channel64_readl(dc, DAIR),
+			channel64_readl(dc, CCR),
+			channel64_readl(dc, CSR));
+	else
+		dev_err(chan2dev(&dc->chan),
+			"  CHAR: %#x SAR: %#x DAR: %#x CNTR: %#x"
+			" SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n",
+			channel32_readl(dc, CHAR),
+			channel32_readl(dc, SAR),
+			channel32_readl(dc, DAR),
+			channel32_readl(dc, CNTR),
+			channel32_readl(dc, SAIR),
+			channel32_readl(dc, DAIR),
+			channel32_readl(dc, CCR),
+			channel32_readl(dc, CSR));
+}
+
+static void txx9dmac_reset_chan(struct txx9dmac_chan *dc)
+{
+	channel_writel(dc, CCR, TXX9_DMA_CCR_CHRST);
+	if (is_dmac64(dc)) {
+		channel64_clear_CHAR(dc);
+		channel_writeq(dc, SAR, 0);
+		channel_writeq(dc, DAR, 0);
+	} else {
+		channel_writel(dc, CHAR, 0);
+		channel_writel(dc, SAR, 0);
+		channel_writel(dc, DAR, 0);
+	}
+	channel_writel(dc, CNTR, 0);
+	channel_writel(dc, SAIR, 0);
+	channel_writel(dc, DAIR, 0);
+	channel_writel(dc, CCR, 0);
+	mmiowb();
+}
+
+/* Called with dc->lock held and bh disabled */
+static void txx9dmac_dostart(struct txx9dmac_chan *dc,
+			     struct txx9dmac_desc *first)
+{
+	struct txx9dmac_slave *ds = dc->chan.private;
+	u32 sai, dai;
+
+	dev_vdbg(chan2dev(&dc->chan), "dostart %u %p\n",
+		 first->txd.cookie, first);
+	/* ASSERT:  channel is idle */
+	if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) {
+		dev_err(chan2dev(&dc->chan),
+			"BUG: Attempted to start non-idle channel\n");
+		txx9dmac_dump_regs(dc);
+		/* The tasklet will hopefully advance the queue... */
+		return;
+	}
+
+	if (is_dmac64(dc)) {
+		channel64_writel(dc, CNTR, 0);
+		channel64_writel(dc, CSR, 0xffffffff);
+		if (ds) {
+			if (ds->tx_reg) {
+				sai = ds->reg_width;
+				dai = 0;
+			} else {
+				sai = 0;
+				dai = ds->reg_width;
+			}
+		} else {
+			sai = 8;
+			dai = 8;
+		}
+		channel64_writel(dc, SAIR, sai);
+		channel64_writel(dc, DAIR, dai);
+		/* All 64-bit DMAC supports SMPCHN */
+		channel64_writel(dc, CCR, dc->ccr);
+		/* Writing a non zero value to CHAR will assert XFACT */
+		channel64_write_CHAR(dc, first->txd.phys);
+	} else {
+		channel32_writel(dc, CNTR, 0);
+		channel32_writel(dc, CSR, 0xffffffff);
+		if (ds) {
+			if (ds->tx_reg) {
+				sai = ds->reg_width;
+				dai = 0;
+			} else {
+				sai = 0;
+				dai = ds->reg_width;
+			}
+		} else {
+			sai = 4;
+			dai = 4;
+		}
+		channel32_writel(dc, SAIR, sai);
+		channel32_writel(dc, DAIR, dai);
+		if (txx9_dma_have_SMPCHN()) {
+			channel32_writel(dc, CCR, dc->ccr);
+			/* Writing a non zero value to CHAR will assert XFACT */
+			channel32_writel(dc, CHAR, first->txd.phys);
+		} else {
+			channel32_writel(dc, CHAR, first->txd.phys);
+			channel32_writel(dc, CCR, dc->ccr);
+		}
+	}
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
+			     struct txx9dmac_desc *desc)
+{
+	dma_async_tx_callback callback;
+	void *param;
+	struct dma_async_tx_descriptor *txd = &desc->txd;
+	struct txx9dmac_slave *ds = dc->chan.private;
+
+	dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n",
+		 txd->cookie, desc);
+
+	dma_cookie_complete(txd);
+	callback = txd->callback;
+	param = txd->callback_param;
+
+	txx9dmac_sync_desc_for_cpu(dc, desc);
+	list_splice_init(&desc->tx_list, &dc->free_list);
+	list_move(&desc->desc_node, &dc->free_list);
+
+	if (!ds) {
+		dma_addr_t dmaaddr;
+		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			dmaaddr = is_dmac64(dc) ?
+				desc->hwdesc.DAR : desc->hwdesc32.DAR;
+			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+				dma_unmap_single(chan2parent(&dc->chan),
+					dmaaddr, desc->len, DMA_FROM_DEVICE);
+			else
+				dma_unmap_page(chan2parent(&dc->chan),
+					dmaaddr, desc->len, DMA_FROM_DEVICE);
+		}
+		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			dmaaddr = is_dmac64(dc) ?
+				desc->hwdesc.SAR : desc->hwdesc32.SAR;
+			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+				dma_unmap_single(chan2parent(&dc->chan),
+					dmaaddr, desc->len, DMA_TO_DEVICE);
+			else
+				dma_unmap_page(chan2parent(&dc->chan),
+					dmaaddr, desc->len, DMA_TO_DEVICE);
+		}
+	}
+
+	/*
+	 * The API requires that no submissions are done from a
+	 * callback, so we don't need to drop the lock here
+	 */
+	if (callback)
+		callback(param);
+	dma_run_dependencies(txd);
+}
+
+static void txx9dmac_dequeue(struct txx9dmac_chan *dc, struct list_head *list)
+{
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc;
+	struct txx9dmac_desc *prev = NULL;
+
+	BUG_ON(!list_empty(list));
+	do {
+		desc = txx9dmac_first_queued(dc);
+		if (prev) {
+			desc_write_CHAR(dc, prev, desc->txd.phys);
+			dma_sync_single_for_device(chan2parent(&dc->chan),
+				prev->txd.phys, ddev->descsize,
+				DMA_TO_DEVICE);
+		}
+		prev = txx9dmac_last_child(desc);
+		list_move_tail(&desc->desc_node, list);
+		/* Make chain-completion interrupt happen */
+		if ((desc->txd.flags & DMA_PREP_INTERRUPT) &&
+		    !txx9dmac_chan_INTENT(dc))
+			break;
+	} while (!list_empty(&dc->queue));
+}
+
+static void txx9dmac_complete_all(struct txx9dmac_chan *dc)
+{
+	struct txx9dmac_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	/*
+	 * Submit queued descriptors ASAP, i.e. before we go through
+	 * the completed ones.
+	 */
+	list_splice_init(&dc->active_list, &list);
+	if (!list_empty(&dc->queue)) {
+		txx9dmac_dequeue(dc, &dc->active_list);
+		txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+	}
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		txx9dmac_descriptor_complete(dc, desc);
+}
+
+static void txx9dmac_dump_desc(struct txx9dmac_chan *dc,
+			       struct txx9dmac_hwdesc *desc)
+{
+	if (is_dmac64(dc)) {
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+		dev_crit(chan2dev(&dc->chan),
+			 "  desc: ch%#llx s%#llx d%#llx c%#x\n",
+			 (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR);
+#else
+		dev_crit(chan2dev(&dc->chan),
+			 "  desc: ch%#llx s%#llx d%#llx c%#x"
+			 " si%#x di%#x cc%#x cs%#x\n",
+			 (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR,
+			 desc->SAIR, desc->DAIR, desc->CCR, desc->CSR);
+#endif
+	} else {
+		struct txx9dmac_hwdesc32 *d = (struct txx9dmac_hwdesc32 *)desc;
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+		dev_crit(chan2dev(&dc->chan),
+			 "  desc: ch%#x s%#x d%#x c%#x\n",
+			 d->CHAR, d->SAR, d->DAR, d->CNTR);
+#else
+		dev_crit(chan2dev(&dc->chan),
+			 "  desc: ch%#x s%#x d%#x c%#x"
+			 " si%#x di%#x cc%#x cs%#x\n",
+			 d->CHAR, d->SAR, d->DAR, d->CNTR,
+			 d->SAIR, d->DAIR, d->CCR, d->CSR);
+#endif
+	}
+}
+
+static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
+{
+	struct txx9dmac_desc *bad_desc;
+	struct txx9dmac_desc *child;
+	u32 errors;
+
+	/*
+	 * The descriptor currently at the head of the active list is
+	 * borked. Since we don't have any way to report errors, we'll
+	 * just have to scream loudly and try to carry on.
+	 */
+	dev_crit(chan2dev(&dc->chan), "Abnormal Chain Completion\n");
+	txx9dmac_dump_regs(dc);
+
+	bad_desc = txx9dmac_first_active(dc);
+	list_del_init(&bad_desc->desc_node);
+
+	/* Clear all error flags and try to restart the controller */
+	errors = csr & (TXX9_DMA_CSR_ABCHC |
+			TXX9_DMA_CSR_CFERR | TXX9_DMA_CSR_CHERR |
+			TXX9_DMA_CSR_DESERR | TXX9_DMA_CSR_SORERR);
+	channel_writel(dc, CSR, errors);
+
+	if (list_empty(&dc->active_list) && !list_empty(&dc->queue))
+		txx9dmac_dequeue(dc, &dc->active_list);
+	if (!list_empty(&dc->active_list))
+		txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+
+	dev_crit(chan2dev(&dc->chan),
+		 "Bad descriptor submitted for DMA! (cookie: %d)\n",
+		 bad_desc->txd.cookie);
+	txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
+	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+		txx9dmac_dump_desc(dc, &child->hwdesc);
+	/* Pretend the descriptor completed successfully */
+	txx9dmac_descriptor_complete(dc, bad_desc);
+}
+
+static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
+{
+	dma_addr_t chain;
+	struct txx9dmac_desc *desc, *_desc;
+	struct txx9dmac_desc *child;
+	u32 csr;
+
+	if (is_dmac64(dc)) {
+		chain = channel64_read_CHAR(dc);
+		csr = channel64_readl(dc, CSR);
+		channel64_writel(dc, CSR, csr);
+	} else {
+		chain = channel32_readl(dc, CHAR);
+		csr = channel32_readl(dc, CSR);
+		channel32_writel(dc, CSR, csr);
+	}
+	/* For dynamic chain, we should look at XFACT instead of NCHNC */
+	if (!(csr & (TXX9_DMA_CSR_XFACT | TXX9_DMA_CSR_ABCHC))) {
+		/* Everything we've submitted is done */
+		txx9dmac_complete_all(dc);
+		return;
+	}
+	if (!(csr & TXX9_DMA_CSR_CHNEN))
+		chain = 0;	/* last descriptor of this chain */
+
+	dev_vdbg(chan2dev(&dc->chan), "scan_descriptors: char=%#llx\n",
+		 (u64)chain);
+
+	list_for_each_entry_safe(desc, _desc, &dc->active_list, desc_node) {
+		if (desc_read_CHAR(dc, desc) == chain) {
+			/* This one is currently in progress */
+			if (csr & TXX9_DMA_CSR_ABCHC)
+				goto scan_done;
+			return;
+		}
+
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			if (desc_read_CHAR(dc, child) == chain) {
+				/* Currently in progress */
+				if (csr & TXX9_DMA_CSR_ABCHC)
+					goto scan_done;
+				return;
+			}
+
+		/*
+		 * No descriptors so far seem to be in progress, i.e.
+		 * this one must be done.
+		 */
+		txx9dmac_descriptor_complete(dc, desc);
+	}
+scan_done:
+	if (csr & TXX9_DMA_CSR_ABCHC) {
+		txx9dmac_handle_error(dc, csr);
+		return;
+	}
+
+	dev_err(chan2dev(&dc->chan),
+		"BUG: All descriptors done, but channel not idle!\n");
+
+	/* Try to continue after resetting the channel... */
+	txx9dmac_reset_chan(dc);
+
+	if (!list_empty(&dc->queue)) {
+		txx9dmac_dequeue(dc, &dc->active_list);
+		txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+	}
+}
+
+static void txx9dmac_chan_tasklet(unsigned long data)
+{
+	int irq;
+	u32 csr;
+	struct txx9dmac_chan *dc;
+
+	dc = (struct txx9dmac_chan *)data;
+	csr = channel_readl(dc, CSR);
+	dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n", csr);
+
+	spin_lock(&dc->lock);
+	if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC |
+		   TXX9_DMA_CSR_NTRNFC))
+		txx9dmac_scan_descriptors(dc);
+	spin_unlock(&dc->lock);
+	irq = dc->irq;
+
+	enable_irq(irq);
+}
+
+static irqreturn_t txx9dmac_chan_interrupt(int irq, void *dev_id)
+{
+	struct txx9dmac_chan *dc = dev_id;
+
+	dev_vdbg(chan2dev(&dc->chan), "interrupt: status=%#x\n",
+			channel_readl(dc, CSR));
+
+	tasklet_schedule(&dc->tasklet);
+	/*
+	 * Just disable the interrupts. We'll turn them back on in the
+	 * softirq handler.
+	 */
+	disable_irq_nosync(irq);
+
+	return IRQ_HANDLED;
+}
+
+static void txx9dmac_tasklet(unsigned long data)
+{
+	int irq;
+	u32 csr;
+	struct txx9dmac_chan *dc;
+
+	struct txx9dmac_dev *ddev = (struct txx9dmac_dev *)data;
+	u32 mcr;
+	int i;
+
+	mcr = dma_readl(ddev, MCR);
+	dev_vdbg(ddev->chan[0]->dma.dev, "tasklet: mcr=%x\n", mcr);
+	for (i = 0; i < TXX9_DMA_MAX_NR_CHANNELS; i++) {
+		if ((mcr >> (24 + i)) & 0x11) {
+			dc = ddev->chan[i];
+			csr = channel_readl(dc, CSR);
+			dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n",
+				 csr);
+			spin_lock(&dc->lock);
+			if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC |
+				   TXX9_DMA_CSR_NTRNFC))
+				txx9dmac_scan_descriptors(dc);
+			spin_unlock(&dc->lock);
+		}
+	}
+	irq = ddev->irq;
+
+	enable_irq(irq);
+}
+
+static irqreturn_t txx9dmac_interrupt(int irq, void *dev_id)
+{
+	struct txx9dmac_dev *ddev = dev_id;
+
+	dev_vdbg(ddev->chan[0]->dma.dev, "interrupt: status=%#x\n",
+			dma_readl(ddev, MCR));
+
+	tasklet_schedule(&ddev->tasklet);
+	/*
+	 * Just disable the interrupts. We'll turn them back on in the
+	 * softirq handler.
+	 */
+	disable_irq_nosync(irq);
+
+	return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct txx9dmac_desc *desc = txd_to_txx9dmac_desc(tx);
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(tx->chan);
+	dma_cookie_t cookie;
+
+	spin_lock_bh(&dc->lock);
+	cookie = dma_cookie_assign(tx);
+
+	dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u %p\n",
+		 desc->txd.cookie, desc);
+
+	list_add_tail(&desc->desc_node, &dc->queue);
+	spin_unlock_bh(&dc->lock);
+
+	return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc;
+	struct txx9dmac_desc *first;
+	struct txx9dmac_desc *prev;
+	size_t xfer_count;
+	size_t offset;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_memcpy d%#llx s%#llx l%#zx f%#lx\n",
+		 (u64)dest, (u64)src, len, flags);
+
+	if (unlikely(!len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+		return NULL;
+	}
+
+	prev = first = NULL;
+
+	for (offset = 0; offset < len; offset += xfer_count) {
+		xfer_count = min_t(size_t, len - offset, TXX9_DMA_MAX_COUNT);
+		/*
+		 * Workaround for ERT-TX49H2-033, ERT-TX49H3-020,
+		 * ERT-TX49H4-016 (slightly conservative)
+		 */
+		if (__is_dmac64(ddev)) {
+			if (xfer_count > 0x100 &&
+			    (xfer_count & 0xff) >= 0xfa &&
+			    (xfer_count & 0xff) <= 0xff)
+				xfer_count -= 0x20;
+		} else {
+			if (xfer_count > 0x80 &&
+			    (xfer_count & 0x7f) >= 0x7e &&
+			    (xfer_count & 0x7f) <= 0x7f)
+				xfer_count -= 0x20;
+		}
+
+		desc = txx9dmac_desc_get(dc);
+		if (!desc) {
+			txx9dmac_desc_put(dc, first);
+			return NULL;
+		}
+
+		if (__is_dmac64(ddev)) {
+			desc->hwdesc.SAR = src + offset;
+			desc->hwdesc.DAR = dest + offset;
+			desc->hwdesc.CNTR = xfer_count;
+			txx9dmac_desc_set_nosimple(ddev, desc, 8, 8,
+					dc->ccr | TXX9_DMA_CCR_XFACT);
+		} else {
+			desc->hwdesc32.SAR = src + offset;
+			desc->hwdesc32.DAR = dest + offset;
+			desc->hwdesc32.CNTR = xfer_count;
+			txx9dmac_desc_set_nosimple(ddev, desc, 4, 4,
+					dc->ccr | TXX9_DMA_CCR_XFACT);
+		}
+
+		/*
+		 * The descriptors on tx_list are not reachable from
+		 * the dc->queue list or dc->active_list after a
+		 * submit.  If we put all descriptors on active_list,
+		 * calling of callback on the completion will be more
+		 * complex.
+		 */
+		if (!first) {
+			first = desc;
+		} else {
+			desc_write_CHAR(dc, prev, desc->txd.phys);
+			dma_sync_single_for_device(chan2parent(&dc->chan),
+					prev->txd.phys, ddev->descsize,
+					DMA_TO_DEVICE);
+			list_add_tail(&desc->desc_node, &first->tx_list);
+		}
+		prev = desc;
+	}
+
+	/* Trigger interrupt after last block */
+	if (flags & DMA_PREP_INTERRUPT)
+		txx9dmac_desc_set_INTENT(ddev, prev);
+
+	desc_write_CHAR(dc, prev, 0);
+	dma_sync_single_for_device(chan2parent(&dc->chan),
+			prev->txd.phys, ddev->descsize,
+			DMA_TO_DEVICE);
+
+	first->txd.flags = flags;
+	first->len = len;
+
+	return &first->txd;
+}
+
+static struct dma_async_tx_descriptor *
+txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_slave *ds = chan->private;
+	struct txx9dmac_desc *prev;
+	struct txx9dmac_desc *first;
+	unsigned int i;
+	struct scatterlist *sg;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_slave\n");
+
+	BUG_ON(!ds || !ds->reg_width);
+	if (ds->tx_reg)
+		BUG_ON(direction != DMA_MEM_TO_DEV);
+	else
+		BUG_ON(direction != DMA_DEV_TO_MEM);
+	if (unlikely(!sg_len))
+		return NULL;
+
+	prev = first = NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct txx9dmac_desc *desc;
+		dma_addr_t mem;
+		u32 sai, dai;
+
+		desc = txx9dmac_desc_get(dc);
+		if (!desc) {
+			txx9dmac_desc_put(dc, first);
+			return NULL;
+		}
+
+		mem = sg_dma_address(sg);
+
+		if (__is_dmac64(ddev)) {
+			if (direction == DMA_MEM_TO_DEV) {
+				desc->hwdesc.SAR = mem;
+				desc->hwdesc.DAR = ds->tx_reg;
+			} else {
+				desc->hwdesc.SAR = ds->rx_reg;
+				desc->hwdesc.DAR = mem;
+			}
+			desc->hwdesc.CNTR = sg_dma_len(sg);
+		} else {
+			if (direction == DMA_MEM_TO_DEV) {
+				desc->hwdesc32.SAR = mem;
+				desc->hwdesc32.DAR = ds->tx_reg;
+			} else {
+				desc->hwdesc32.SAR = ds->rx_reg;
+				desc->hwdesc32.DAR = mem;
+			}
+			desc->hwdesc32.CNTR = sg_dma_len(sg);
+		}
+		if (direction == DMA_MEM_TO_DEV) {
+			sai = ds->reg_width;
+			dai = 0;
+		} else {
+			sai = 0;
+			dai = ds->reg_width;
+		}
+		txx9dmac_desc_set_nosimple(ddev, desc, sai, dai,
+					dc->ccr | TXX9_DMA_CCR_XFACT);
+
+		if (!first) {
+			first = desc;
+		} else {
+			desc_write_CHAR(dc, prev, desc->txd.phys);
+			dma_sync_single_for_device(chan2parent(&dc->chan),
+					prev->txd.phys,
+					ddev->descsize,
+					DMA_TO_DEVICE);
+			list_add_tail(&desc->desc_node, &first->tx_list);
+		}
+		prev = desc;
+	}
+
+	/* Trigger interrupt after last block */
+	if (flags & DMA_PREP_INTERRUPT)
+		txx9dmac_desc_set_INTENT(ddev, prev);
+
+	desc_write_CHAR(dc, prev, 0);
+	dma_sync_single_for_device(chan2parent(&dc->chan),
+			prev->txd.phys, ddev->descsize,
+			DMA_TO_DEVICE);
+
+	first->txd.flags = flags;
+	first->len = 0;
+
+	return &first->txd;
+}
+
+static int txx9dmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			    unsigned long arg)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	/* Only supports DMA_TERMINATE_ALL */
+	if (cmd != DMA_TERMINATE_ALL)
+		return -EINVAL;
+
+	dev_vdbg(chan2dev(chan), "terminate_all\n");
+	spin_lock_bh(&dc->lock);
+
+	txx9dmac_reset_chan(dc);
+
+	/* active_list entries will end up before queued entries */
+	list_splice_init(&dc->queue, &list);
+	list_splice_init(&dc->active_list, &list);
+
+	spin_unlock_bh(&dc->lock);
+
+	/* Flush all pending and queued descriptors */
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		txx9dmac_descriptor_complete(dc, desc);
+
+	return 0;
+}
+
+static enum dma_status
+txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		   struct dma_tx_state *txstate)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_SUCCESS) {
+		spin_lock_bh(&dc->lock);
+		txx9dmac_scan_descriptors(dc);
+		spin_unlock_bh(&dc->lock);
+
+		ret = dma_cookie_status(chan, cookie, txstate);
+	}
+
+	return ret;
+}
+
+static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc,
+				   struct txx9dmac_desc *prev)
+{
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc;
+	LIST_HEAD(list);
+
+	prev = txx9dmac_last_child(prev);
+	txx9dmac_dequeue(dc, &list);
+	desc = list_entry(list.next, struct txx9dmac_desc, desc_node);
+	desc_write_CHAR(dc, prev, desc->txd.phys);
+	dma_sync_single_for_device(chan2parent(&dc->chan),
+				   prev->txd.phys, ddev->descsize,
+				   DMA_TO_DEVICE);
+	mmiowb();
+	if (!(channel_readl(dc, CSR) & TXX9_DMA_CSR_CHNEN) &&
+	    channel_read_CHAR(dc) == prev->txd.phys)
+		/* Restart chain DMA */
+		channel_write_CHAR(dc, desc->txd.phys);
+	list_splice_tail(&list, &dc->active_list);
+}
+
+static void txx9dmac_issue_pending(struct dma_chan *chan)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+
+	spin_lock_bh(&dc->lock);
+
+	if (!list_empty(&dc->active_list))
+		txx9dmac_scan_descriptors(dc);
+	if (!list_empty(&dc->queue)) {
+		if (list_empty(&dc->active_list)) {
+			txx9dmac_dequeue(dc, &dc->active_list);
+			txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+		} else if (txx9_dma_have_SMPCHN()) {
+			struct txx9dmac_desc *prev = txx9dmac_last_active(dc);
+
+			if (!(prev->txd.flags & DMA_PREP_INTERRUPT) ||
+			    txx9dmac_chan_INTENT(dc))
+				txx9dmac_chain_dynamic(dc, prev);
+		}
+	}
+
+	spin_unlock_bh(&dc->lock);
+}
+
+static int txx9dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_slave *ds = chan->private;
+	struct txx9dmac_desc *desc;
+	int i;
+
+	dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+	/* ASSERT:  channel is idle */
+	if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) {
+		dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+		return -EIO;
+	}
+
+	dma_cookie_init(chan);
+
+	dc->ccr = TXX9_DMA_CCR_IMMCHN | TXX9_DMA_CCR_INTENE | CCR_LE;
+	txx9dmac_chan_set_SMPCHN(dc);
+	if (!txx9_dma_have_SMPCHN() || (dc->ccr & TXX9_DMA_CCR_SMPCHN))
+		dc->ccr |= TXX9_DMA_CCR_INTENC;
+	if (chan->device->device_prep_dma_memcpy) {
+		if (ds)
+			return -EINVAL;
+		dc->ccr |= TXX9_DMA_CCR_XFSZ_X8;
+	} else {
+		if (!ds ||
+		    (ds->tx_reg && ds->rx_reg) || (!ds->tx_reg && !ds->rx_reg))
+			return -EINVAL;
+		dc->ccr |= TXX9_DMA_CCR_EXTRQ |
+			TXX9_DMA_CCR_XFSZ(__ffs(ds->reg_width));
+		txx9dmac_chan_set_INTENT(dc);
+	}
+
+	spin_lock_bh(&dc->lock);
+	i = dc->descs_allocated;
+	while (dc->descs_allocated < TXX9_DMA_INITIAL_DESC_COUNT) {
+		spin_unlock_bh(&dc->lock);
+
+		desc = txx9dmac_desc_alloc(dc, GFP_KERNEL);
+		if (!desc) {
+			dev_info(chan2dev(chan),
+				"only allocated %d descriptors\n", i);
+			spin_lock_bh(&dc->lock);
+			break;
+		}
+		txx9dmac_desc_put(dc, desc);
+
+		spin_lock_bh(&dc->lock);
+		i = ++dc->descs_allocated;
+	}
+	spin_unlock_bh(&dc->lock);
+
+	dev_dbg(chan2dev(chan),
+		"alloc_chan_resources allocated %d descriptors\n", i);
+
+	return i;
+}
+
+static void txx9dmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
+			dc->descs_allocated);
+
+	/* ASSERT:  channel is idle */
+	BUG_ON(!list_empty(&dc->active_list));
+	BUG_ON(!list_empty(&dc->queue));
+	BUG_ON(channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT);
+
+	spin_lock_bh(&dc->lock);
+	list_splice_init(&dc->free_list, &list);
+	dc->descs_allocated = 0;
+	spin_unlock_bh(&dc->lock);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+		dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
+		dma_unmap_single(chan2parent(chan), desc->txd.phys,
+				 ddev->descsize, DMA_TO_DEVICE);
+		kfree(desc);
+	}
+
+	dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
+}
+
+/*----------------------------------------------------------------------*/
+
+static void txx9dmac_off(struct txx9dmac_dev *ddev)
+{
+	dma_writel(ddev, MCR, 0);
+	mmiowb();
+}
+
+static int __init txx9dmac_chan_probe(struct platform_device *pdev)
+{
+	struct txx9dmac_chan_platform_data *cpdata = pdev->dev.platform_data;
+	struct platform_device *dmac_dev = cpdata->dmac_dev;
+	struct txx9dmac_platform_data *pdata = dmac_dev->dev.platform_data;
+	struct txx9dmac_chan *dc;
+	int err;
+	int ch = pdev->id % TXX9_DMA_MAX_NR_CHANNELS;
+	int irq;
+
+	dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL);
+	if (!dc)
+		return -ENOMEM;
+
+	dc->dma.dev = &pdev->dev;
+	dc->dma.device_alloc_chan_resources = txx9dmac_alloc_chan_resources;
+	dc->dma.device_free_chan_resources = txx9dmac_free_chan_resources;
+	dc->dma.device_control = txx9dmac_control;
+	dc->dma.device_tx_status = txx9dmac_tx_status;
+	dc->dma.device_issue_pending = txx9dmac_issue_pending;
+	if (pdata && pdata->memcpy_chan == ch) {
+		dc->dma.device_prep_dma_memcpy = txx9dmac_prep_dma_memcpy;
+		dma_cap_set(DMA_MEMCPY, dc->dma.cap_mask);
+	} else {
+		dc->dma.device_prep_slave_sg = txx9dmac_prep_slave_sg;
+		dma_cap_set(DMA_SLAVE, dc->dma.cap_mask);
+		dma_cap_set(DMA_PRIVATE, dc->dma.cap_mask);
+	}
+
+	INIT_LIST_HEAD(&dc->dma.channels);
+	dc->ddev = platform_get_drvdata(dmac_dev);
+	if (dc->ddev->irq < 0) {
+		irq = platform_get_irq(pdev, 0);
+		if (irq < 0)
+			return irq;
+		tasklet_init(&dc->tasklet, txx9dmac_chan_tasklet,
+				(unsigned long)dc);
+		dc->irq = irq;
+		err = devm_request_irq(&pdev->dev, dc->irq,
+			txx9dmac_chan_interrupt, 0, dev_name(&pdev->dev), dc);
+		if (err)
+			return err;
+	} else
+		dc->irq = -1;
+	dc->ddev->chan[ch] = dc;
+	dc->chan.device = &dc->dma;
+	list_add_tail(&dc->chan.device_node, &dc->chan.device->channels);
+	dma_cookie_init(&dc->chan);
+
+	if (is_dmac64(dc))
+		dc->ch_regs = &__txx9dmac_regs(dc->ddev)->CHAN[ch];
+	else
+		dc->ch_regs = &__txx9dmac_regs32(dc->ddev)->CHAN[ch];
+	spin_lock_init(&dc->lock);
+
+	INIT_LIST_HEAD(&dc->active_list);
+	INIT_LIST_HEAD(&dc->queue);
+	INIT_LIST_HEAD(&dc->free_list);
+
+	txx9dmac_reset_chan(dc);
+
+	platform_set_drvdata(pdev, dc);
+
+	err = dma_async_device_register(&dc->dma);
+	if (err)
+		return err;
+	dev_dbg(&pdev->dev, "TXx9 DMA Channel (dma%d%s%s)\n",
+		dc->dma.dev_id,
+		dma_has_cap(DMA_MEMCPY, dc->dma.cap_mask) ? " memcpy" : "",
+		dma_has_cap(DMA_SLAVE, dc->dma.cap_mask) ? " slave" : "");
+
+	return 0;
+}
+
+static int __exit txx9dmac_chan_remove(struct platform_device *pdev)
+{
+	struct txx9dmac_chan *dc = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&dc->dma);
+	if (dc->irq >= 0)
+		tasklet_kill(&dc->tasklet);
+	dc->ddev->chan[pdev->id % TXX9_DMA_MAX_NR_CHANNELS] = NULL;
+	return 0;
+}
+
+static int __init txx9dmac_probe(struct platform_device *pdev)
+{
+	struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
+	struct resource *io;
+	struct txx9dmac_dev *ddev;
+	u32 mcr;
+	int err;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!io)
+		return -EINVAL;
+
+	ddev = devm_kzalloc(&pdev->dev, sizeof(*ddev), GFP_KERNEL);
+	if (!ddev)
+		return -ENOMEM;
+
+	if (!devm_request_mem_region(&pdev->dev, io->start, resource_size(io),
+				     dev_name(&pdev->dev)))
+		return -EBUSY;
+
+	ddev->regs = devm_ioremap(&pdev->dev, io->start, resource_size(io));
+	if (!ddev->regs)
+		return -ENOMEM;
+	ddev->have_64bit_regs = pdata->have_64bit_regs;
+	if (__is_dmac64(ddev))
+		ddev->descsize = sizeof(struct txx9dmac_hwdesc);
+	else
+		ddev->descsize = sizeof(struct txx9dmac_hwdesc32);
+
+	/* force dma off, just in case */
+	txx9dmac_off(ddev);
+
+	ddev->irq = platform_get_irq(pdev, 0);
+	if (ddev->irq >= 0) {
+		tasklet_init(&ddev->tasklet, txx9dmac_tasklet,
+				(unsigned long)ddev);
+		err = devm_request_irq(&pdev->dev, ddev->irq,
+			txx9dmac_interrupt, 0, dev_name(&pdev->dev), ddev);
+		if (err)
+			return err;
+	}
+
+	mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
+	if (pdata && pdata->memcpy_chan >= 0)
+		mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan);
+	dma_writel(ddev, MCR, mcr);
+
+	platform_set_drvdata(pdev, ddev);
+	return 0;
+}
+
+static int __exit txx9dmac_remove(struct platform_device *pdev)
+{
+	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+	txx9dmac_off(ddev);
+	if (ddev->irq >= 0)
+		tasklet_kill(&ddev->tasklet);
+	return 0;
+}
+
+static void txx9dmac_shutdown(struct platform_device *pdev)
+{
+	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+	txx9dmac_off(ddev);
+}
+
+static int txx9dmac_suspend_noirq(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+	txx9dmac_off(ddev);
+	return 0;
+}
+
+static int txx9dmac_resume_noirq(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+	struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
+	u32 mcr;
+
+	mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
+	if (pdata && pdata->memcpy_chan >= 0)
+		mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan);
+	dma_writel(ddev, MCR, mcr);
+	return 0;
+
+}
+
+static const struct dev_pm_ops txx9dmac_dev_pm_ops = {
+	.suspend_noirq = txx9dmac_suspend_noirq,
+	.resume_noirq = txx9dmac_resume_noirq,
+};
+
+static struct platform_driver txx9dmac_chan_driver = {
+	.remove		= __exit_p(txx9dmac_chan_remove),
+	.driver = {
+		.name	= "txx9dmac-chan",
+	},
+};
+
+static struct platform_driver txx9dmac_driver = {
+	.remove		= __exit_p(txx9dmac_remove),
+	.shutdown	= txx9dmac_shutdown,
+	.driver = {
+		.name	= "txx9dmac",
+		.pm	= &txx9dmac_dev_pm_ops,
+	},
+};
+
+static int __init txx9dmac_init(void)
+{
+	int rc;
+
+	rc = platform_driver_probe(&txx9dmac_driver, txx9dmac_probe);
+	if (!rc) {
+		rc = platform_driver_probe(&txx9dmac_chan_driver,
+					   txx9dmac_chan_probe);
+		if (rc)
+			platform_driver_unregister(&txx9dmac_driver);
+	}
+	return rc;
+}
+module_init(txx9dmac_init);
+
+static void __exit txx9dmac_exit(void)
+{
+	platform_driver_unregister(&txx9dmac_chan_driver);
+	platform_driver_unregister(&txx9dmac_driver);
+}
+module_exit(txx9dmac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TXx9 DMA Controller driver");
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_ALIAS("platform:txx9dmac");
+MODULE_ALIAS("platform:txx9dmac-chan");
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
new file mode 100644
index 00000000..f5a76059
--- /dev/null
+++ b/drivers/dma/txx9dmac.h
@@ -0,0 +1,307 @@
+/*
+ * Driver for the TXx9 SoC DMA Controller
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef TXX9DMAC_H
+#define TXX9DMAC_H
+
+#include <linux/dmaengine.h>
+#include <asm/txx9/dmac.h>
+
+/*
+ * Design Notes:
+ *
+ * This DMAC have four channels and one FIFO buffer.  Each channel can
+ * be configured for memory-memory or device-memory transfer, but only
+ * one channel can do alignment-free memory-memory transfer at a time
+ * while the channel should occupy the FIFO buffer for effective
+ * transfers.
+ *
+ * Instead of dynamically assign the FIFO buffer to channels, I chose
+ * make one dedicated channel for memory-memory transfer.  The
+ * dedicated channel is public.  Other channels are private and used
+ * for slave transfer.  Some devices in the SoC are wired to certain
+ * DMA channel.
+ */
+
+#ifdef CONFIG_MACH_TX49XX
+static inline bool txx9_dma_have_SMPCHN(void)
+{
+	return true;
+}
+#define TXX9_DMA_USE_SIMPLE_CHAIN
+#else
+static inline bool txx9_dma_have_SMPCHN(void)
+{
+	return false;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN
+#ifdef CONFIG_MACH_TX49XX
+#define CCR_LE	TXX9_DMA_CCR_LE
+#define MCR_LE	0
+#else
+#define CCR_LE	0
+#define MCR_LE	TXX9_DMA_MCR_LE
+#endif
+#else
+#define CCR_LE	0
+#define MCR_LE	0
+#endif
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#ifdef __BIG_ENDIAN
+#define TXX9_DMA_REG32(name)		u32 __pad_##name; u32 name
+#else
+#define TXX9_DMA_REG32(name)		u32 name; u32 __pad_##name
+#endif
+
+/* Hardware register definitions. */
+struct txx9dmac_cregs {
+#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR)
+	TXX9_DMA_REG32(CHAR);	/* Chain Address Register */
+#else
+	u64 CHAR;		/* Chain Address Register */
+#endif
+	u64 SAR;		/* Source Address Register */
+	u64 DAR;		/* Destination Address Register */
+	TXX9_DMA_REG32(CNTR);	/* Count Register */
+	TXX9_DMA_REG32(SAIR);	/* Source Address Increment Register */
+	TXX9_DMA_REG32(DAIR);	/* Destination Address Increment Register */
+	TXX9_DMA_REG32(CCR);	/* Channel Control Register */
+	TXX9_DMA_REG32(CSR);	/* Channel Status Register */
+};
+struct txx9dmac_cregs32 {
+	u32 CHAR;
+	u32 SAR;
+	u32 DAR;
+	u32 CNTR;
+	u32 SAIR;
+	u32 DAIR;
+	u32 CCR;
+	u32 CSR;
+};
+
+struct txx9dmac_regs {
+	/* per-channel registers */
+	struct txx9dmac_cregs	CHAN[TXX9_DMA_MAX_NR_CHANNELS];
+	u64	__pad[9];
+	u64	MFDR;		/* Memory Fill Data Register */
+	TXX9_DMA_REG32(MCR);	/* Master Control Register */
+};
+struct txx9dmac_regs32 {
+	struct txx9dmac_cregs32	CHAN[TXX9_DMA_MAX_NR_CHANNELS];
+	u32	__pad[9];
+	u32	MFDR;
+	u32	MCR;
+};
+
+/* bits for MCR */
+#define TXX9_DMA_MCR_EIS(ch)	(0x10000000<<(ch))
+#define TXX9_DMA_MCR_DIS(ch)	(0x01000000<<(ch))
+#define TXX9_DMA_MCR_RSFIF	0x00000080
+#define TXX9_DMA_MCR_FIFUM(ch)	(0x00000008<<(ch))
+#define TXX9_DMA_MCR_LE		0x00000004
+#define TXX9_DMA_MCR_RPRT	0x00000002
+#define TXX9_DMA_MCR_MSTEN	0x00000001
+
+/* bits for CCRn */
+#define TXX9_DMA_CCR_IMMCHN	0x20000000
+#define TXX9_DMA_CCR_USEXFSZ	0x10000000
+#define TXX9_DMA_CCR_LE		0x08000000
+#define TXX9_DMA_CCR_DBINH	0x04000000
+#define TXX9_DMA_CCR_SBINH	0x02000000
+#define TXX9_DMA_CCR_CHRST	0x01000000
+#define TXX9_DMA_CCR_RVBYTE	0x00800000
+#define TXX9_DMA_CCR_ACKPOL	0x00400000
+#define TXX9_DMA_CCR_REQPL	0x00200000
+#define TXX9_DMA_CCR_EGREQ	0x00100000
+#define TXX9_DMA_CCR_CHDN	0x00080000
+#define TXX9_DMA_CCR_DNCTL	0x00060000
+#define TXX9_DMA_CCR_EXTRQ	0x00010000
+#define TXX9_DMA_CCR_INTRQD	0x0000e000
+#define TXX9_DMA_CCR_INTENE	0x00001000
+#define TXX9_DMA_CCR_INTENC	0x00000800
+#define TXX9_DMA_CCR_INTENT	0x00000400
+#define TXX9_DMA_CCR_CHNEN	0x00000200
+#define TXX9_DMA_CCR_XFACT	0x00000100
+#define TXX9_DMA_CCR_SMPCHN	0x00000020
+#define TXX9_DMA_CCR_XFSZ(order)	(((order) << 2) & 0x0000001c)
+#define TXX9_DMA_CCR_XFSZ_1	TXX9_DMA_CCR_XFSZ(0)
+#define TXX9_DMA_CCR_XFSZ_2	TXX9_DMA_CCR_XFSZ(1)
+#define TXX9_DMA_CCR_XFSZ_4	TXX9_DMA_CCR_XFSZ(2)
+#define TXX9_DMA_CCR_XFSZ_8	TXX9_DMA_CCR_XFSZ(3)
+#define TXX9_DMA_CCR_XFSZ_X4	TXX9_DMA_CCR_XFSZ(4)
+#define TXX9_DMA_CCR_XFSZ_X8	TXX9_DMA_CCR_XFSZ(5)
+#define TXX9_DMA_CCR_XFSZ_X16	TXX9_DMA_CCR_XFSZ(6)
+#define TXX9_DMA_CCR_XFSZ_X32	TXX9_DMA_CCR_XFSZ(7)
+#define TXX9_DMA_CCR_MEMIO	0x00000002
+#define TXX9_DMA_CCR_SNGAD	0x00000001
+
+/* bits for CSRn */
+#define TXX9_DMA_CSR_CHNEN	0x00000400
+#define TXX9_DMA_CSR_STLXFER	0x00000200
+#define TXX9_DMA_CSR_XFACT	0x00000100
+#define TXX9_DMA_CSR_ABCHC	0x00000080
+#define TXX9_DMA_CSR_NCHNC	0x00000040
+#define TXX9_DMA_CSR_NTRNFC	0x00000020
+#define TXX9_DMA_CSR_EXTDN	0x00000010
+#define TXX9_DMA_CSR_CFERR	0x00000008
+#define TXX9_DMA_CSR_CHERR	0x00000004
+#define TXX9_DMA_CSR_DESERR	0x00000002
+#define TXX9_DMA_CSR_SORERR	0x00000001
+
+struct txx9dmac_chan {
+	struct dma_chan		chan;
+	struct dma_device	dma;
+	struct txx9dmac_dev	*ddev;
+	void __iomem		*ch_regs;
+	struct tasklet_struct	tasklet;
+	int			irq;
+	u32			ccr;
+
+	spinlock_t		lock;
+
+	/* these other elements are all protected by lock */
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+
+	unsigned int		descs_allocated;
+};
+
+struct txx9dmac_dev {
+	void __iomem		*regs;
+	struct tasklet_struct	tasklet;
+	int			irq;
+	struct txx9dmac_chan	*chan[TXX9_DMA_MAX_NR_CHANNELS];
+	bool			have_64bit_regs;
+	unsigned int		descsize;
+};
+
+static inline bool __is_dmac64(const struct txx9dmac_dev *ddev)
+{
+	return ddev->have_64bit_regs;
+}
+
+static inline bool is_dmac64(const struct txx9dmac_chan *dc)
+{
+	return __is_dmac64(dc->ddev);
+}
+
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+/* Hardware descriptor definition. (for simple-chain) */
+struct txx9dmac_hwdesc {
+#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR)
+	TXX9_DMA_REG32(CHAR);
+#else
+	u64 CHAR;
+#endif
+	u64 SAR;
+	u64 DAR;
+	TXX9_DMA_REG32(CNTR);
+};
+struct txx9dmac_hwdesc32 {
+	u32 CHAR;
+	u32 SAR;
+	u32 DAR;
+	u32 CNTR;
+};
+#else
+#define txx9dmac_hwdesc txx9dmac_cregs
+#define txx9dmac_hwdesc32 txx9dmac_cregs32
+#endif
+
+struct txx9dmac_desc {
+	/* FIRST values the hardware uses */
+	union {
+		struct txx9dmac_hwdesc hwdesc;
+		struct txx9dmac_hwdesc32 hwdesc32;
+	};
+
+	/* THEN values for driver housekeeping */
+	struct list_head		desc_node ____cacheline_aligned;
+	struct list_head		tx_list;
+	struct dma_async_tx_descriptor	txd;
+	size_t				len;
+};
+
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+
+static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc)
+{
+	return (dc->ccr & TXX9_DMA_CCR_INTENT) != 0;
+}
+
+static inline void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc)
+{
+	dc->ccr |= TXX9_DMA_CCR_INTENT;
+}
+
+static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev,
+					    struct txx9dmac_desc *desc)
+{
+}
+
+static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc)
+{
+	dc->ccr |= TXX9_DMA_CCR_SMPCHN;
+}
+
+static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev,
+					      struct txx9dmac_desc *desc,
+					      u32 sair, u32 dair, u32 ccr)
+{
+}
+
+#else /* TXX9_DMA_USE_SIMPLE_CHAIN */
+
+static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc)
+{
+	return true;
+}
+
+static void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc)
+{
+}
+
+static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev,
+					    struct txx9dmac_desc *desc)
+{
+	if (__is_dmac64(ddev))
+		desc->hwdesc.CCR |= TXX9_DMA_CCR_INTENT;
+	else
+		desc->hwdesc32.CCR |= TXX9_DMA_CCR_INTENT;
+}
+
+static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc)
+{
+}
+
+static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev,
+					      struct txx9dmac_desc *desc,
+					      u32 sai, u32 dai, u32 ccr)
+{
+	if (__is_dmac64(ddev)) {
+		desc->hwdesc.SAIR = sai;
+		desc->hwdesc.DAIR = dai;
+		desc->hwdesc.CCR = ccr;
+	} else {
+		desc->hwdesc32.SAIR = sai;
+		desc->hwdesc32.DAIR = dai;
+		desc->hwdesc32.CCR = ccr;
+	}
+}
+
+#endif /* TXX9_DMA_USE_SIMPLE_CHAIN */
+
+#endif /* TXX9DMAC_H */