diff options
Diffstat (limited to 'drivers/dma')
58 files changed, 50144 insertions, 0 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig new file mode 100644 index 00000000..ef378b5b --- /dev/null +++ b/drivers/dma/Kconfig @@ -0,0 +1,297 @@ +# +# DMA engine configuration +# + +menuconfig DMADEVICES + bool "DMA Engine support" + depends on HAS_DMA + help + DMA engines can do asynchronous data transfers without + involving the host CPU. Currently, this framework can be + used to offload memory copies in the network stack and + RAID operations in the MD driver. This menu only presents + DMA Device drivers supported by the configured arch, it may + be empty in some cases. + +config DMADEVICES_DEBUG + bool "DMA Engine debugging" + depends on DMADEVICES != n + help + This is an option for use by developers; most people should + say N here. This enables DMA engine core and driver debugging. + +config DMADEVICES_VDEBUG + bool "DMA Engine verbose debugging" + depends on DMADEVICES_DEBUG != n + help + This is an option for use by developers; most people should + say N here. This enables deeper (more verbose) debugging of + the DMA engine core and drivers. + + +if DMADEVICES + +comment "DMA Devices" + +config INTEL_MID_DMAC + tristate "Intel MID DMA support for Peripheral DMA controllers" + depends on PCI && X86 + select DMA_ENGINE + default n + help + Enable support for the Intel(R) MID DMA engine present + in Intel MID chipsets. + + Say Y here if you have such a chipset. + + If unsure, say N. + +config ASYNC_TX_ENABLE_CHANNEL_SWITCH + bool + +config AMBA_PL08X + bool "ARM PrimeCell PL080 or PL081 support" + depends on ARM_AMBA && EXPERIMENTAL + select DMA_ENGINE + help + Platform has a PL08x DMAC device + which can provide DMA engine support + +config INTEL_IOATDMA + tristate "Intel I/OAT DMA support" + depends on PCI && X86 + select DMA_ENGINE + select DCA + select ASYNC_TX_DISABLE_PQ_VAL_DMA + select ASYNC_TX_DISABLE_XOR_VAL_DMA + help + Enable support for the Intel(R) I/OAT DMA engine present + in recent Intel Xeon chipsets. + + Say Y here if you have such a chipset. + + If unsure, say N. + +config INTEL_IOP_ADMA + tristate "Intel IOP ADMA support" + depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX + select DMA_ENGINE + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + help + Enable support for the Intel(R) IOP Series RAID engines. + +config DW_DMAC + tristate "Synopsys DesignWare AHB DMA support" + depends on HAVE_CLK + select DMA_ENGINE + default y if CPU_AT32AP7000 + help + Support the Synopsys DesignWare AHB DMA controller. This + can be integrated in chips such as the Atmel AT32ap7000. + +config AT_HDMAC + tristate "Atmel AHB DMA support" + depends on ARCH_AT91 + select DMA_ENGINE + help + Support the Atmel AHB DMA controller. + +config FSL_DMA + tristate "Freescale Elo and Elo Plus DMA support" + depends on FSL_SOC + select DMA_ENGINE + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + ---help--- + Enable support for the Freescale Elo and Elo Plus DMA controllers. + The Elo is the DMA controller on some 82xx and 83xx parts, and the + Elo Plus is the DMA controller on 85xx and 86xx parts. + +config MPC512X_DMA + tristate "Freescale MPC512x built-in DMA engine support" + depends on PPC_MPC512x || PPC_MPC831x + select DMA_ENGINE + ---help--- + Enable support for the Freescale MPC512x built-in DMA engine. + +config MV_XOR + bool "Marvell XOR engine support" + depends on PLAT_ORION + select DMA_ENGINE + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + ---help--- + Enable support for the Marvell XOR engine. + +config MX3_IPU + bool "MX3x Image Processing Unit support" + depends on ARCH_MXC + select DMA_ENGINE + default y + help + If you plan to use the Image Processing unit in the i.MX3x, say + Y here. If unsure, select Y. + +config MX3_IPU_IRQS + int "Number of dynamically mapped interrupts for IPU" + depends on MX3_IPU + range 2 137 + default 4 + help + Out of 137 interrupt sources on i.MX31 IPU only very few are used. + To avoid bloating the irq_desc[] array we allocate a sufficient + number of IRQ slots and map them dynamically to specific sources. + +config TXX9_DMAC + tristate "Toshiba TXx9 SoC DMA support" + depends on MACH_TX49XX || MACH_TX39XX + select DMA_ENGINE + help + Support the TXx9 SoC internal DMA controller. This can be + integrated in chips such as the Toshiba TX4927/38/39. + +config SH_DMAE + tristate "Renesas SuperH DMAC support" + depends on (SUPERH && SH_DMA) || (ARM && ARCH_SHMOBILE) + depends on !SH_DMA_API + select DMA_ENGINE + help + Enable support for the Renesas SuperH DMA controllers. + +config COH901318 + bool "ST-Ericsson COH901318 DMA support" + select DMA_ENGINE + depends on ARCH_U300 + help + Enable support for ST-Ericsson COH 901 318 DMA. + +config STE_DMA40 + bool "ST-Ericsson DMA40 support" + depends on ARCH_U8500 + select DMA_ENGINE + help + Support for ST-Ericsson DMA40 controller + +config AMCC_PPC440SPE_ADMA + tristate "AMCC PPC440SPe ADMA support" + depends on 440SPe || 440SP + select DMA_ENGINE + select ARCH_HAS_ASYNC_TX_FIND_CHANNEL + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + help + Enable support for the AMCC PPC440SPe RAID engines. + +config TIMB_DMA + tristate "Timberdale FPGA DMA support" + depends on MFD_TIMBERDALE || HAS_IOMEM + select DMA_ENGINE + help + Enable support for the Timberdale FPGA DMA engine. + +config SIRF_DMA + tristate "CSR SiRFprimaII DMA support" + depends on ARCH_PRIMA2 + select DMA_ENGINE + help + Enable support for the CSR SiRFprimaII DMA engine. + +config ARCH_HAS_ASYNC_TX_FIND_CHANNEL + bool + +config PL330_DMA + tristate "DMA API Driver for PL330" + select DMA_ENGINE + depends on ARM_AMBA + help + Select if your platform has one or more PL330 DMACs. + You need to provide platform specific settings via + platform_data for a dma-pl330 device. + +config PCH_DMA + tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA" + depends on PCI && X86 + select DMA_ENGINE + help + Enable support for Intel EG20T PCH DMA engine. + + This driver also can be used for LAPIS Semiconductor IOH(Input/ + Output Hub), ML7213, ML7223 and ML7831. + ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is + for MP(Media Phone) use and ML7831 IOH is for general purpose use. + ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. + ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. + +config IMX_SDMA + tristate "i.MX SDMA support" + depends on ARCH_MXC + select DMA_ENGINE + help + Support the i.MX SDMA engine. This engine is integrated into + Freescale i.MX25/31/35/51/53 chips. + +config IMX_DMA + tristate "i.MX DMA support" + depends on ARCH_MXC + select DMA_ENGINE + help + Support the i.MX DMA engine. This engine is integrated into + Freescale i.MX1/21/27 chips. + +config MXS_DMA + bool "MXS DMA support" + depends on SOC_IMX23 || SOC_IMX28 + select DMA_ENGINE + help + Support the MXS DMA engine. This engine including APBH-DMA + and APBX-DMA is integrated into Freescale i.MX23/28 chips. + +config EP93XX_DMA + bool "Cirrus Logic EP93xx DMA support" + depends on ARCH_EP93XX + select DMA_ENGINE + help + Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + +config DMA_SA11X0 + tristate "SA-11x0 DMA support" + depends on ARCH_SA1100 + select DMA_ENGINE + help + Support the DMA engine found on Intel StrongARM SA-1100 and + SA-1110 SoCs. This DMA engine can only be used with on-chip + devices. + +config DMA_ENGINE + bool + +comment "DMA Clients" + depends on DMA_ENGINE + +config NET_DMA + bool "Network: TCP receive copy offload" + depends on DMA_ENGINE && NET + default (INTEL_IOATDMA || FSL_DMA) + help + This enables the use of DMA engines in the network stack to + offload receive copy-to-user operations, freeing CPU cycles. + + Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise + say N. + +config ASYNC_TX_DMA + bool "Async_tx: Offload support for the async_tx api" + depends on DMA_ENGINE + help + This allows the async_tx api to take advantage of offload engines for + memcpy, memset, xor, and raid6 p+q operations. If your platform has + a dma engine that can perform raid operations and you have enabled + MD_RAID456 say Y. + + If unsure, say N. + +config DMATEST + tristate "DMA Test client" + depends on DMA_ENGINE + help + Simple DMA test client. Say N unless you're debugging a + DMA Device driver. + +endif diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile new file mode 100644 index 00000000..86b795ba --- /dev/null +++ b/drivers/dma/Makefile @@ -0,0 +1,30 @@ +ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG +ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG + +obj-$(CONFIG_DMA_ENGINE) += dmaengine.o +obj-$(CONFIG_NET_DMA) += iovlock.o +obj-$(CONFIG_INTEL_MID_DMAC) += intel_mid_dma.o +obj-$(CONFIG_DMATEST) += dmatest.o +obj-$(CONFIG_INTEL_IOATDMA) += ioat/ +obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o +obj-$(CONFIG_FSL_DMA) += fsldma.o +obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o +obj-$(CONFIG_MV_XOR) += mv_xor.o +obj-$(CONFIG_DW_DMAC) += dw_dmac.o +obj-$(CONFIG_AT_HDMAC) += at_hdmac.o +obj-$(CONFIG_MX3_IPU) += ipu/ +obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_SH_DMAE) += shdma.o +obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ +obj-$(CONFIG_IMX_SDMA) += imx-sdma.o +obj-$(CONFIG_IMX_DMA) += imx-dma.o +obj-$(CONFIG_MXS_DMA) += mxs-dma.o +obj-$(CONFIG_TIMB_DMA) += timb_dma.o +obj-$(CONFIG_SIRF_DMA) += sirf-dma.o +obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o +obj-$(CONFIG_PL330_DMA) += pl330.o +obj-$(CONFIG_PCH_DMA) += pch_dma.o +obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o +obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o +obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o diff --git a/drivers/dma/TODO b/drivers/dma/TODO new file mode 100644 index 00000000..734ed020 --- /dev/null +++ b/drivers/dma/TODO @@ -0,0 +1,13 @@ +TODO for slave dma + +1. Move remaining drivers to use new slave interface +2. Remove old slave pointer machansim +3. Make issue_pending to start the transaction in below drivers + - mpc512x_dma + - imx-dma + - imx-sdma + - mxs-dma.c + - dw_dmac + - intel_mid_dma +4. Check other subsystems for dma drivers and merge/move to dmaengine +5. Remove dma_slave_config's dma direction. diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c new file mode 100644 index 00000000..3d704abd --- /dev/null +++ b/drivers/dma/amba-pl08x.c @@ -0,0 +1,2065 @@ +/* + * Copyright (c) 2006 ARM Ltd. + * Copyright (c) 2010 ST-Ericsson SA + * + * Author: Peter Pearse <peter.pearse@arm.com> + * Author: Linus Walleij <linus.walleij@stericsson.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is in this distribution in the file + * called COPYING. + * + * Documentation: ARM DDI 0196G == PL080 + * Documentation: ARM DDI 0218E == PL081 + * + * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any + * channel. + * + * The PL080 has 8 channels available for simultaneous use, and the PL081 + * has only two channels. So on these DMA controllers the number of channels + * and the number of incoming DMA signals are two totally different things. + * It is usually not possible to theoretically handle all physical signals, + * so a multiplexing scheme with possible denial of use is necessary. + * + * The PL080 has a dual bus master, PL081 has a single master. + * + * Memory to peripheral transfer may be visualized as + * Get data from memory to DMAC + * Until no data left + * On burst request from peripheral + * Destination burst from DMAC to peripheral + * Clear burst request + * Raise terminal count interrupt + * + * For peripherals with a FIFO: + * Source burst size == half the depth of the peripheral FIFO + * Destination burst size == the depth of the peripheral FIFO + * + * (Bursts are irrelevant for mem to mem transfers - there are no burst + * signals, the DMA controller will simply facilitate its AHB master.) + * + * ASSUMES default (little) endianness for DMA transfers + * + * The PL08x has two flow control settings: + * - DMAC flow control: the transfer size defines the number of transfers + * which occur for the current LLI entry, and the DMAC raises TC at the + * end of every LLI entry. Observed behaviour shows the DMAC listening + * to both the BREQ and SREQ signals (contrary to documented), + * transferring data if either is active. The LBREQ and LSREQ signals + * are ignored. + * + * - Peripheral flow control: the transfer size is ignored (and should be + * zero). The data is transferred from the current LLI entry, until + * after the final transfer signalled by LBREQ or LSREQ. The DMAC + * will then move to the next LLI entry. + * + * Global TODO: + * - Break out common code from arch/arm/mach-s3c64xx and share + */ +#include <linux/amba/bus.h> +#include <linux/amba/pl08x.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pm_runtime.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <asm/hardware/pl080.h> + +#include "dmaengine.h" + +#define DRIVER_NAME "pl08xdmac" + +static struct amba_driver pl08x_amba_driver; + +/** + * struct vendor_data - vendor-specific config parameters for PL08x derivatives + * @channels: the number of channels available in this variant + * @dualmaster: whether this version supports dual AHB masters or not. + */ +struct vendor_data { + u8 channels; + bool dualmaster; +}; + +/* + * PL08X private data structures + * An LLI struct - see PL08x TRM. Note that next uses bit[0] as a bus bit, + * start & end do not - their bus bit info is in cctl. Also note that these + * are fixed 32-bit quantities. + */ +struct pl08x_lli { + u32 src; + u32 dst; + u32 lli; + u32 cctl; +}; + +/** + * struct pl08x_driver_data - the local state holder for the PL08x + * @slave: slave engine for this instance + * @memcpy: memcpy engine for this instance + * @base: virtual memory base (remapped) for the PL08x + * @adev: the corresponding AMBA (PrimeCell) bus entry + * @vd: vendor data for this PL08x variant + * @pd: platform data passed in from the platform/machine + * @phy_chans: array of data for the physical channels + * @pool: a pool for the LLI descriptors + * @pool_ctr: counter of LLIs in the pool + * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI + * fetches + * @mem_buses: set to indicate memory transfers on AHB2. + * @lock: a spinlock for this struct + */ +struct pl08x_driver_data { + struct dma_device slave; + struct dma_device memcpy; + void __iomem *base; + struct amba_device *adev; + const struct vendor_data *vd; + struct pl08x_platform_data *pd; + struct pl08x_phy_chan *phy_chans; + struct dma_pool *pool; + int pool_ctr; + u8 lli_buses; + u8 mem_buses; + spinlock_t lock; +}; + +/* + * PL08X specific defines + */ + +/* Size (bytes) of each LLI buffer allocated for one transfer */ +# define PL08X_LLI_TSFR_SIZE 0x2000 + +/* Maximum times we call dma_pool_alloc on this pool without freeing */ +#define MAX_NUM_TSFR_LLIS (PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli)) +#define PL08X_ALIGN 8 + +static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan) +{ + return container_of(chan, struct pl08x_dma_chan, chan); +} + +static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct pl08x_txd, tx); +} + +/* + * Physical channel handling + */ + +/* Whether a certain channel is busy or not */ +static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch) +{ + unsigned int val; + + val = readl(ch->base + PL080_CH_CONFIG); + return val & PL080_CONFIG_ACTIVE; +} + +/* + * Set the initial DMA register values i.e. those for the first LLI + * The next LLI pointer and the configuration interrupt bit have + * been set when the LLIs were constructed. Poke them into the hardware + * and start the transfer. + */ +static void pl08x_start_txd(struct pl08x_dma_chan *plchan, + struct pl08x_txd *txd) +{ + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_phy_chan *phychan = plchan->phychan; + struct pl08x_lli *lli = &txd->llis_va[0]; + u32 val; + + plchan->at = txd; + + /* Wait for channel inactive */ + while (pl08x_phy_channel_busy(phychan)) + cpu_relax(); + + dev_vdbg(&pl08x->adev->dev, + "WRITE channel %d: csrc=0x%08x, cdst=0x%08x, " + "clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n", + phychan->id, lli->src, lli->dst, lli->lli, lli->cctl, + txd->ccfg); + + writel(lli->src, phychan->base + PL080_CH_SRC_ADDR); + writel(lli->dst, phychan->base + PL080_CH_DST_ADDR); + writel(lli->lli, phychan->base + PL080_CH_LLI); + writel(lli->cctl, phychan->base + PL080_CH_CONTROL); + writel(txd->ccfg, phychan->base + PL080_CH_CONFIG); + + /* Enable the DMA channel */ + /* Do not access config register until channel shows as disabled */ + while (readl(pl08x->base + PL080_EN_CHAN) & (1 << phychan->id)) + cpu_relax(); + + /* Do not access config register until channel shows as inactive */ + val = readl(phychan->base + PL080_CH_CONFIG); + while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE)) + val = readl(phychan->base + PL080_CH_CONFIG); + + writel(val | PL080_CONFIG_ENABLE, phychan->base + PL080_CH_CONFIG); +} + +/* + * Pause the channel by setting the HALT bit. + * + * For M->P transfers, pause the DMAC first and then stop the peripheral - + * the FIFO can only drain if the peripheral is still requesting data. + * (note: this can still timeout if the DMAC FIFO never drains of data.) + * + * For P->M transfers, disable the peripheral first to stop it filling + * the DMAC FIFO, and then pause the DMAC. + */ +static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch) +{ + u32 val; + int timeout; + + /* Set the HALT bit and wait for the FIFO to drain */ + val = readl(ch->base + PL080_CH_CONFIG); + val |= PL080_CONFIG_HALT; + writel(val, ch->base + PL080_CH_CONFIG); + + /* Wait for channel inactive */ + for (timeout = 1000; timeout; timeout--) { + if (!pl08x_phy_channel_busy(ch)) + break; + udelay(1); + } + if (pl08x_phy_channel_busy(ch)) + pr_err("pl08x: channel%u timeout waiting for pause\n", ch->id); +} + +static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch) +{ + u32 val; + + /* Clear the HALT bit */ + val = readl(ch->base + PL080_CH_CONFIG); + val &= ~PL080_CONFIG_HALT; + writel(val, ch->base + PL080_CH_CONFIG); +} + +/* + * pl08x_terminate_phy_chan() stops the channel, clears the FIFO and + * clears any pending interrupt status. This should not be used for + * an on-going transfer, but as a method of shutting down a channel + * (eg, when it's no longer used) or terminating a transfer. + */ +static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x, + struct pl08x_phy_chan *ch) +{ + u32 val = readl(ch->base + PL080_CH_CONFIG); + + val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK | + PL080_CONFIG_TC_IRQ_MASK); + + writel(val, ch->base + PL080_CH_CONFIG); + + writel(1 << ch->id, pl08x->base + PL080_ERR_CLEAR); + writel(1 << ch->id, pl08x->base + PL080_TC_CLEAR); +} + +static inline u32 get_bytes_in_cctl(u32 cctl) +{ + /* The source width defines the number of bytes */ + u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK; + + switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) { + case PL080_WIDTH_8BIT: + break; + case PL080_WIDTH_16BIT: + bytes *= 2; + break; + case PL080_WIDTH_32BIT: + bytes *= 4; + break; + } + return bytes; +} + +/* The channel should be paused when calling this */ +static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan) +{ + struct pl08x_phy_chan *ch; + struct pl08x_txd *txd; + unsigned long flags; + size_t bytes = 0; + + spin_lock_irqsave(&plchan->lock, flags); + ch = plchan->phychan; + txd = plchan->at; + + /* + * Follow the LLIs to get the number of remaining + * bytes in the currently active transaction. + */ + if (ch && txd) { + u32 clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2; + + /* First get the remaining bytes in the active transfer */ + bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL)); + + if (clli) { + struct pl08x_lli *llis_va = txd->llis_va; + dma_addr_t llis_bus = txd->llis_bus; + int index; + + BUG_ON(clli < llis_bus || clli >= llis_bus + + sizeof(struct pl08x_lli) * MAX_NUM_TSFR_LLIS); + + /* + * Locate the next LLI - as this is an array, + * it's simple maths to find. + */ + index = (clli - llis_bus) / sizeof(struct pl08x_lli); + + for (; index < MAX_NUM_TSFR_LLIS; index++) { + bytes += get_bytes_in_cctl(llis_va[index].cctl); + + /* + * A LLI pointer of 0 terminates the LLI list + */ + if (!llis_va[index].lli) + break; + } + } + } + + /* Sum up all queued transactions */ + if (!list_empty(&plchan->pend_list)) { + struct pl08x_txd *txdi; + list_for_each_entry(txdi, &plchan->pend_list, node) { + struct pl08x_sg *dsg; + list_for_each_entry(dsg, &txd->dsg_list, node) + bytes += dsg->len; + } + } + + spin_unlock_irqrestore(&plchan->lock, flags); + + return bytes; +} + +/* + * Allocate a physical channel for a virtual channel + * + * Try to locate a physical channel to be used for this transfer. If all + * are taken return NULL and the requester will have to cope by using + * some fallback PIO mode or retrying later. + */ +static struct pl08x_phy_chan * +pl08x_get_phy_channel(struct pl08x_driver_data *pl08x, + struct pl08x_dma_chan *virt_chan) +{ + struct pl08x_phy_chan *ch = NULL; + unsigned long flags; + int i; + + for (i = 0; i < pl08x->vd->channels; i++) { + ch = &pl08x->phy_chans[i]; + + spin_lock_irqsave(&ch->lock, flags); + + if (!ch->serving) { + ch->serving = virt_chan; + ch->signal = -1; + spin_unlock_irqrestore(&ch->lock, flags); + break; + } + + spin_unlock_irqrestore(&ch->lock, flags); + } + + if (i == pl08x->vd->channels) { + /* No physical channel available, cope with it */ + return NULL; + } + + pm_runtime_get_sync(&pl08x->adev->dev); + return ch; +} + +static inline void pl08x_put_phy_channel(struct pl08x_driver_data *pl08x, + struct pl08x_phy_chan *ch) +{ + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + + /* Stop the channel and clear its interrupts */ + pl08x_terminate_phy_chan(pl08x, ch); + + pm_runtime_put(&pl08x->adev->dev); + + /* Mark it as free */ + ch->serving = NULL; + spin_unlock_irqrestore(&ch->lock, flags); +} + +/* + * LLI handling + */ + +static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded) +{ + switch (coded) { + case PL080_WIDTH_8BIT: + return 1; + case PL080_WIDTH_16BIT: + return 2; + case PL080_WIDTH_32BIT: + return 4; + default: + break; + } + BUG(); + return 0; +} + +static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, + size_t tsize) +{ + u32 retbits = cctl; + + /* Remove all src, dst and transfer size bits */ + retbits &= ~PL080_CONTROL_DWIDTH_MASK; + retbits &= ~PL080_CONTROL_SWIDTH_MASK; + retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK; + + /* Then set the bits according to the parameters */ + switch (srcwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + switch (dstwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT; + return retbits; +} + +struct pl08x_lli_build_data { + struct pl08x_txd *txd; + struct pl08x_bus_data srcbus; + struct pl08x_bus_data dstbus; + size_t remainder; + u32 lli_bus; +}; + +/* + * Autoselect a master bus to use for the transfer. Slave will be the chosen as + * victim in case src & dest are not similarly aligned. i.e. If after aligning + * masters address with width requirements of transfer (by sending few byte by + * byte data), slave is still not aligned, then its width will be reduced to + * BYTE. + * - prefers the destination bus if both available + * - prefers bus with fixed address (i.e. peripheral) + */ +static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd, + struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl) +{ + if (!(cctl & PL080_CONTROL_DST_INCR)) { + *mbus = &bd->dstbus; + *sbus = &bd->srcbus; + } else if (!(cctl & PL080_CONTROL_SRC_INCR)) { + *mbus = &bd->srcbus; + *sbus = &bd->dstbus; + } else { + if (bd->dstbus.buswidth >= bd->srcbus.buswidth) { + *mbus = &bd->dstbus; + *sbus = &bd->srcbus; + } else { + *mbus = &bd->srcbus; + *sbus = &bd->dstbus; + } + } +} + +/* + * Fills in one LLI for a certain transfer descriptor and advance the counter + */ +static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd, + int num_llis, int len, u32 cctl) +{ + struct pl08x_lli *llis_va = bd->txd->llis_va; + dma_addr_t llis_bus = bd->txd->llis_bus; + + BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS); + + llis_va[num_llis].cctl = cctl; + llis_va[num_llis].src = bd->srcbus.addr; + llis_va[num_llis].dst = bd->dstbus.addr; + llis_va[num_llis].lli = llis_bus + (num_llis + 1) * + sizeof(struct pl08x_lli); + llis_va[num_llis].lli |= bd->lli_bus; + + if (cctl & PL080_CONTROL_SRC_INCR) + bd->srcbus.addr += len; + if (cctl & PL080_CONTROL_DST_INCR) + bd->dstbus.addr += len; + + BUG_ON(bd->remainder < len); + + bd->remainder -= len; +} + +static inline void prep_byte_width_lli(struct pl08x_lli_build_data *bd, + u32 *cctl, u32 len, int num_llis, size_t *total_bytes) +{ + *cctl = pl08x_cctl_bits(*cctl, 1, 1, len); + pl08x_fill_lli_for_desc(bd, num_llis, len, *cctl); + (*total_bytes) += len; +} + +/* + * This fills in the table of LLIs for the transfer descriptor + * Note that we assume we never have to change the burst sizes + * Return 0 for error + */ +static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, + struct pl08x_txd *txd) +{ + struct pl08x_bus_data *mbus, *sbus; + struct pl08x_lli_build_data bd; + int num_llis = 0; + u32 cctl, early_bytes = 0; + size_t max_bytes_per_lli, total_bytes; + struct pl08x_lli *llis_va; + struct pl08x_sg *dsg; + + txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus); + if (!txd->llis_va) { + dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__); + return 0; + } + + pl08x->pool_ctr++; + + bd.txd = txd; + bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0; + cctl = txd->cctl; + + /* Find maximum width of the source bus */ + bd.srcbus.maxwidth = + pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >> + PL080_CONTROL_SWIDTH_SHIFT); + + /* Find maximum width of the destination bus */ + bd.dstbus.maxwidth = + pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >> + PL080_CONTROL_DWIDTH_SHIFT); + + list_for_each_entry(dsg, &txd->dsg_list, node) { + total_bytes = 0; + cctl = txd->cctl; + + bd.srcbus.addr = dsg->src_addr; + bd.dstbus.addr = dsg->dst_addr; + bd.remainder = dsg->len; + bd.srcbus.buswidth = bd.srcbus.maxwidth; + bd.dstbus.buswidth = bd.dstbus.maxwidth; + + pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl); + + dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu\n", + bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "", + bd.srcbus.buswidth, + bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "", + bd.dstbus.buswidth, + bd.remainder); + dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n", + mbus == &bd.srcbus ? "src" : "dst", + sbus == &bd.srcbus ? "src" : "dst"); + + /* + * Zero length is only allowed if all these requirements are + * met: + * - flow controller is peripheral. + * - src.addr is aligned to src.width + * - dst.addr is aligned to dst.width + * + * sg_len == 1 should be true, as there can be two cases here: + * + * - Memory addresses are contiguous and are not scattered. + * Here, Only one sg will be passed by user driver, with + * memory address and zero length. We pass this to controller + * and after the transfer it will receive the last burst + * request from peripheral and so transfer finishes. + * + * - Memory addresses are scattered and are not contiguous. + * Here, Obviously as DMA controller doesn't know when a lli's + * transfer gets over, it can't load next lli. So in this + * case, there has to be an assumption that only one lli is + * supported. Thus, we can't have scattered addresses. + */ + if (!bd.remainder) { + u32 fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >> + PL080_CONFIG_FLOW_CONTROL_SHIFT; + if (!((fc >= PL080_FLOW_SRC2DST_DST) && + (fc <= PL080_FLOW_SRC2DST_SRC))) { + dev_err(&pl08x->adev->dev, "%s sg len can't be zero", + __func__); + return 0; + } + + if ((bd.srcbus.addr % bd.srcbus.buswidth) || + (bd.dstbus.addr % bd.dstbus.buswidth)) { + dev_err(&pl08x->adev->dev, + "%s src & dst address must be aligned to src" + " & dst width if peripheral is flow controller", + __func__); + return 0; + } + + cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth, + bd.dstbus.buswidth, 0); + pl08x_fill_lli_for_desc(&bd, num_llis++, 0, cctl); + break; + } + + /* + * Send byte by byte for following cases + * - Less than a bus width available + * - until master bus is aligned + */ + if (bd.remainder < mbus->buswidth) + early_bytes = bd.remainder; + else if ((mbus->addr) % (mbus->buswidth)) { + early_bytes = mbus->buswidth - (mbus->addr) % + (mbus->buswidth); + if ((bd.remainder - early_bytes) < mbus->buswidth) + early_bytes = bd.remainder; + } + + if (early_bytes) { + dev_vdbg(&pl08x->adev->dev, + "%s byte width LLIs (remain 0x%08x)\n", + __func__, bd.remainder); + prep_byte_width_lli(&bd, &cctl, early_bytes, num_llis++, + &total_bytes); + } + + if (bd.remainder) { + /* + * Master now aligned + * - if slave is not then we must set its width down + */ + if (sbus->addr % sbus->buswidth) { + dev_dbg(&pl08x->adev->dev, + "%s set down bus width to one byte\n", + __func__); + + sbus->buswidth = 1; + } + + /* + * Bytes transferred = tsize * src width, not + * MIN(buswidths) + */ + max_bytes_per_lli = bd.srcbus.buswidth * + PL080_CONTROL_TRANSFER_SIZE_MASK; + dev_vdbg(&pl08x->adev->dev, + "%s max bytes per lli = %zu\n", + __func__, max_bytes_per_lli); + + /* + * Make largest possible LLIs until less than one bus + * width left + */ + while (bd.remainder > (mbus->buswidth - 1)) { + size_t lli_len, tsize, width; + + /* + * If enough left try to send max possible, + * otherwise try to send the remainder + */ + lli_len = min(bd.remainder, max_bytes_per_lli); + + /* + * Check against maximum bus alignment: + * Calculate actual transfer size in relation to + * bus width an get a maximum remainder of the + * highest bus width - 1 + */ + width = max(mbus->buswidth, sbus->buswidth); + lli_len = (lli_len / width) * width; + tsize = lli_len / bd.srcbus.buswidth; + + dev_vdbg(&pl08x->adev->dev, + "%s fill lli with single lli chunk of " + "size 0x%08zx (remainder 0x%08zx)\n", + __func__, lli_len, bd.remainder); + + cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth, + bd.dstbus.buswidth, tsize); + pl08x_fill_lli_for_desc(&bd, num_llis++, + lli_len, cctl); + total_bytes += lli_len; + } + + /* + * Send any odd bytes + */ + if (bd.remainder) { + dev_vdbg(&pl08x->adev->dev, + "%s align with boundary, send odd bytes (remain %zu)\n", + __func__, bd.remainder); + prep_byte_width_lli(&bd, &cctl, bd.remainder, + num_llis++, &total_bytes); + } + } + + if (total_bytes != dsg->len) { + dev_err(&pl08x->adev->dev, + "%s size of encoded lli:s don't match total txd, transferred 0x%08zx from size 0x%08zx\n", + __func__, total_bytes, dsg->len); + return 0; + } + + if (num_llis >= MAX_NUM_TSFR_LLIS) { + dev_err(&pl08x->adev->dev, + "%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n", + __func__, (u32) MAX_NUM_TSFR_LLIS); + return 0; + } + } + + llis_va = txd->llis_va; + /* The final LLI terminates the LLI. */ + llis_va[num_llis - 1].lli = 0; + /* The final LLI element shall also fire an interrupt. */ + llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN; + +#ifdef VERBOSE_DEBUG + { + int i; + + dev_vdbg(&pl08x->adev->dev, + "%-3s %-9s %-10s %-10s %-10s %s\n", + "lli", "", "csrc", "cdst", "clli", "cctl"); + for (i = 0; i < num_llis; i++) { + dev_vdbg(&pl08x->adev->dev, + "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, &llis_va[i], llis_va[i].src, + llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl + ); + } + } +#endif + + return num_llis; +} + +/* You should call this with the struct pl08x lock held */ +static void pl08x_free_txd(struct pl08x_driver_data *pl08x, + struct pl08x_txd *txd) +{ + struct pl08x_sg *dsg, *_dsg; + + /* Free the LLI */ + if (txd->llis_va) + dma_pool_free(pl08x->pool, txd->llis_va, txd->llis_bus); + + pl08x->pool_ctr--; + + list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) { + list_del(&dsg->node); + kfree(dsg); + } + + kfree(txd); +} + +static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x, + struct pl08x_dma_chan *plchan) +{ + struct pl08x_txd *txdi = NULL; + struct pl08x_txd *next; + + if (!list_empty(&plchan->pend_list)) { + list_for_each_entry_safe(txdi, + next, &plchan->pend_list, node) { + list_del(&txdi->node); + pl08x_free_txd(pl08x, txdi); + } + } +} + +/* + * The DMA ENGINE API + */ +static int pl08x_alloc_chan_resources(struct dma_chan *chan) +{ + return 0; +} + +static void pl08x_free_chan_resources(struct dma_chan *chan) +{ +} + +/* + * This should be called with the channel plchan->lock held + */ +static int prep_phy_channel(struct pl08x_dma_chan *plchan, + struct pl08x_txd *txd) +{ + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_phy_chan *ch; + int ret; + + /* Check if we already have a channel */ + if (plchan->phychan) { + ch = plchan->phychan; + goto got_channel; + } + + ch = pl08x_get_phy_channel(pl08x, plchan); + if (!ch) { + /* No physical channel available, cope with it */ + dev_dbg(&pl08x->adev->dev, "no physical channel available for xfer on %s\n", plchan->name); + return -EBUSY; + } + + /* + * OK we have a physical channel: for memcpy() this is all we + * need, but for slaves the physical signals may be muxed! + * Can the platform allow us to use this channel? + */ + if (plchan->slave && pl08x->pd->get_signal) { + ret = pl08x->pd->get_signal(plchan); + if (ret < 0) { + dev_dbg(&pl08x->adev->dev, + "unable to use physical channel %d for transfer on %s due to platform restrictions\n", + ch->id, plchan->name); + /* Release physical channel & return */ + pl08x_put_phy_channel(pl08x, ch); + return -EBUSY; + } + ch->signal = ret; + } + + plchan->phychan = ch; + dev_dbg(&pl08x->adev->dev, "allocated physical channel %d and signal %d for xfer on %s\n", + ch->id, + ch->signal, + plchan->name); + +got_channel: + /* Assign the flow control signal to this channel */ + if (txd->direction == DMA_MEM_TO_DEV) + txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT; + else if (txd->direction == DMA_DEV_TO_MEM) + txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT; + + plchan->phychan_hold++; + + return 0; +} + +static void release_phy_channel(struct pl08x_dma_chan *plchan) +{ + struct pl08x_driver_data *pl08x = plchan->host; + + if ((plchan->phychan->signal >= 0) && pl08x->pd->put_signal) { + pl08x->pd->put_signal(plchan); + plchan->phychan->signal = -1; + } + pl08x_put_phy_channel(pl08x, plchan->phychan); + plchan->phychan = NULL; +} + +static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan); + struct pl08x_txd *txd = to_pl08x_txd(tx); + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&plchan->lock, flags); + cookie = dma_cookie_assign(tx); + + /* Put this onto the pending list */ + list_add_tail(&txd->node, &plchan->pend_list); + + /* + * If there was no physical channel available for this memcpy, + * stack the request up and indicate that the channel is waiting + * for a free physical channel. + */ + if (!plchan->slave && !plchan->phychan) { + /* Do this memcpy whenever there is a channel ready */ + plchan->state = PL08X_CHAN_WAITING; + plchan->waiting = txd; + } else { + plchan->phychan_hold--; + } + + spin_unlock_irqrestore(&plchan->lock, flags); + + return cookie; +} + +static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt( + struct dma_chan *chan, unsigned long flags) +{ + struct dma_async_tx_descriptor *retval = NULL; + + return retval; +} + +/* + * Code accessing dma_async_is_complete() in a tight loop may give problems. + * If slaves are relying on interrupts to signal completion this function + * must not be called with interrupts disabled. + */ +static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_SUCCESS) + return ret; + + /* + * This cookie not complete yet + * Get number of bytes left in the active transactions and queue + */ + dma_set_residue(txstate, pl08x_getbytes_chan(plchan)); + + if (plchan->state == PL08X_CHAN_PAUSED) + return DMA_PAUSED; + + /* Whether waiting or running, we're in progress */ + return DMA_IN_PROGRESS; +} + +/* PrimeCell DMA extension */ +struct burst_table { + u32 burstwords; + u32 reg; +}; + +static const struct burst_table burst_sizes[] = { + { + .burstwords = 256, + .reg = PL080_BSIZE_256, + }, + { + .burstwords = 128, + .reg = PL080_BSIZE_128, + }, + { + .burstwords = 64, + .reg = PL080_BSIZE_64, + }, + { + .burstwords = 32, + .reg = PL080_BSIZE_32, + }, + { + .burstwords = 16, + .reg = PL080_BSIZE_16, + }, + { + .burstwords = 8, + .reg = PL080_BSIZE_8, + }, + { + .burstwords = 4, + .reg = PL080_BSIZE_4, + }, + { + .burstwords = 0, + .reg = PL080_BSIZE_1, + }, +}; + +/* + * Given the source and destination available bus masks, select which + * will be routed to each port. We try to have source and destination + * on separate ports, but always respect the allowable settings. + */ +static u32 pl08x_select_bus(u8 src, u8 dst) +{ + u32 cctl = 0; + + if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) + cctl |= PL080_CONTROL_DST_AHB2; + if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) + cctl |= PL080_CONTROL_SRC_AHB2; + + return cctl; +} + +static u32 pl08x_cctl(u32 cctl) +{ + cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | + PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | + PL080_CONTROL_PROT_MASK); + + /* Access the cell in privileged mode, non-bufferable, non-cacheable */ + return cctl | PL080_CONTROL_PROT_SYS; +} + +static u32 pl08x_width(enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return PL080_WIDTH_8BIT; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return PL080_WIDTH_16BIT; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return PL080_WIDTH_32BIT; + default: + return ~0; + } +} + +static u32 pl08x_burst(u32 maxburst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) + if (burst_sizes[i].burstwords <= maxburst) + break; + + return burst_sizes[i].reg; +} + +static int dma_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + enum dma_slave_buswidth addr_width; + u32 width, burst, maxburst; + u32 cctl = 0; + + if (!plchan->slave) + return -EINVAL; + + /* Transfer direction */ + plchan->runtime_direction = config->direction; + if (config->direction == DMA_MEM_TO_DEV) { + addr_width = config->dst_addr_width; + maxburst = config->dst_maxburst; + } else if (config->direction == DMA_DEV_TO_MEM) { + addr_width = config->src_addr_width; + maxburst = config->src_maxburst; + } else { + dev_err(&pl08x->adev->dev, + "bad runtime_config: alien transfer direction\n"); + return -EINVAL; + } + + width = pl08x_width(addr_width); + if (width == ~0) { + dev_err(&pl08x->adev->dev, + "bad runtime_config: alien address width\n"); + return -EINVAL; + } + + cctl |= width << PL080_CONTROL_SWIDTH_SHIFT; + cctl |= width << PL080_CONTROL_DWIDTH_SHIFT; + + /* + * If this channel will only request single transfers, set this + * down to ONE element. Also select one element if no maxburst + * is specified. + */ + if (plchan->cd->single) + maxburst = 1; + + burst = pl08x_burst(maxburst); + cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT; + cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT; + + plchan->device_fc = config->device_fc; + + if (plchan->runtime_direction == DMA_DEV_TO_MEM) { + plchan->src_addr = config->src_addr; + plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR | + pl08x_select_bus(plchan->cd->periph_buses, + pl08x->mem_buses); + } else { + plchan->dst_addr = config->dst_addr; + plchan->dst_cctl = pl08x_cctl(cctl) | PL080_CONTROL_SRC_INCR | + pl08x_select_bus(pl08x->mem_buses, + plchan->cd->periph_buses); + } + + dev_dbg(&pl08x->adev->dev, + "configured channel %s (%s) for %s, data width %d, " + "maxburst %d words, LE, CCTL=0x%08x\n", + dma_chan_name(chan), plchan->name, + (config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX", + addr_width, + maxburst, + cctl); + + return 0; +} + +/* + * Slave transactions callback to the slave device to allow + * synchronization of slave DMA signals with the DMAC enable + */ +static void pl08x_issue_pending(struct dma_chan *chan) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&plchan->lock, flags); + /* Something is already active, or we're waiting for a channel... */ + if (plchan->at || plchan->state == PL08X_CHAN_WAITING) { + spin_unlock_irqrestore(&plchan->lock, flags); + return; + } + + /* Take the first element in the queue and execute it */ + if (!list_empty(&plchan->pend_list)) { + struct pl08x_txd *next; + + next = list_first_entry(&plchan->pend_list, + struct pl08x_txd, + node); + list_del(&next->node); + plchan->state = PL08X_CHAN_RUNNING; + + pl08x_start_txd(plchan, next); + } + + spin_unlock_irqrestore(&plchan->lock, flags); +} + +static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan, + struct pl08x_txd *txd) +{ + struct pl08x_driver_data *pl08x = plchan->host; + unsigned long flags; + int num_llis, ret; + + num_llis = pl08x_fill_llis_for_desc(pl08x, txd); + if (!num_llis) { + spin_lock_irqsave(&plchan->lock, flags); + pl08x_free_txd(pl08x, txd); + spin_unlock_irqrestore(&plchan->lock, flags); + return -EINVAL; + } + + spin_lock_irqsave(&plchan->lock, flags); + + /* + * See if we already have a physical channel allocated, + * else this is the time to try to get one. + */ + ret = prep_phy_channel(plchan, txd); + if (ret) { + /* + * No physical channel was available. + * + * memcpy transfers can be sorted out at submission time. + * + * Slave transfers may have been denied due to platform + * channel muxing restrictions. Since there is no guarantee + * that this will ever be resolved, and the signal must be + * acquired AFTER acquiring the physical channel, we will let + * them be NACK:ed with -EBUSY here. The drivers can retry + * the prep() call if they are eager on doing this using DMA. + */ + if (plchan->slave) { + pl08x_free_txd_list(pl08x, plchan); + pl08x_free_txd(pl08x, txd); + spin_unlock_irqrestore(&plchan->lock, flags); + return -EBUSY; + } + } else + /* + * Else we're all set, paused and ready to roll, status + * will switch to PL08X_CHAN_RUNNING when we call + * issue_pending(). If there is something running on the + * channel already we don't change its state. + */ + if (plchan->state == PL08X_CHAN_IDLE) + plchan->state = PL08X_CHAN_PAUSED; + + spin_unlock_irqrestore(&plchan->lock, flags); + + return 0; +} + +static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan, + unsigned long flags) +{ + struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + + if (txd) { + dma_async_tx_descriptor_init(&txd->tx, &plchan->chan); + txd->tx.flags = flags; + txd->tx.tx_submit = pl08x_tx_submit; + INIT_LIST_HEAD(&txd->node); + INIT_LIST_HEAD(&txd->dsg_list); + + /* Always enable error and terminal interrupts */ + txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK | + PL080_CONFIG_TC_IRQ_MASK; + } + return txd; +} + +/* + * Initialize a descriptor to be used by memcpy submit + */ +static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + struct pl08x_sg *dsg; + int ret; + + txd = pl08x_get_txd(plchan, flags); + if (!txd) { + dev_err(&pl08x->adev->dev, + "%s no memory for descriptor\n", __func__); + return NULL; + } + + dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT); + if (!dsg) { + pl08x_free_txd(pl08x, txd); + dev_err(&pl08x->adev->dev, "%s no memory for pl080 sg\n", + __func__); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + txd->direction = DMA_NONE; + dsg->src_addr = src; + dsg->dst_addr = dest; + dsg->len = len; + + /* Set platform data for m2m */ + txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; + txd->cctl = pl08x->pd->memcpy_channel.cctl & + ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2); + + /* Both to be incremented or the code will break */ + txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; + + if (pl08x->vd->dualmaster) + txd->cctl |= pl08x_select_bus(pl08x->mem_buses, + pl08x->mem_buses); + + ret = pl08x_prep_channel_resources(plchan, txd); + if (ret) + return NULL; + + return &txd->tx; +} + +static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + struct pl08x_sg *dsg; + struct scatterlist *sg; + dma_addr_t slave_addr; + int ret, tmp; + + dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n", + __func__, sgl->length, plchan->name); + + txd = pl08x_get_txd(plchan, flags); + if (!txd) { + dev_err(&pl08x->adev->dev, "%s no txd\n", __func__); + return NULL; + } + + if (direction != plchan->runtime_direction) + dev_err(&pl08x->adev->dev, "%s DMA setup does not match " + "the direction configured for the PrimeCell\n", + __func__); + + /* + * Set up addresses, the PrimeCell configured address + * will take precedence since this may configure the + * channel target address dynamically at runtime. + */ + txd->direction = direction; + + if (direction == DMA_MEM_TO_DEV) { + txd->cctl = plchan->dst_cctl; + slave_addr = plchan->dst_addr; + } else if (direction == DMA_DEV_TO_MEM) { + txd->cctl = plchan->src_cctl; + slave_addr = plchan->src_addr; + } else { + pl08x_free_txd(pl08x, txd); + dev_err(&pl08x->adev->dev, + "%s direction unsupported\n", __func__); + return NULL; + } + + if (plchan->device_fc) + tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER : + PL080_FLOW_PER2MEM_PER; + else + tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER : + PL080_FLOW_PER2MEM; + + txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT; + + for_each_sg(sgl, sg, sg_len, tmp) { + dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT); + if (!dsg) { + pl08x_free_txd(pl08x, txd); + dev_err(&pl08x->adev->dev, "%s no mem for pl080 sg\n", + __func__); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->len = sg_dma_len(sg); + if (direction == DMA_MEM_TO_DEV) { + dsg->src_addr = sg_phys(sg); + dsg->dst_addr = slave_addr; + } else { + dsg->src_addr = slave_addr; + dsg->dst_addr = sg_phys(sg); + } + } + + ret = pl08x_prep_channel_resources(plchan, txd); + if (ret) + return NULL; + + return &txd->tx; +} + +static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); + struct pl08x_driver_data *pl08x = plchan->host; + unsigned long flags; + int ret = 0; + + /* Controls applicable to inactive channels */ + if (cmd == DMA_SLAVE_CONFIG) { + return dma_set_runtime_config(chan, + (struct dma_slave_config *)arg); + } + + /* + * Anything succeeds on channels with no physical allocation and + * no queued transfers. + */ + spin_lock_irqsave(&plchan->lock, flags); + if (!plchan->phychan && !plchan->at) { + spin_unlock_irqrestore(&plchan->lock, flags); + return 0; + } + + switch (cmd) { + case DMA_TERMINATE_ALL: + plchan->state = PL08X_CHAN_IDLE; + + if (plchan->phychan) { + pl08x_terminate_phy_chan(pl08x, plchan->phychan); + + /* + * Mark physical channel as free and free any slave + * signal + */ + release_phy_channel(plchan); + plchan->phychan_hold = 0; + } + /* Dequeue jobs and free LLIs */ + if (plchan->at) { + pl08x_free_txd(pl08x, plchan->at); + plchan->at = NULL; + } + /* Dequeue jobs not yet fired as well */ + pl08x_free_txd_list(pl08x, plchan); + break; + case DMA_PAUSE: + pl08x_pause_phy_chan(plchan->phychan); + plchan->state = PL08X_CHAN_PAUSED; + break; + case DMA_RESUME: + pl08x_resume_phy_chan(plchan->phychan); + plchan->state = PL08X_CHAN_RUNNING; + break; + default: + /* Unknown command */ + ret = -ENXIO; + break; + } + + spin_unlock_irqrestore(&plchan->lock, flags); + + return ret; +} + +bool pl08x_filter_id(struct dma_chan *chan, void *chan_id) +{ + struct pl08x_dma_chan *plchan; + char *name = chan_id; + + /* Reject channels for devices not bound to this driver */ + if (chan->device->dev->driver != &pl08x_amba_driver.drv) + return false; + + plchan = to_pl08x_chan(chan); + + /* Check that the channel is not taken! */ + if (!strcmp(plchan->name, name)) + return true; + + return false; +} + +/* + * Just check that the device is there and active + * TODO: turn this bit on/off depending on the number of physical channels + * actually used, if it is zero... well shut it off. That will save some + * power. Cut the clock at the same time. + */ +static void pl08x_ensure_on(struct pl08x_driver_data *pl08x) +{ + writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG); +} + +static void pl08x_unmap_buffers(struct pl08x_txd *txd) +{ + struct device *dev = txd->tx.chan->device->dev; + struct pl08x_sg *dsg; + + if (!(txd->tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + list_for_each_entry(dsg, &txd->dsg_list, node) + dma_unmap_single(dev, dsg->src_addr, dsg->len, + DMA_TO_DEVICE); + else { + list_for_each_entry(dsg, &txd->dsg_list, node) + dma_unmap_page(dev, dsg->src_addr, dsg->len, + DMA_TO_DEVICE); + } + } + if (!(txd->tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + list_for_each_entry(dsg, &txd->dsg_list, node) + dma_unmap_single(dev, dsg->dst_addr, dsg->len, + DMA_FROM_DEVICE); + else + list_for_each_entry(dsg, &txd->dsg_list, node) + dma_unmap_page(dev, dsg->dst_addr, dsg->len, + DMA_FROM_DEVICE); + } +} + +static void pl08x_tasklet(unsigned long data) +{ + struct pl08x_dma_chan *plchan = (struct pl08x_dma_chan *) data; + struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + unsigned long flags; + + spin_lock_irqsave(&plchan->lock, flags); + + txd = plchan->at; + plchan->at = NULL; + + if (txd) { + /* Update last completed */ + dma_cookie_complete(&txd->tx); + } + + /* If a new descriptor is queued, set it up plchan->at is NULL here */ + if (!list_empty(&plchan->pend_list)) { + struct pl08x_txd *next; + + next = list_first_entry(&plchan->pend_list, + struct pl08x_txd, + node); + list_del(&next->node); + + pl08x_start_txd(plchan, next); + } else if (plchan->phychan_hold) { + /* + * This channel is still in use - we have a new txd being + * prepared and will soon be queued. Don't give up the + * physical channel. + */ + } else { + struct pl08x_dma_chan *waiting = NULL; + + /* + * No more jobs, so free up the physical channel + * Free any allocated signal on slave transfers too + */ + release_phy_channel(plchan); + plchan->state = PL08X_CHAN_IDLE; + + /* + * And NOW before anyone else can grab that free:d up + * physical channel, see if there is some memcpy pending + * that seriously needs to start because of being stacked + * up while we were choking the physical channels with data. + */ + list_for_each_entry(waiting, &pl08x->memcpy.channels, + chan.device_node) { + if (waiting->state == PL08X_CHAN_WAITING && + waiting->waiting != NULL) { + int ret; + + /* This should REALLY not fail now */ + ret = prep_phy_channel(waiting, + waiting->waiting); + BUG_ON(ret); + waiting->phychan_hold--; + waiting->state = PL08X_CHAN_RUNNING; + waiting->waiting = NULL; + pl08x_issue_pending(&waiting->chan); + break; + } + } + } + + spin_unlock_irqrestore(&plchan->lock, flags); + + if (txd) { + dma_async_tx_callback callback = txd->tx.callback; + void *callback_param = txd->tx.callback_param; + + /* Don't try to unmap buffers on slave channels */ + if (!plchan->slave) + pl08x_unmap_buffers(txd); + + /* Free the descriptor */ + spin_lock_irqsave(&plchan->lock, flags); + pl08x_free_txd(pl08x, txd); + spin_unlock_irqrestore(&plchan->lock, flags); + + /* Callback to signal completion */ + if (callback) + callback(callback_param); + } +} + +static irqreturn_t pl08x_irq(int irq, void *dev) +{ + struct pl08x_driver_data *pl08x = dev; + u32 mask = 0, err, tc, i; + + /* check & clear - ERR & TC interrupts */ + err = readl(pl08x->base + PL080_ERR_STATUS); + if (err) { + dev_err(&pl08x->adev->dev, "%s error interrupt, register value 0x%08x\n", + __func__, err); + writel(err, pl08x->base + PL080_ERR_CLEAR); + } + tc = readl(pl08x->base + PL080_INT_STATUS); + if (tc) + writel(tc, pl08x->base + PL080_TC_CLEAR); + + if (!err && !tc) + return IRQ_NONE; + + for (i = 0; i < pl08x->vd->channels; i++) { + if (((1 << i) & err) || ((1 << i) & tc)) { + /* Locate physical channel */ + struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i]; + struct pl08x_dma_chan *plchan = phychan->serving; + + if (!plchan) { + dev_err(&pl08x->adev->dev, + "%s Error TC interrupt on unused channel: 0x%08x\n", + __func__, i); + continue; + } + + /* Schedule tasklet on this channel */ + tasklet_schedule(&plchan->tasklet); + mask |= (1 << i); + } + } + + return mask ? IRQ_HANDLED : IRQ_NONE; +} + +static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan) +{ + u32 cctl = pl08x_cctl(chan->cd->cctl); + + chan->slave = true; + chan->name = chan->cd->bus_id; + chan->src_addr = chan->cd->addr; + chan->dst_addr = chan->cd->addr; + chan->src_cctl = cctl | PL080_CONTROL_DST_INCR | + pl08x_select_bus(chan->cd->periph_buses, chan->host->mem_buses); + chan->dst_cctl = cctl | PL080_CONTROL_SRC_INCR | + pl08x_select_bus(chan->host->mem_buses, chan->cd->periph_buses); +} + +/* + * Initialise the DMAC memcpy/slave channels. + * Make a local wrapper to hold required data + */ +static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, + struct dma_device *dmadev, unsigned int channels, bool slave) +{ + struct pl08x_dma_chan *chan; + int i; + + INIT_LIST_HEAD(&dmadev->channels); + + /* + * Register as many many memcpy as we have physical channels, + * we won't always be able to use all but the code will have + * to cope with that situation. + */ + for (i = 0; i < channels; i++) { + chan = kzalloc(sizeof(*chan), GFP_KERNEL); + if (!chan) { + dev_err(&pl08x->adev->dev, + "%s no memory for channel\n", __func__); + return -ENOMEM; + } + + chan->host = pl08x; + chan->state = PL08X_CHAN_IDLE; + + if (slave) { + chan->cd = &pl08x->pd->slave_channels[i]; + pl08x_dma_slave_init(chan); + } else { + chan->cd = &pl08x->pd->memcpy_channel; + chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); + if (!chan->name) { + kfree(chan); + return -ENOMEM; + } + } + if (chan->cd->circular_buffer) { + dev_err(&pl08x->adev->dev, + "channel %s: circular buffers not supported\n", + chan->name); + kfree(chan); + continue; + } + dev_dbg(&pl08x->adev->dev, + "initialize virtual channel \"%s\"\n", + chan->name); + + chan->chan.device = dmadev; + dma_cookie_init(&chan->chan); + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->pend_list); + tasklet_init(&chan->tasklet, pl08x_tasklet, + (unsigned long) chan); + + list_add_tail(&chan->chan.device_node, &dmadev->channels); + } + dev_info(&pl08x->adev->dev, "initialized %d virtual %s channels\n", + i, slave ? "slave" : "memcpy"); + return i; +} + +static void pl08x_free_virtual_channels(struct dma_device *dmadev) +{ + struct pl08x_dma_chan *chan = NULL; + struct pl08x_dma_chan *next; + + list_for_each_entry_safe(chan, + next, &dmadev->channels, chan.device_node) { + list_del(&chan->chan.device_node); + kfree(chan); + } +} + +#ifdef CONFIG_DEBUG_FS +static const char *pl08x_state_str(enum pl08x_dma_chan_state state) +{ + switch (state) { + case PL08X_CHAN_IDLE: + return "idle"; + case PL08X_CHAN_RUNNING: + return "running"; + case PL08X_CHAN_PAUSED: + return "paused"; + case PL08X_CHAN_WAITING: + return "waiting"; + default: + break; + } + return "UNKNOWN STATE"; +} + +static int pl08x_debugfs_show(struct seq_file *s, void *data) +{ + struct pl08x_driver_data *pl08x = s->private; + struct pl08x_dma_chan *chan; + struct pl08x_phy_chan *ch; + unsigned long flags; + int i; + + seq_printf(s, "PL08x physical channels:\n"); + seq_printf(s, "CHANNEL:\tUSER:\n"); + seq_printf(s, "--------\t-----\n"); + for (i = 0; i < pl08x->vd->channels; i++) { + struct pl08x_dma_chan *virt_chan; + + ch = &pl08x->phy_chans[i]; + + spin_lock_irqsave(&ch->lock, flags); + virt_chan = ch->serving; + + seq_printf(s, "%d\t\t%s\n", + ch->id, virt_chan ? virt_chan->name : "(none)"); + + spin_unlock_irqrestore(&ch->lock, flags); + } + + seq_printf(s, "\nPL08x virtual memcpy channels:\n"); + seq_printf(s, "CHANNEL:\tSTATE:\n"); + seq_printf(s, "--------\t------\n"); + list_for_each_entry(chan, &pl08x->memcpy.channels, chan.device_node) { + seq_printf(s, "%s\t\t%s\n", chan->name, + pl08x_state_str(chan->state)); + } + + seq_printf(s, "\nPL08x virtual slave channels:\n"); + seq_printf(s, "CHANNEL:\tSTATE:\n"); + seq_printf(s, "--------\t------\n"); + list_for_each_entry(chan, &pl08x->slave.channels, chan.device_node) { + seq_printf(s, "%s\t\t%s\n", chan->name, + pl08x_state_str(chan->state)); + } + + return 0; +} + +static int pl08x_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, pl08x_debugfs_show, inode->i_private); +} + +static const struct file_operations pl08x_debugfs_operations = { + .open = pl08x_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) +{ + /* Expose a simple debugfs interface to view all clocks */ + (void) debugfs_create_file(dev_name(&pl08x->adev->dev), + S_IFREG | S_IRUGO, NULL, pl08x, + &pl08x_debugfs_operations); +} + +#else +static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) +{ +} +#endif + +static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) +{ + struct pl08x_driver_data *pl08x; + const struct vendor_data *vd = id->data; + int ret = 0; + int i; + + ret = amba_request_regions(adev, NULL); + if (ret) + return ret; + + /* Create the driver state holder */ + pl08x = kzalloc(sizeof(*pl08x), GFP_KERNEL); + if (!pl08x) { + ret = -ENOMEM; + goto out_no_pl08x; + } + + pm_runtime_set_active(&adev->dev); + pm_runtime_enable(&adev->dev); + + /* Initialize memcpy engine */ + dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask); + pl08x->memcpy.dev = &adev->dev; + pl08x->memcpy.device_alloc_chan_resources = pl08x_alloc_chan_resources; + pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources; + pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy; + pl08x->memcpy.device_prep_dma_interrupt = pl08x_prep_dma_interrupt; + pl08x->memcpy.device_tx_status = pl08x_dma_tx_status; + pl08x->memcpy.device_issue_pending = pl08x_issue_pending; + pl08x->memcpy.device_control = pl08x_control; + + /* Initialize slave engine */ + dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask); + pl08x->slave.dev = &adev->dev; + pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources; + pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources; + pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt; + pl08x->slave.device_tx_status = pl08x_dma_tx_status; + pl08x->slave.device_issue_pending = pl08x_issue_pending; + pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg; + pl08x->slave.device_control = pl08x_control; + + /* Get the platform data */ + pl08x->pd = dev_get_platdata(&adev->dev); + if (!pl08x->pd) { + dev_err(&adev->dev, "no platform data supplied\n"); + goto out_no_platdata; + } + + /* Assign useful pointers to the driver state */ + pl08x->adev = adev; + pl08x->vd = vd; + + /* By default, AHB1 only. If dualmaster, from platform */ + pl08x->lli_buses = PL08X_AHB1; + pl08x->mem_buses = PL08X_AHB1; + if (pl08x->vd->dualmaster) { + pl08x->lli_buses = pl08x->pd->lli_buses; + pl08x->mem_buses = pl08x->pd->mem_buses; + } + + /* A DMA memory pool for LLIs, align on 1-byte boundary */ + pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev, + PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0); + if (!pl08x->pool) { + ret = -ENOMEM; + goto out_no_lli_pool; + } + + spin_lock_init(&pl08x->lock); + + pl08x->base = ioremap(adev->res.start, resource_size(&adev->res)); + if (!pl08x->base) { + ret = -ENOMEM; + goto out_no_ioremap; + } + + /* Turn on the PL08x */ + pl08x_ensure_on(pl08x); + + /* Attach the interrupt handler */ + writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR); + writel(0x000000FF, pl08x->base + PL080_TC_CLEAR); + + ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED, + DRIVER_NAME, pl08x); + if (ret) { + dev_err(&adev->dev, "%s failed to request interrupt %d\n", + __func__, adev->irq[0]); + goto out_no_irq; + } + + /* Initialize physical channels */ + pl08x->phy_chans = kmalloc((vd->channels * sizeof(*pl08x->phy_chans)), + GFP_KERNEL); + if (!pl08x->phy_chans) { + dev_err(&adev->dev, "%s failed to allocate " + "physical channel holders\n", + __func__); + goto out_no_phychans; + } + + for (i = 0; i < vd->channels; i++) { + struct pl08x_phy_chan *ch = &pl08x->phy_chans[i]; + + ch->id = i; + ch->base = pl08x->base + PL080_Cx_BASE(i); + spin_lock_init(&ch->lock); + ch->serving = NULL; + ch->signal = -1; + dev_dbg(&adev->dev, "physical channel %d is %s\n", + i, pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE"); + } + + /* Register as many memcpy channels as there are physical channels */ + ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->memcpy, + pl08x->vd->channels, false); + if (ret <= 0) { + dev_warn(&pl08x->adev->dev, + "%s failed to enumerate memcpy channels - %d\n", + __func__, ret); + goto out_no_memcpy; + } + pl08x->memcpy.chancnt = ret; + + /* Register slave channels */ + ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave, + pl08x->pd->num_slave_channels, true); + if (ret <= 0) { + dev_warn(&pl08x->adev->dev, + "%s failed to enumerate slave channels - %d\n", + __func__, ret); + goto out_no_slave; + } + pl08x->slave.chancnt = ret; + + ret = dma_async_device_register(&pl08x->memcpy); + if (ret) { + dev_warn(&pl08x->adev->dev, + "%s failed to register memcpy as an async device - %d\n", + __func__, ret); + goto out_no_memcpy_reg; + } + + ret = dma_async_device_register(&pl08x->slave); + if (ret) { + dev_warn(&pl08x->adev->dev, + "%s failed to register slave as an async device - %d\n", + __func__, ret); + goto out_no_slave_reg; + } + + amba_set_drvdata(adev, pl08x); + init_pl08x_debugfs(pl08x); + dev_info(&pl08x->adev->dev, "DMA: PL%03x rev%u at 0x%08llx irq %d\n", + amba_part(adev), amba_rev(adev), + (unsigned long long)adev->res.start, adev->irq[0]); + + pm_runtime_put(&adev->dev); + return 0; + +out_no_slave_reg: + dma_async_device_unregister(&pl08x->memcpy); +out_no_memcpy_reg: + pl08x_free_virtual_channels(&pl08x->slave); +out_no_slave: + pl08x_free_virtual_channels(&pl08x->memcpy); +out_no_memcpy: + kfree(pl08x->phy_chans); +out_no_phychans: + free_irq(adev->irq[0], pl08x); +out_no_irq: + iounmap(pl08x->base); +out_no_ioremap: + dma_pool_destroy(pl08x->pool); +out_no_lli_pool: +out_no_platdata: + pm_runtime_put(&adev->dev); + pm_runtime_disable(&adev->dev); + + kfree(pl08x); +out_no_pl08x: + amba_release_regions(adev); + return ret; +} + +/* PL080 has 8 channels and the PL080 have just 2 */ +static struct vendor_data vendor_pl080 = { + .channels = 8, + .dualmaster = true, +}; + +static struct vendor_data vendor_pl081 = { + .channels = 2, + .dualmaster = false, +}; + +static struct amba_id pl08x_ids[] = { + /* PL080 */ + { + .id = 0x00041080, + .mask = 0x000fffff, + .data = &vendor_pl080, + }, + /* PL081 */ + { + .id = 0x00041081, + .mask = 0x000fffff, + .data = &vendor_pl081, + }, + /* Nomadik 8815 PL080 variant */ + { + .id = 0x00280880, + .mask = 0x00ffffff, + .data = &vendor_pl080, + }, + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl08x_ids); + +static struct amba_driver pl08x_amba_driver = { + .drv.name = DRIVER_NAME, + .id_table = pl08x_ids, + .probe = pl08x_probe, +}; + +static int __init pl08x_init(void) +{ + int retval; + retval = amba_driver_register(&pl08x_amba_driver); + if (retval) + printk(KERN_WARNING DRIVER_NAME + "failed to register as an AMBA device (%d)\n", + retval); + return retval; +} +subsys_initcall(pl08x_init); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c new file mode 100644 index 00000000..bf0d7e4e --- /dev/null +++ b/drivers/dma/at_hdmac.c @@ -0,0 +1,1583 @@ +/* + * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems) + * + * Copyright (C) 2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * This supports the Atmel AHB DMA Controller, + * + * The driver has currently been tested with the Atmel AT91SAM9RL + * and AT91SAM9G45 series. + */ + +#include <linux/clk.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/of_device.h> + +#include "at_hdmac_regs.h" +#include "dmaengine.h" + +/* + * Glossary + * -------- + * + * at_hdmac : Name of the ATmel AHB DMA Controller + * at_dma_ / atdma : ATmel DMA controller entity related + * atc_ / atchan : ATmel DMA Channel entity related + */ + +#define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) +#define ATC_DEFAULT_CTRLA (0) +#define ATC_DEFAULT_CTRLB (ATC_SIF(AT_DMA_MEM_IF) \ + |ATC_DIF(AT_DMA_MEM_IF)) + +/* + * Initial number of descriptors to allocate for each channel. This could + * be increased during dma usage. + */ +static unsigned int init_nr_desc_per_channel = 64; +module_param(init_nr_desc_per_channel, uint, 0644); +MODULE_PARM_DESC(init_nr_desc_per_channel, + "initial descriptors per channel (default: 64)"); + + +/* prototypes */ +static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx); + + +/*----------------------------------------------------------------------*/ + +static struct at_desc *atc_first_active(struct at_dma_chan *atchan) +{ + return list_first_entry(&atchan->active_list, + struct at_desc, desc_node); +} + +static struct at_desc *atc_first_queued(struct at_dma_chan *atchan) +{ + return list_first_entry(&atchan->queue, + struct at_desc, desc_node); +} + +/** + * atc_alloc_descriptor - allocate and return an initialized descriptor + * @chan: the channel to allocate descriptors for + * @gfp_flags: GFP allocation flags + * + * Note: The ack-bit is positioned in the descriptor flag at creation time + * to make initial allocation more convenient. This bit will be cleared + * and control will be given to client at usage time (during + * preparation functions). + */ +static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, + gfp_t gfp_flags) +{ + struct at_desc *desc = NULL; + struct at_dma *atdma = to_at_dma(chan->device); + dma_addr_t phys; + + desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); + if (desc) { + memset(desc, 0, sizeof(struct at_desc)); + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, chan); + /* txd.flags will be overwritten in prep functions */ + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.tx_submit = atc_tx_submit; + desc->txd.phys = phys; + } + + return desc; +} + +/** + * atc_desc_get - get an unused descriptor from free_list + * @atchan: channel we want a new descriptor for + */ +static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) +{ + struct at_desc *desc, *_desc; + struct at_desc *ret = NULL; + unsigned long flags; + unsigned int i = 0; + LIST_HEAD(tmp_list); + + spin_lock_irqsave(&atchan->lock, flags); + list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { + i++; + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(chan2dev(&atchan->chan_common), + "desc %p not ACKed\n", desc); + } + spin_unlock_irqrestore(&atchan->lock, flags); + dev_vdbg(chan2dev(&atchan->chan_common), + "scanned %u descriptors on freelist\n", i); + + /* no more descriptor available in initial pool: create one more */ + if (!ret) { + ret = atc_alloc_descriptor(&atchan->chan_common, GFP_ATOMIC); + if (ret) { + spin_lock_irqsave(&atchan->lock, flags); + atchan->descs_allocated++; + spin_unlock_irqrestore(&atchan->lock, flags); + } else { + dev_err(chan2dev(&atchan->chan_common), + "not enough descriptors available\n"); + } + } + + return ret; +} + +/** + * atc_desc_put - move a descriptor, including any children, to the free list + * @atchan: channel we work on + * @desc: descriptor, at the head of a chain, to move to free list + */ +static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) +{ + if (desc) { + struct at_desc *child; + unsigned long flags; + + spin_lock_irqsave(&atchan->lock, flags); + list_for_each_entry(child, &desc->tx_list, desc_node) + dev_vdbg(chan2dev(&atchan->chan_common), + "moving child desc %p to freelist\n", + child); + list_splice_init(&desc->tx_list, &atchan->free_list); + dev_vdbg(chan2dev(&atchan->chan_common), + "moving desc %p to freelist\n", desc); + list_add(&desc->desc_node, &atchan->free_list); + spin_unlock_irqrestore(&atchan->lock, flags); + } +} + +/** + * atc_desc_chain - build chain adding a descripor + * @first: address of first descripor of the chain + * @prev: address of previous descripor of the chain + * @desc: descriptor to queue + * + * Called from prep_* functions + */ +static void atc_desc_chain(struct at_desc **first, struct at_desc **prev, + struct at_desc *desc) +{ + if (!(*first)) { + *first = desc; + } else { + /* inform the HW lli about chaining */ + (*prev)->lli.dscr = desc->txd.phys; + /* insert the link descriptor to the LD ring */ + list_add_tail(&desc->desc_node, + &(*first)->tx_list); + } + *prev = desc; +} + +/** + * atc_dostart - starts the DMA engine for real + * @atchan: the channel we want to start + * @first: first descriptor in the list we want to begin with + * + * Called with atchan->lock held and bh disabled + */ +static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + /* ASSERT: channel is idle */ + if (atc_chan_is_enabled(atchan)) { + dev_err(chan2dev(&atchan->chan_common), + "BUG: Attempted to start non-idle channel\n"); + dev_err(chan2dev(&atchan->chan_common), + " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", + channel_readl(atchan, SADDR), + channel_readl(atchan, DADDR), + channel_readl(atchan, CTRLA), + channel_readl(atchan, CTRLB), + channel_readl(atchan, DSCR)); + + /* The tasklet will hopefully advance the queue... */ + return; + } + + vdbg_dump_regs(atchan); + + channel_writel(atchan, SADDR, 0); + channel_writel(atchan, DADDR, 0); + channel_writel(atchan, CTRLA, 0); + channel_writel(atchan, CTRLB, 0); + channel_writel(atchan, DSCR, first->txd.phys); + dma_writel(atdma, CHER, atchan->mask); + + vdbg_dump_regs(atchan); +} + +/** + * atc_chain_complete - finish work for one transaction chain + * @atchan: channel we work on + * @desc: descriptor at the head of the chain we want do complete + * + * Called with atchan->lock held and bh disabled */ +static void +atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) +{ + struct dma_async_tx_descriptor *txd = &desc->txd; + + dev_vdbg(chan2dev(&atchan->chan_common), + "descriptor %u complete\n", txd->cookie); + + /* mark the descriptor as complete for non cyclic cases only */ + if (!atc_chan_is_cyclic(atchan)) + dma_cookie_complete(txd); + + /* move children to free_list */ + list_splice_init(&desc->tx_list, &atchan->free_list); + /* move myself to free_list */ + list_move(&desc->desc_node, &atchan->free_list); + + /* unmap dma addresses (not on slave channels) */ + if (!atchan->chan_common.private) { + struct device *parent = chan2parent(&atchan->chan_common); + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(parent, + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(parent, + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(parent, + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(parent, + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + } + } + + /* for cyclic transfers, + * no need to replay callback function while stopping */ + if (!atc_chan_is_cyclic(atchan)) { + dma_async_tx_callback callback = txd->callback; + void *param = txd->callback_param; + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + callback(param); + } + + dma_run_dependencies(txd); +} + +/** + * atc_complete_all - finish work for all transactions + * @atchan: channel to complete transactions for + * + * Eventually submit queued descriptors if any + * + * Assume channel is idle while calling this function + * Called with atchan->lock held and bh disabled + */ +static void atc_complete_all(struct at_dma_chan *atchan) +{ + struct at_desc *desc, *_desc; + LIST_HEAD(list); + + dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); + + BUG_ON(atc_chan_is_enabled(atchan)); + + /* + * Submit queued descriptors ASAP, i.e. before we go through + * the completed ones. + */ + if (!list_empty(&atchan->queue)) + atc_dostart(atchan, atc_first_queued(atchan)); + /* empty active_list now it is completed */ + list_splice_init(&atchan->active_list, &list); + /* empty queue list by moving descriptors (if any) to active_list */ + list_splice_init(&atchan->queue, &atchan->active_list); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) + atc_chain_complete(atchan, desc); +} + +/** + * atc_cleanup_descriptors - cleanup up finished descriptors in active_list + * @atchan: channel to be cleaned up + * + * Called with atchan->lock held and bh disabled + */ +static void atc_cleanup_descriptors(struct at_dma_chan *atchan) +{ + struct at_desc *desc, *_desc; + struct at_desc *child; + + dev_vdbg(chan2dev(&atchan->chan_common), "cleanup descriptors\n"); + + list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { + if (!(desc->lli.ctrla & ATC_DONE)) + /* This one is currently in progress */ + return; + + list_for_each_entry(child, &desc->tx_list, desc_node) + if (!(child->lli.ctrla & ATC_DONE)) + /* Currently in progress */ + return; + + /* + * No descriptors so far seem to be in progress, i.e. + * this chain must be done. + */ + atc_chain_complete(atchan, desc); + } +} + +/** + * atc_advance_work - at the end of a transaction, move forward + * @atchan: channel where the transaction ended + * + * Called with atchan->lock held and bh disabled + */ +static void atc_advance_work(struct at_dma_chan *atchan) +{ + dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); + + if (list_empty(&atchan->active_list) || + list_is_singular(&atchan->active_list)) { + atc_complete_all(atchan); + } else { + atc_chain_complete(atchan, atc_first_active(atchan)); + /* advance work */ + atc_dostart(atchan, atc_first_active(atchan)); + } +} + + +/** + * atc_handle_error - handle errors reported by DMA controller + * @atchan: channel where error occurs + * + * Called with atchan->lock held and bh disabled + */ +static void atc_handle_error(struct at_dma_chan *atchan) +{ + struct at_desc *bad_desc; + struct at_desc *child; + + /* + * The descriptor currently at the head of the active list is + * broked. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to carry on. + */ + bad_desc = atc_first_active(atchan); + list_del_init(&bad_desc->desc_node); + + /* As we are stopped, take advantage to push queued descriptors + * in active_list */ + list_splice_init(&atchan->queue, atchan->active_list.prev); + + /* Try to restart the controller */ + if (!list_empty(&atchan->active_list)) + atc_dostart(atchan, atc_first_active(atchan)); + + /* + * KERN_CRITICAL may seem harsh, but since this only happens + * when someone submits a bad physical address in a + * descriptor, we should consider ourselves lucky that the + * controller flagged an error instead of scribbling over + * random memory locations. + */ + dev_crit(chan2dev(&atchan->chan_common), + "Bad descriptor submitted for DMA!\n"); + dev_crit(chan2dev(&atchan->chan_common), + " cookie: %d\n", bad_desc->txd.cookie); + atc_dump_lli(atchan, &bad_desc->lli); + list_for_each_entry(child, &bad_desc->tx_list, desc_node) + atc_dump_lli(atchan, &child->lli); + + /* Pretend the descriptor completed successfully */ + atc_chain_complete(atchan, bad_desc); +} + +/** + * atc_handle_cyclic - at the end of a period, run callback function + * @atchan: channel used for cyclic operations + * + * Called with atchan->lock held and bh disabled + */ +static void atc_handle_cyclic(struct at_dma_chan *atchan) +{ + struct at_desc *first = atc_first_active(atchan); + struct dma_async_tx_descriptor *txd = &first->txd; + dma_async_tx_callback callback = txd->callback; + void *param = txd->callback_param; + + dev_vdbg(chan2dev(&atchan->chan_common), + "new cyclic period llp 0x%08x\n", + channel_readl(atchan, DSCR)); + + if (callback) + callback(param); +} + +/*-- IRQ & Tasklet ---------------------------------------------------*/ + +static void atc_tasklet(unsigned long data) +{ + struct at_dma_chan *atchan = (struct at_dma_chan *)data; + unsigned long flags; + + spin_lock_irqsave(&atchan->lock, flags); + if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status)) + atc_handle_error(atchan); + else if (atc_chan_is_cyclic(atchan)) + atc_handle_cyclic(atchan); + else + atc_advance_work(atchan); + + spin_unlock_irqrestore(&atchan->lock, flags); +} + +static irqreturn_t at_dma_interrupt(int irq, void *dev_id) +{ + struct at_dma *atdma = (struct at_dma *)dev_id; + struct at_dma_chan *atchan; + int i; + u32 status, pending, imr; + int ret = IRQ_NONE; + + do { + imr = dma_readl(atdma, EBCIMR); + status = dma_readl(atdma, EBCISR); + pending = status & imr; + + if (!pending) + break; + + dev_vdbg(atdma->dma_common.dev, + "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n", + status, imr, pending); + + for (i = 0; i < atdma->dma_common.chancnt; i++) { + atchan = &atdma->chan[i]; + if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) { + if (pending & AT_DMA_ERR(i)) { + /* Disable channel on AHB error */ + dma_writel(atdma, CHDR, + AT_DMA_RES(i) | atchan->mask); + /* Give information to tasklet */ + set_bit(ATC_IS_ERROR, &atchan->status); + } + tasklet_schedule(&atchan->tasklet); + ret = IRQ_HANDLED; + } + } + + } while (pending); + + return ret; +} + + +/*-- DMA Engine API --------------------------------------------------*/ + +/** + * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine + * @desc: descriptor at the head of the transaction chain + * + * Queue chain if DMA engine is working already + * + * Cookie increment and adding to active_list or queue must be atomic + */ +static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct at_desc *desc = txd_to_at_desc(tx); + struct at_dma_chan *atchan = to_at_dma_chan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&atchan->lock, flags); + cookie = dma_cookie_assign(tx); + + if (list_empty(&atchan->active_list)) { + dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", + desc->txd.cookie); + atc_dostart(atchan, desc); + list_add_tail(&desc->desc_node, &atchan->active_list); + } else { + dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", + desc->txd.cookie); + list_add_tail(&desc->desc_node, &atchan->queue); + } + + spin_unlock_irqrestore(&atchan->lock, flags); + + return cookie; +} + +/** + * atc_prep_dma_memcpy - prepare a memcpy operation + * @chan: the channel to prepare operation on + * @dest: operation virtual destination address + * @src: operation virtual source address + * @len: operation length + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_desc *desc = NULL; + struct at_desc *first = NULL; + struct at_desc *prev = NULL; + size_t xfer_count; + size_t offset; + unsigned int src_width; + unsigned int dst_width; + u32 ctrla; + u32 ctrlb; + + dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n", + dest, src, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + ctrla = ATC_DEFAULT_CTRLA; + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN + | ATC_SRC_ADDR_MODE_INCR + | ATC_DST_ADDR_MODE_INCR + | ATC_FC_MEM2MEM; + + /* + * We can be a lot more clever here, but this should take care + * of the most common optimization. + */ + if (!((src | dest | len) & 3)) { + ctrla |= ATC_SRC_WIDTH_WORD | ATC_DST_WIDTH_WORD; + src_width = dst_width = 2; + } else if (!((src | dest | len) & 1)) { + ctrla |= ATC_SRC_WIDTH_HALFWORD | ATC_DST_WIDTH_HALFWORD; + src_width = dst_width = 1; + } else { + ctrla |= ATC_SRC_WIDTH_BYTE | ATC_DST_WIDTH_BYTE; + src_width = dst_width = 0; + } + + for (offset = 0; offset < len; offset += xfer_count << src_width) { + xfer_count = min_t(size_t, (len - offset) >> src_width, + ATC_BTSIZE_MAX); + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + desc->lli.saddr = src + offset; + desc->lli.daddr = dest + offset; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->txd.cookie = 0; + + atc_desc_chain(&first, &prev, desc); + } + + /* First descriptor of the chain embedds additional information */ + first->txd.cookie = -EBUSY; + first->len = len; + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(desc); + + first->txd.flags = flags; /* client is in control of this ack */ + + return &first->txd; + +err_desc_get: + atc_desc_put(atchan, first); + return NULL; +} + + +/** + * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: tx descriptor status flags + * @context: transaction context (ignored) + */ +static struct dma_async_tx_descriptor * +atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_slave *atslave = chan->private; + struct dma_slave_config *sconfig = &atchan->dma_sconfig; + struct at_desc *first = NULL; + struct at_desc *prev = NULL; + u32 ctrla; + u32 ctrlb; + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; + + dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n", + sg_len, + direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE", + flags); + + if (unlikely(!atslave || !sg_len)) { + dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; + ctrlb = ATC_IEN; + + switch (direction) { + case DMA_MEM_TO_DEV: + reg_width = convert_buswidth(sconfig->dst_addr_width); + ctrla |= ATC_DST_WIDTH(reg_width); + ctrlb |= ATC_DST_ADDR_MODE_FIXED + | ATC_SRC_ADDR_MODE_INCR + | ATC_FC_MEM2PER + | ATC_SIF(AT_DMA_MEM_IF) | ATC_DIF(AT_DMA_PER_IF); + reg = sconfig->dst_addr; + for_each_sg(sgl, sg, sg_len, i) { + struct at_desc *desc; + u32 len; + u32 mem; + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + desc->lli.saddr = mem; + desc->lli.daddr = reg; + desc->lli.ctrla = ctrla + | ATC_SRC_WIDTH(mem_width) + | len >> mem_width; + desc->lli.ctrlb = ctrlb; + + atc_desc_chain(&first, &prev, desc); + total_len += len; + } + break; + case DMA_DEV_TO_MEM: + reg_width = convert_buswidth(sconfig->src_addr_width); + ctrla |= ATC_SRC_WIDTH(reg_width); + ctrlb |= ATC_DST_ADDR_MODE_INCR + | ATC_SRC_ADDR_MODE_FIXED + | ATC_FC_PER2MEM + | ATC_SIF(AT_DMA_PER_IF) | ATC_DIF(AT_DMA_MEM_IF); + + reg = sconfig->src_addr; + for_each_sg(sgl, sg, sg_len, i) { + struct at_desc *desc; + u32 len; + u32 mem; + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + desc->lli.saddr = reg; + desc->lli.daddr = mem; + desc->lli.ctrla = ctrla + | ATC_DST_WIDTH(mem_width) + | len >> reg_width; + desc->lli.ctrlb = ctrlb; + + atc_desc_chain(&first, &prev, desc); + total_len += len; + } + break; + default: + return NULL; + } + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(prev); + + /* First descriptor of the chain embedds additional information */ + first->txd.cookie = -EBUSY; + first->len = total_len; + + /* first link descriptor of list is responsible of flags */ + first->txd.flags = flags; /* client is in control of this ack */ + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), "not enough descriptors available\n"); + atc_desc_put(atchan, first); + return NULL; +} + +/** + * atc_dma_cyclic_check_values + * Check for too big/unaligned periods and unaligned DMA buffer + */ +static int +atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr, + size_t period_len, enum dma_transfer_direction direction) +{ + if (period_len > (ATC_BTSIZE_MAX << reg_width)) + goto err_out; + if (unlikely(period_len & ((1 << reg_width) - 1))) + goto err_out; + if (unlikely(buf_addr & ((1 << reg_width) - 1))) + goto err_out; + if (unlikely(!(direction & (DMA_DEV_TO_MEM | DMA_MEM_TO_DEV)))) + goto err_out; + + return 0; + +err_out: + return -EINVAL; +} + +/** + * atc_dma_cyclic_fill_desc - Fill one period decriptor + */ +static int +atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, + unsigned int period_index, dma_addr_t buf_addr, + unsigned int reg_width, size_t period_len, + enum dma_transfer_direction direction) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_slave *atslave = chan->private; + struct dma_slave_config *sconfig = &atchan->dma_sconfig; + u32 ctrla; + + /* prepare common CRTLA value */ + ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla + | ATC_DST_WIDTH(reg_width) + | ATC_SRC_WIDTH(reg_width) + | period_len >> reg_width; + + switch (direction) { + case DMA_MEM_TO_DEV: + desc->lli.saddr = buf_addr + (period_len * period_index); + desc->lli.daddr = sconfig->dst_addr; + desc->lli.ctrla = ctrla; + desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED + | ATC_SRC_ADDR_MODE_INCR + | ATC_FC_MEM2PER + | ATC_SIF(AT_DMA_MEM_IF) + | ATC_DIF(AT_DMA_PER_IF); + break; + + case DMA_DEV_TO_MEM: + desc->lli.saddr = sconfig->src_addr; + desc->lli.daddr = buf_addr + (period_len * period_index); + desc->lli.ctrla = ctrla; + desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR + | ATC_SRC_ADDR_MODE_FIXED + | ATC_FC_PER2MEM + | ATC_SIF(AT_DMA_PER_IF) + | ATC_DIF(AT_DMA_MEM_IF); + break; + + default: + return -EINVAL; + } + + return 0; +} + +/** + * atc_prep_dma_cyclic - prepare the cyclic DMA transfer + * @chan: the DMA channel to prepare + * @buf_addr: physical DMA address where the buffer starts + * @buf_len: total number of bytes for the entire buffer + * @period_len: number of bytes for each period + * @direction: transfer direction, to or from device + * @context: transfer context (ignored) + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + void *context) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_slave *atslave = chan->private; + struct dma_slave_config *sconfig = &atchan->dma_sconfig; + struct at_desc *first = NULL; + struct at_desc *prev = NULL; + unsigned long was_cyclic; + unsigned int reg_width; + unsigned int periods = buf_len / period_len; + unsigned int i; + + dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n", + direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE", + buf_addr, + periods, buf_len, period_len); + + if (unlikely(!atslave || !buf_len || !period_len)) { + dev_dbg(chan2dev(chan), "prep_dma_cyclic: length is zero!\n"); + return NULL; + } + + was_cyclic = test_and_set_bit(ATC_IS_CYCLIC, &atchan->status); + if (was_cyclic) { + dev_dbg(chan2dev(chan), "prep_dma_cyclic: channel in use!\n"); + return NULL; + } + + if (sconfig->direction == DMA_MEM_TO_DEV) + reg_width = convert_buswidth(sconfig->dst_addr_width); + else + reg_width = convert_buswidth(sconfig->src_addr_width); + + /* Check for too big/unaligned periods and unaligned DMA buffer */ + if (atc_dma_cyclic_check_values(reg_width, buf_addr, + period_len, direction)) + goto err_out; + + /* build cyclic linked list */ + for (i = 0; i < periods; i++) { + struct at_desc *desc; + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr, + reg_width, period_len, direction)) + goto err_desc_get; + + atc_desc_chain(&first, &prev, desc); + } + + /* lets make a cyclic list */ + prev->lli.dscr = first->txd.phys; + + /* First descriptor of the chain embedds additional information */ + first->txd.cookie = -EBUSY; + first->len = buf_len; + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), "not enough descriptors available\n"); + atc_desc_put(atchan, first); +err_out: + clear_bit(ATC_IS_CYCLIC, &atchan->status); + return NULL; +} + +static int set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *sconfig) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + /* Check if it is chan is configured for slave transfers */ + if (!chan->private) + return -EINVAL; + + memcpy(&atchan->dma_sconfig, sconfig, sizeof(*sconfig)); + + convert_burst(&atchan->dma_sconfig.src_maxburst); + convert_burst(&atchan->dma_sconfig.dst_maxburst); + + return 0; +} + + +static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + int chan_id = atchan->chan_common.chan_id; + unsigned long flags; + + LIST_HEAD(list); + + dev_vdbg(chan2dev(chan), "atc_control (%d)\n", cmd); + + if (cmd == DMA_PAUSE) { + spin_lock_irqsave(&atchan->lock, flags); + + dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id)); + set_bit(ATC_IS_PAUSED, &atchan->status); + + spin_unlock_irqrestore(&atchan->lock, flags); + } else if (cmd == DMA_RESUME) { + if (!atc_chan_is_paused(atchan)) + return 0; + + spin_lock_irqsave(&atchan->lock, flags); + + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id)); + clear_bit(ATC_IS_PAUSED, &atchan->status); + + spin_unlock_irqrestore(&atchan->lock, flags); + } else if (cmd == DMA_TERMINATE_ALL) { + struct at_desc *desc, *_desc; + /* + * This is only called when something went wrong elsewhere, so + * we don't really care about the data. Just disable the + * channel. We still have to poll the channel enable bit due + * to AHB/HSB limitations. + */ + spin_lock_irqsave(&atchan->lock, flags); + + /* disabling channel: must also remove suspend state */ + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask); + + /* confirm that this channel is disabled */ + while (dma_readl(atdma, CHSR) & atchan->mask) + cpu_relax(); + + /* active_list entries will end up before queued entries */ + list_splice_init(&atchan->queue, &list); + list_splice_init(&atchan->active_list, &list); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + atc_chain_complete(atchan, desc); + + clear_bit(ATC_IS_PAUSED, &atchan->status); + /* if channel dedicated to cyclic operations, free it */ + clear_bit(ATC_IS_CYCLIC, &atchan->status); + + spin_unlock_irqrestore(&atchan->lock, flags); + } else if (cmd == DMA_SLAVE_CONFIG) { + return set_runtime_config(chan, (struct dma_slave_config *)arg); + } else { + return -ENXIO; + } + + return 0; +} + +/** + * atc_tx_status - poll for transaction completion + * @chan: DMA channel + * @cookie: transaction identifier to check status of + * @txstate: if not %NULL updated with transaction state + * + * If @txstate is passed in, upon return it reflect the driver + * internal state and can be used with dma_async_is_complete() to check + * the status of multiple cookies without re-checking hardware state. + */ +static enum dma_status +atc_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + unsigned long flags; + enum dma_status ret; + + spin_lock_irqsave(&atchan->lock, flags); + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret != DMA_SUCCESS) { + atc_cleanup_descriptors(atchan); + + ret = dma_cookie_status(chan, cookie, txstate); + } + + last_complete = chan->completed_cookie; + last_used = chan->cookie; + + spin_unlock_irqrestore(&atchan->lock, flags); + + if (ret != DMA_SUCCESS) + dma_set_residue(txstate, atc_first_active(atchan)->len); + + if (atc_chan_is_paused(atchan)) + ret = DMA_PAUSED; + + dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d (d%d, u%d)\n", + ret, cookie, last_complete ? last_complete : 0, + last_used ? last_used : 0); + + return ret; +} + +/** + * atc_issue_pending - try to finish work + * @chan: target DMA channel + */ +static void atc_issue_pending(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + unsigned long flags; + + dev_vdbg(chan2dev(chan), "issue_pending\n"); + + /* Not needed for cyclic transfers */ + if (atc_chan_is_cyclic(atchan)) + return; + + spin_lock_irqsave(&atchan->lock, flags); + if (!atc_chan_is_enabled(atchan)) { + atc_advance_work(atchan); + } + spin_unlock_irqrestore(&atchan->lock, flags); +} + +/** + * atc_alloc_chan_resources - allocate resources for DMA channel + * @chan: allocate descriptor resources for this channel + * @client: current client requesting the channel be ready for requests + * + * return - the number of allocated descriptors + */ +static int atc_alloc_chan_resources(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc; + struct at_dma_slave *atslave; + unsigned long flags; + int i; + u32 cfg; + LIST_HEAD(tmp_list); + + dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); + + /* ASSERT: channel is idle */ + if (atc_chan_is_enabled(atchan)) { + dev_dbg(chan2dev(chan), "DMA channel not idle ?\n"); + return -EIO; + } + + cfg = ATC_DEFAULT_CFG; + + atslave = chan->private; + if (atslave) { + /* + * We need controller-specific data to set up slave + * transfers. + */ + BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev); + + /* if cfg configuration specified take it instad of default */ + if (atslave->cfg) + cfg = atslave->cfg; + } + + /* have we already been set up? + * reconfigure channel but no need to reallocate descriptors */ + if (!list_empty(&atchan->free_list)) + return atchan->descs_allocated; + + /* Allocate initial pool of descriptors */ + for (i = 0; i < init_nr_desc_per_channel; i++) { + desc = atc_alloc_descriptor(chan, GFP_KERNEL); + if (!desc) { + dev_err(atdma->dma_common.dev, + "Only %d initial descriptors\n", i); + break; + } + list_add_tail(&desc->desc_node, &tmp_list); + } + + spin_lock_irqsave(&atchan->lock, flags); + atchan->descs_allocated = i; + list_splice(&tmp_list, &atchan->free_list); + dma_cookie_init(chan); + spin_unlock_irqrestore(&atchan->lock, flags); + + /* channel parameters */ + channel_writel(atchan, CFG, cfg); + + dev_dbg(chan2dev(chan), + "alloc_chan_resources: allocated %d descriptors\n", + atchan->descs_allocated); + + return atchan->descs_allocated; +} + +/** + * atc_free_chan_resources - free all channel resources + * @chan: DMA channel + */ +static void atc_free_chan_resources(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc, *_desc; + LIST_HEAD(list); + + dev_dbg(chan2dev(chan), "free_chan_resources: (descs allocated=%u)\n", + atchan->descs_allocated); + + /* ASSERT: channel is idle */ + BUG_ON(!list_empty(&atchan->active_list)); + BUG_ON(!list_empty(&atchan->queue)); + BUG_ON(atc_chan_is_enabled(atchan)); + + list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { + dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); + list_del(&desc->desc_node); + /* free link descriptor */ + dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys); + } + list_splice_init(&atchan->free_list, &list); + atchan->descs_allocated = 0; + atchan->status = 0; + + dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); +} + + +/*-- Module Management -----------------------------------------------*/ + +/* cap_mask is a multi-u32 bitfield, fill it with proper C code. */ +static struct at_dma_platform_data at91sam9rl_config = { + .nr_channels = 2, +}; +static struct at_dma_platform_data at91sam9g45_config = { + .nr_channels = 8, +}; + +#if defined(CONFIG_OF) +static const struct of_device_id atmel_dma_dt_ids[] = { + { + .compatible = "atmel,at91sam9rl-dma", + .data = &at91sam9rl_config, + }, { + .compatible = "atmel,at91sam9g45-dma", + .data = &at91sam9g45_config, + }, { + /* sentinel */ + } +}; + +MODULE_DEVICE_TABLE(of, atmel_dma_dt_ids); +#endif + +static const struct platform_device_id atdma_devtypes[] = { + { + .name = "at91sam9rl_dma", + .driver_data = (unsigned long) &at91sam9rl_config, + }, { + .name = "at91sam9g45_dma", + .driver_data = (unsigned long) &at91sam9g45_config, + }, { + /* sentinel */ + } +}; + +static inline struct at_dma_platform_data * __init at_dma_get_driver_data( + struct platform_device *pdev) +{ + if (pdev->dev.of_node) { + const struct of_device_id *match; + match = of_match_node(atmel_dma_dt_ids, pdev->dev.of_node); + if (match == NULL) + return NULL; + return match->data; + } + return (struct at_dma_platform_data *) + platform_get_device_id(pdev)->driver_data; +} + +/** + * at_dma_off - disable DMA controller + * @atdma: the Atmel HDAMC device + */ +static void at_dma_off(struct at_dma *atdma) +{ + dma_writel(atdma, EN, 0); + + /* disable all interrupts */ + dma_writel(atdma, EBCIDR, -1L); + + /* confirm that all channels are disabled */ + while (dma_readl(atdma, CHSR) & atdma->all_chan_mask) + cpu_relax(); +} + +static int __init at_dma_probe(struct platform_device *pdev) +{ + struct resource *io; + struct at_dma *atdma; + size_t size; + int irq; + int err; + int i; + struct at_dma_platform_data *plat_dat; + + /* setup platform data for each SoC */ + dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask); + dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); + + /* get DMA parameters from controller type */ + plat_dat = at_dma_get_driver_data(pdev); + if (!plat_dat) + return -ENODEV; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + size = sizeof(struct at_dma); + size += plat_dat->nr_channels * sizeof(struct at_dma_chan); + atdma = kzalloc(size, GFP_KERNEL); + if (!atdma) + return -ENOMEM; + + /* discover transaction capabilities */ + atdma->dma_common.cap_mask = plat_dat->cap_mask; + atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; + + size = resource_size(io); + if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { + err = -EBUSY; + goto err_kfree; + } + + atdma->regs = ioremap(io->start, size); + if (!atdma->regs) { + err = -ENOMEM; + goto err_release_r; + } + + atdma->clk = clk_get(&pdev->dev, "dma_clk"); + if (IS_ERR(atdma->clk)) { + err = PTR_ERR(atdma->clk); + goto err_clk; + } + clk_enable(atdma->clk); + + /* force dma off, just in case */ + at_dma_off(atdma); + + err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma); + if (err) + goto err_irq; + + platform_set_drvdata(pdev, atdma); + + /* create a pool of consistent memory blocks for hardware descriptors */ + atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool", + &pdev->dev, sizeof(struct at_desc), + 4 /* word alignment */, 0); + if (!atdma->dma_desc_pool) { + dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); + err = -ENOMEM; + goto err_pool_create; + } + + /* clear any pending interrupt */ + while (dma_readl(atdma, EBCISR)) + cpu_relax(); + + /* initialize channels related values */ + INIT_LIST_HEAD(&atdma->dma_common.channels); + for (i = 0; i < plat_dat->nr_channels; i++) { + struct at_dma_chan *atchan = &atdma->chan[i]; + + atchan->chan_common.device = &atdma->dma_common; + dma_cookie_init(&atchan->chan_common); + list_add_tail(&atchan->chan_common.device_node, + &atdma->dma_common.channels); + + atchan->ch_regs = atdma->regs + ch_regs(i); + spin_lock_init(&atchan->lock); + atchan->mask = 1 << i; + + INIT_LIST_HEAD(&atchan->active_list); + INIT_LIST_HEAD(&atchan->queue); + INIT_LIST_HEAD(&atchan->free_list); + + tasklet_init(&atchan->tasklet, atc_tasklet, + (unsigned long)atchan); + atc_enable_chan_irq(atdma, i); + } + + /* set base routines */ + atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources; + atdma->dma_common.device_free_chan_resources = atc_free_chan_resources; + atdma->dma_common.device_tx_status = atc_tx_status; + atdma->dma_common.device_issue_pending = atc_issue_pending; + atdma->dma_common.dev = &pdev->dev; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; + + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { + atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; + /* controller can do slave DMA: can trigger cyclic transfers */ + dma_cap_set(DMA_CYCLIC, atdma->dma_common.cap_mask); + atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic; + atdma->dma_common.device_control = atc_control; + } + + dma_writel(atdma, EN, AT_DMA_ENABLE); + + dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n", + dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", + dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", + plat_dat->nr_channels); + + dma_async_device_register(&atdma->dma_common); + + return 0; + +err_pool_create: + platform_set_drvdata(pdev, NULL); + free_irq(platform_get_irq(pdev, 0), atdma); +err_irq: + clk_disable(atdma->clk); + clk_put(atdma->clk); +err_clk: + iounmap(atdma->regs); + atdma->regs = NULL; +err_release_r: + release_mem_region(io->start, size); +err_kfree: + kfree(atdma); + return err; +} + +static int __exit at_dma_remove(struct platform_device *pdev) +{ + struct at_dma *atdma = platform_get_drvdata(pdev); + struct dma_chan *chan, *_chan; + struct resource *io; + + at_dma_off(atdma); + dma_async_device_unregister(&atdma->dma_common); + + dma_pool_destroy(atdma->dma_desc_pool); + platform_set_drvdata(pdev, NULL); + free_irq(platform_get_irq(pdev, 0), atdma); + + list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + device_node) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + /* Disable interrupts */ + atc_disable_chan_irq(atdma, chan->chan_id); + tasklet_disable(&atchan->tasklet); + + tasklet_kill(&atchan->tasklet); + list_del(&chan->device_node); + } + + clk_disable(atdma->clk); + clk_put(atdma->clk); + + iounmap(atdma->regs); + atdma->regs = NULL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(io->start, resource_size(io)); + + kfree(atdma); + + return 0; +} + +static void at_dma_shutdown(struct platform_device *pdev) +{ + struct at_dma *atdma = platform_get_drvdata(pdev); + + at_dma_off(platform_get_drvdata(pdev)); + clk_disable(atdma->clk); +} + +static int at_dma_prepare(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct at_dma *atdma = platform_get_drvdata(pdev); + struct dma_chan *chan, *_chan; + + list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + device_node) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); + /* wait for transaction completion (except in cyclic case) */ + if (atc_chan_is_enabled(atchan) && !atc_chan_is_cyclic(atchan)) + return -EAGAIN; + } + return 0; +} + +static void atc_suspend_cyclic(struct at_dma_chan *atchan) +{ + struct dma_chan *chan = &atchan->chan_common; + + /* Channel should be paused by user + * do it anyway even if it is not done already */ + if (!atc_chan_is_paused(atchan)) { + dev_warn(chan2dev(chan), + "cyclic channel not paused, should be done by channel user\n"); + atc_control(chan, DMA_PAUSE, 0); + } + + /* now preserve additional data for cyclic operations */ + /* next descriptor address in the cyclic list */ + atchan->save_dscr = channel_readl(atchan, DSCR); + + vdbg_dump_regs(atchan); +} + +static int at_dma_suspend_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct at_dma *atdma = platform_get_drvdata(pdev); + struct dma_chan *chan, *_chan; + + /* preserve data */ + list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + device_node) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + if (atc_chan_is_cyclic(atchan)) + atc_suspend_cyclic(atchan); + atchan->save_cfg = channel_readl(atchan, CFG); + } + atdma->save_imr = dma_readl(atdma, EBCIMR); + + /* disable DMA controller */ + at_dma_off(atdma); + clk_disable(atdma->clk); + return 0; +} + +static void atc_resume_cyclic(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + /* restore channel status for cyclic descriptors list: + * next descriptor in the cyclic list at the time of suspend */ + channel_writel(atchan, SADDR, 0); + channel_writel(atchan, DADDR, 0); + channel_writel(atchan, CTRLA, 0); + channel_writel(atchan, CTRLB, 0); + channel_writel(atchan, DSCR, atchan->save_dscr); + dma_writel(atdma, CHER, atchan->mask); + + /* channel pause status should be removed by channel user + * We cannot take the initiative to do it here */ + + vdbg_dump_regs(atchan); +} + +static int at_dma_resume_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct at_dma *atdma = platform_get_drvdata(pdev); + struct dma_chan *chan, *_chan; + + /* bring back DMA controller */ + clk_enable(atdma->clk); + dma_writel(atdma, EN, AT_DMA_ENABLE); + + /* clear any pending interrupt */ + while (dma_readl(atdma, EBCISR)) + cpu_relax(); + + /* restore saved data */ + dma_writel(atdma, EBCIER, atdma->save_imr); + list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + device_node) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + channel_writel(atchan, CFG, atchan->save_cfg); + if (atc_chan_is_cyclic(atchan)) + atc_resume_cyclic(atchan); + } + return 0; +} + +static const struct dev_pm_ops at_dma_dev_pm_ops = { + .prepare = at_dma_prepare, + .suspend_noirq = at_dma_suspend_noirq, + .resume_noirq = at_dma_resume_noirq, +}; + +static struct platform_driver at_dma_driver = { + .remove = __exit_p(at_dma_remove), + .shutdown = at_dma_shutdown, + .id_table = atdma_devtypes, + .driver = { + .name = "at_hdmac", + .pm = &at_dma_dev_pm_ops, + .of_match_table = of_match_ptr(atmel_dma_dt_ids), + }, +}; + +static int __init at_dma_init(void) +{ + return platform_driver_probe(&at_dma_driver, at_dma_probe); +} +subsys_initcall(at_dma_init); + +static void __exit at_dma_exit(void) +{ + platform_driver_unregister(&at_dma_driver); +} +module_exit(at_dma_exit); + +MODULE_DESCRIPTION("Atmel AHB DMA Controller driver"); +MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:at_hdmac"); diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h new file mode 100644 index 00000000..897a8bca --- /dev/null +++ b/drivers/dma/at_hdmac_regs.h @@ -0,0 +1,428 @@ +/* + * Header file for the Atmel AHB DMA Controller driver + * + * Copyright (C) 2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#ifndef AT_HDMAC_REGS_H +#define AT_HDMAC_REGS_H + +#include <mach/at_hdmac.h> + +#define AT_DMA_MAX_NR_CHANNELS 8 + + +#define AT_DMA_GCFG 0x00 /* Global Configuration Register */ +#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ +#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. */ +#define AT_DMA_ARB_CFG_FIXED (0x0 << 4) +#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) + +#define AT_DMA_EN 0x04 /* Controller Enable Register */ +#define AT_DMA_ENABLE (0x1 << 0) + +#define AT_DMA_SREQ 0x08 /* Software Single Request Register */ +#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ +#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ + +#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ +#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ +#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ + +#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ +#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ +#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ + +#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ +#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */ + +/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ +#define AT_DMA_EBCIER 0x18 /* Enable register */ +#define AT_DMA_EBCIDR 0x1C /* Disable register */ +#define AT_DMA_EBCIMR 0x20 /* Mask Register */ +#define AT_DMA_EBCISR 0x24 /* Status Register */ +#define AT_DMA_CBTC_OFFSET 8 +#define AT_DMA_ERR_OFFSET 16 +#define AT_DMA_BTC(x) (0x1 << (x)) +#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) +#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) + +#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ +#define AT_DMA_ENA(x) (0x1 << (x)) +#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) +#define AT_DMA_KEEP(x) (0x1 << (24 + (x))) + +#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ +#define AT_DMA_DIS(x) (0x1 << (x)) +#define AT_DMA_RES(x) (0x1 << ( 8 + (x))) + +#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ +#define AT_DMA_EMPT(x) (0x1 << (16 + (x))) +#define AT_DMA_STAL(x) (0x1 << (24 + (x))) + + +#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ +#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ + +/* Hardware register offset for each channel */ +#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ +#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ +#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ +#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ +#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ +#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ +#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ +#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ + + +/* Bitfield definitions */ + +/* Bitfields in DSCR */ +#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ + +/* Bitfields in CTRLA */ +#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ +#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ +/* Chunck Tranfer size definitions are in at_hdmac.h */ +#define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ +#define ATC_SRC_WIDTH(x) ((x) << 24) +#define ATC_SRC_WIDTH_BYTE (0x0 << 24) +#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) +#define ATC_SRC_WIDTH_WORD (0x2 << 24) +#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ +#define ATC_DST_WIDTH(x) ((x) << 28) +#define ATC_DST_WIDTH_BYTE (0x0 << 28) +#define ATC_DST_WIDTH_HALFWORD (0x1 << 28) +#define ATC_DST_WIDTH_WORD (0x2 << 28) +#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */ + +/* Bitfields in CTRLB */ +#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ +#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ + /* Specify AHB interfaces */ +#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ +#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ + +#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ +#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ +#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ +#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ +#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ +#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ +#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ +#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ +#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ +#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ +#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ +#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ +#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ +#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) +#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ +#define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ +#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ +#define ATC_DST_ADDR_MODE_MASK (0x3 << 28) +#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ +#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ +#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ +#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ +#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ + +/* Bitfields in CFG */ +/* are in at_hdmac.h */ + +/* Bitfields in SPIP */ +#define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) +#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) + +/* Bitfields in DPIP */ +#define ATC_DPIP_HOLE(x) (0xFFFFU & (x)) +#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) + + +/*-- descriptors -----------------------------------------------------*/ + +/* LLI == Linked List Item; aka DMA buffer descriptor */ +struct at_lli { + /* values that are not changed by hardware */ + dma_addr_t saddr; + dma_addr_t daddr; + /* value that may get written back: */ + u32 ctrla; + /* more values that are not changed by hardware */ + u32 ctrlb; + dma_addr_t dscr; /* chain to next lli */ +}; + +/** + * struct at_desc - software descriptor + * @at_lli: hardware lli structure + * @txd: support for the async_tx api + * @desc_node: node on the channed descriptors list + * @len: total transaction bytecount + */ +struct at_desc { + /* FIRST values the hardware uses */ + struct at_lli lli; + + /* THEN values for driver housekeeping */ + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + struct list_head desc_node; + size_t len; +}; + +static inline struct at_desc * +txd_to_at_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct at_desc, txd); +} + + +/*-- Channels --------------------------------------------------------*/ + +/** + * atc_status - information bits stored in channel status flag + * + * Manipulated with atomic operations. + */ +enum atc_status { + ATC_IS_ERROR = 0, + ATC_IS_PAUSED = 1, + ATC_IS_CYCLIC = 24, +}; + +/** + * struct at_dma_chan - internal representation of an Atmel HDMAC channel + * @chan_common: common dmaengine channel object members + * @device: parent device + * @ch_regs: memory mapped register base + * @mask: channel index in a mask + * @status: transmit status information from irq/prep* functions + * to tasklet (use atomic operations) + * @tasklet: bottom half to finish transaction work + * @save_cfg: configuration register that is saved on suspend/resume cycle + * @save_dscr: for cyclic operations, preserve next descriptor address in + * the cyclic list on suspend/resume cycle + * @dma_sconfig: configuration for slave transfers, passed via DMA_SLAVE_CONFIG + * @lock: serializes enqueue/dequeue operations to descriptors lists + * @active_list: list of descriptors dmaengine is being running on + * @queue: list of descriptors ready to be submitted to engine + * @free_list: list of descriptors usable by the channel + * @descs_allocated: records the actual size of the descriptor pool + */ +struct at_dma_chan { + struct dma_chan chan_common; + struct at_dma *device; + void __iomem *ch_regs; + u8 mask; + unsigned long status; + struct tasklet_struct tasklet; + u32 save_cfg; + u32 save_dscr; + struct dma_slave_config dma_sconfig; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int descs_allocated; +}; + +#define channel_readl(atchan, name) \ + __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) + +#define channel_writel(atchan, name, val) \ + __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) + +static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) +{ + return container_of(dchan, struct at_dma_chan, chan_common); +} + +/* + * Fix sconfig's burst size according to at_hdmac. We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7. + * + * This can be done by finding most significant bit set. + */ +static inline void convert_burst(u32 *maxburst) +{ + if (*maxburst > 1) + *maxburst = fls(*maxburst) - 2; + else + *maxburst = 0; +} + +/* + * Fix sconfig's bus width according to at_hdmac. + * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2. + */ +static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) +{ + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return 1; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return 2; + default: + /* For 1 byte width or fallback */ + return 0; + } +} + +/*-- Controller ------------------------------------------------------*/ + +/** + * struct at_dma - internal representation of an Atmel HDMA Controller + * @chan_common: common dmaengine dma_device object members + * @atdma_devtype: identifier of DMA controller compatibility + * @ch_regs: memory mapped register base + * @clk: dma controller clock + * @save_imr: interrupt mask register that is saved on suspend/resume cycle + * @all_chan_mask: all channels availlable in a mask + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @chan: channels table to store at_dma_chan structures + */ +struct at_dma { + struct dma_device dma_common; + void __iomem *regs; + struct clk *clk; + u32 save_imr; + + u8 all_chan_mask; + + struct dma_pool *dma_desc_pool; + /* AT THE END channels table */ + struct at_dma_chan chan[0]; +}; + +#define dma_readl(atdma, name) \ + __raw_readl((atdma)->regs + AT_DMA_##name) +#define dma_writel(atdma, name, val) \ + __raw_writel((val), (atdma)->regs + AT_DMA_##name) + +static inline struct at_dma *to_at_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct at_dma, dma_common); +} + + +/*-- Helper functions ------------------------------------------------*/ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} +static struct device *chan2parent(struct dma_chan *chan) +{ + return chan->dev->device.parent; +} + +#if defined(VERBOSE_DEBUG) +static void vdbg_dump_regs(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + dev_err(chan2dev(&atchan->chan_common), + " channel %d : imr = 0x%x, chsr = 0x%x\n", + atchan->chan_common.chan_id, + dma_readl(atdma, EBCIMR), + dma_readl(atdma, CHSR)); + + dev_err(chan2dev(&atchan->chan_common), + " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", + channel_readl(atchan, SADDR), + channel_readl(atchan, DADDR), + channel_readl(atchan, CTRLA), + channel_readl(atchan, CTRLB), + channel_readl(atchan, CFG), + channel_readl(atchan, DSCR)); +} +#else +static void vdbg_dump_regs(struct at_dma_chan *atchan) {} +#endif + +static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) +{ + dev_printk(KERN_CRIT, chan2dev(&atchan->chan_common), + " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", + lli->saddr, lli->daddr, + lli->ctrla, lli->ctrlb, lli->dscr); +} + + +static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on) +{ + u32 ebci; + + /* enable interrupts on buffer transfer completion & error */ + ebci = AT_DMA_BTC(chan_id) + | AT_DMA_ERR(chan_id); + if (on) + dma_writel(atdma, EBCIER, ebci); + else + dma_writel(atdma, EBCIDR, ebci); +} + +static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id) +{ + atc_setup_irq(atdma, chan_id, 1); +} + +static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) +{ + atc_setup_irq(atdma, chan_id, 0); +} + + +/** + * atc_chan_is_enabled - test if given channel is enabled + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + return !!(dma_readl(atdma, CHSR) & atchan->mask); +} + +/** + * atc_chan_is_paused - test channel pause/resume status + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_paused(struct at_dma_chan *atchan) +{ + return test_bit(ATC_IS_PAUSED, &atchan->status); +} + +/** + * atc_chan_is_cyclic - test if given channel has cyclic property set + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan) +{ + return test_bit(ATC_IS_CYCLIC, &atchan->status); +} + +/** + * set_desc_eol - set end-of-link to descriptor so it will end transfer + * @desc: descriptor, signle or at the end of a chain, to end chain on + */ +static void set_desc_eol(struct at_desc *desc) +{ + u32 ctrlb = desc->lli.ctrlb; + + ctrlb &= ~ATC_IEN; + ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + + desc->lli.ctrlb = ctrlb; + desc->lli.dscr = 0; +} + +#endif /* AT_HDMAC_REGS_H */ diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c new file mode 100644 index 00000000..750925f9 --- /dev/null +++ b/drivers/dma/coh901318.c @@ -0,0 +1,1602 @@ +/* + * driver/dma/coh901318.c + * + * Copyright (C) 2007-2009 ST-Ericsson + * License terms: GNU General Public License (GPL) version 2 + * DMA driver for COH 901 318 + * Author: Per Friden <per.friden@stericsson.com> + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> /* printk() */ +#include <linux/fs.h> /* everything... */ +#include <linux/scatterlist.h> +#include <linux/slab.h> /* kmalloc() */ +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/irqreturn.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <mach/coh901318.h> + +#include "coh901318_lli.h" +#include "dmaengine.h" + +#define COHC_2_DEV(cohc) (&cohc->chan.dev->device) + +#ifdef VERBOSE_DEBUG +#define COH_DBG(x) ({ if (1) x; 0; }) +#else +#define COH_DBG(x) ({ if (0) x; 0; }) +#endif + +struct coh901318_desc { + struct dma_async_tx_descriptor desc; + struct list_head node; + struct scatterlist *sg; + unsigned int sg_len; + struct coh901318_lli *lli; + enum dma_transfer_direction dir; + unsigned long flags; + u32 head_config; + u32 head_ctrl; +}; + +struct coh901318_base { + struct device *dev; + void __iomem *virtbase; + struct coh901318_pool pool; + struct powersave pm; + struct dma_device dma_slave; + struct dma_device dma_memcpy; + struct coh901318_chan *chans; + struct coh901318_platform *platform; +}; + +struct coh901318_chan { + spinlock_t lock; + int allocated; + int id; + int stopped; + + struct work_struct free_work; + struct dma_chan chan; + + struct tasklet_struct tasklet; + + struct list_head active; + struct list_head queue; + struct list_head free; + + unsigned long nbr_active_done; + unsigned long busy; + + u32 runtime_addr; + u32 runtime_ctrl; + + struct coh901318_base *base; +}; + +static void coh901318_list_print(struct coh901318_chan *cohc, + struct coh901318_lli *lli) +{ + struct coh901318_lli *l = lli; + int i = 0; + + while (l) { + dev_vdbg(COHC_2_DEV(cohc), "i %d, lli %p, ctrl 0x%x, src 0x%x" + ", dst 0x%x, link 0x%x virt_link_addr 0x%p\n", + i, l, l->control, l->src_addr, l->dst_addr, + l->link_addr, l->virt_link_addr); + i++; + l = l->virt_link_addr; + } +} + +#ifdef CONFIG_DEBUG_FS + +#define COH901318_DEBUGFS_ASSIGN(x, y) (x = y) + +static struct coh901318_base *debugfs_dma_base; +static struct dentry *dma_dentry; + +static int coh901318_debugfs_read(struct file *file, char __user *buf, + size_t count, loff_t *f_pos) +{ + u64 started_channels = debugfs_dma_base->pm.started_channels; + int pool_count = debugfs_dma_base->pool.debugfs_pool_counter; + int i; + int ret = 0; + char *dev_buf; + char *tmp; + int dev_size; + + dev_buf = kmalloc(4*1024, GFP_KERNEL); + if (dev_buf == NULL) + goto err_kmalloc; + tmp = dev_buf; + + tmp += sprintf(tmp, "DMA -- enabled dma channels\n"); + + for (i = 0; i < debugfs_dma_base->platform->max_channels; i++) + if (started_channels & (1 << i)) + tmp += sprintf(tmp, "channel %d\n", i); + + tmp += sprintf(tmp, "Pool alloc nbr %d\n", pool_count); + dev_size = tmp - dev_buf; + + /* No more to read if offset != 0 */ + if (*f_pos > dev_size) + goto out; + + if (count > dev_size - *f_pos) + count = dev_size - *f_pos; + + if (copy_to_user(buf, dev_buf + *f_pos, count)) + ret = -EINVAL; + ret = count; + *f_pos += count; + + out: + kfree(dev_buf); + return ret; + + err_kmalloc: + return 0; +} + +static const struct file_operations coh901318_debugfs_status_operations = { + .owner = THIS_MODULE, + .open = simple_open, + .read = coh901318_debugfs_read, + .llseek = default_llseek, +}; + + +static int __init init_coh901318_debugfs(void) +{ + + dma_dentry = debugfs_create_dir("dma", NULL); + + (void) debugfs_create_file("status", + S_IFREG | S_IRUGO, + dma_dentry, NULL, + &coh901318_debugfs_status_operations); + return 0; +} + +static void __exit exit_coh901318_debugfs(void) +{ + debugfs_remove_recursive(dma_dentry); +} + +module_init(init_coh901318_debugfs); +module_exit(exit_coh901318_debugfs); +#else + +#define COH901318_DEBUGFS_ASSIGN(x, y) + +#endif /* CONFIG_DEBUG_FS */ + +static inline struct coh901318_chan *to_coh901318_chan(struct dma_chan *chan) +{ + return container_of(chan, struct coh901318_chan, chan); +} + +static inline dma_addr_t +cohc_dev_addr(struct coh901318_chan *cohc) +{ + /* Runtime supplied address will take precedence */ + if (cohc->runtime_addr) + return cohc->runtime_addr; + return cohc->base->platform->chan_conf[cohc->id].dev_addr; +} + +static inline const struct coh901318_params * +cohc_chan_param(struct coh901318_chan *cohc) +{ + return &cohc->base->platform->chan_conf[cohc->id].param; +} + +static inline const struct coh_dma_channel * +cohc_chan_conf(struct coh901318_chan *cohc) +{ + return &cohc->base->platform->chan_conf[cohc->id]; +} + +static void enable_powersave(struct coh901318_chan *cohc) +{ + unsigned long flags; + struct powersave *pm = &cohc->base->pm; + + spin_lock_irqsave(&pm->lock, flags); + + pm->started_channels &= ~(1ULL << cohc->id); + + if (!pm->started_channels) { + /* DMA no longer intends to access memory */ + cohc->base->platform->access_memory_state(cohc->base->dev, + false); + } + + spin_unlock_irqrestore(&pm->lock, flags); +} +static void disable_powersave(struct coh901318_chan *cohc) +{ + unsigned long flags; + struct powersave *pm = &cohc->base->pm; + + spin_lock_irqsave(&pm->lock, flags); + + if (!pm->started_channels) { + /* DMA intends to access memory */ + cohc->base->platform->access_memory_state(cohc->base->dev, + true); + } + + pm->started_channels |= (1ULL << cohc->id); + + spin_unlock_irqrestore(&pm->lock, flags); +} + +static inline int coh901318_set_ctrl(struct coh901318_chan *cohc, u32 control) +{ + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + writel(control, + virtbase + COH901318_CX_CTRL + + COH901318_CX_CTRL_SPACING * channel); + return 0; +} + +static inline int coh901318_set_conf(struct coh901318_chan *cohc, u32 conf) +{ + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + writel(conf, + virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING*channel); + return 0; +} + + +static int coh901318_start(struct coh901318_chan *cohc) +{ + u32 val; + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + disable_powersave(cohc); + + val = readl(virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + /* Enable channel */ + val |= COH901318_CX_CFG_CH_ENABLE; + writel(val, virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + return 0; +} + +static int coh901318_prep_linked_list(struct coh901318_chan *cohc, + struct coh901318_lli *lli) +{ + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + BUG_ON(readl(virtbase + COH901318_CX_STAT + + COH901318_CX_STAT_SPACING*channel) & + COH901318_CX_STAT_ACTIVE); + + writel(lli->src_addr, + virtbase + COH901318_CX_SRC_ADDR + + COH901318_CX_SRC_ADDR_SPACING * channel); + + writel(lli->dst_addr, virtbase + + COH901318_CX_DST_ADDR + + COH901318_CX_DST_ADDR_SPACING * channel); + + writel(lli->link_addr, virtbase + COH901318_CX_LNK_ADDR + + COH901318_CX_LNK_ADDR_SPACING * channel); + + writel(lli->control, virtbase + COH901318_CX_CTRL + + COH901318_CX_CTRL_SPACING * channel); + + return 0; +} + +static struct coh901318_desc * +coh901318_desc_get(struct coh901318_chan *cohc) +{ + struct coh901318_desc *desc; + + if (list_empty(&cohc->free)) { + /* alloc new desc because we're out of used ones + * TODO: alloc a pile of descs instead of just one, + * avoid many small allocations. + */ + desc = kzalloc(sizeof(struct coh901318_desc), GFP_NOWAIT); + if (desc == NULL) + goto out; + INIT_LIST_HEAD(&desc->node); + dma_async_tx_descriptor_init(&desc->desc, &cohc->chan); + } else { + /* Reuse an old desc. */ + desc = list_first_entry(&cohc->free, + struct coh901318_desc, + node); + list_del(&desc->node); + /* Initialize it a bit so it's not insane */ + desc->sg = NULL; + desc->sg_len = 0; + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + } + + out: + return desc; +} + +static void +coh901318_desc_free(struct coh901318_chan *cohc, struct coh901318_desc *cohd) +{ + list_add_tail(&cohd->node, &cohc->free); +} + +/* call with irq lock held */ +static void +coh901318_desc_submit(struct coh901318_chan *cohc, struct coh901318_desc *desc) +{ + list_add_tail(&desc->node, &cohc->active); +} + +static struct coh901318_desc * +coh901318_first_active_get(struct coh901318_chan *cohc) +{ + struct coh901318_desc *d; + + if (list_empty(&cohc->active)) + return NULL; + + d = list_first_entry(&cohc->active, + struct coh901318_desc, + node); + return d; +} + +static void +coh901318_desc_remove(struct coh901318_desc *cohd) +{ + list_del(&cohd->node); +} + +static void +coh901318_desc_queue(struct coh901318_chan *cohc, struct coh901318_desc *desc) +{ + list_add_tail(&desc->node, &cohc->queue); +} + +static struct coh901318_desc * +coh901318_first_queued(struct coh901318_chan *cohc) +{ + struct coh901318_desc *d; + + if (list_empty(&cohc->queue)) + return NULL; + + d = list_first_entry(&cohc->queue, + struct coh901318_desc, + node); + return d; +} + +static inline u32 coh901318_get_bytes_in_lli(struct coh901318_lli *in_lli) +{ + struct coh901318_lli *lli = in_lli; + u32 bytes = 0; + + while (lli) { + bytes += lli->control & COH901318_CX_CTRL_TC_VALUE_MASK; + lli = lli->virt_link_addr; + } + return bytes; +} + +/* + * Get the number of bytes left to transfer on this channel, + * it is unwise to call this before stopping the channel for + * absolute measures, but for a rough guess you can still call + * it. + */ +static u32 coh901318_get_bytes_left(struct dma_chan *chan) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + struct coh901318_desc *cohd; + struct list_head *pos; + unsigned long flags; + u32 left = 0; + int i = 0; + + spin_lock_irqsave(&cohc->lock, flags); + + /* + * If there are many queued jobs, we iterate and add the + * size of them all. We take a special look on the first + * job though, since it is probably active. + */ + list_for_each(pos, &cohc->active) { + /* + * The first job in the list will be working on the + * hardware. The job can be stopped but still active, + * so that the transfer counter is somewhere inside + * the buffer. + */ + cohd = list_entry(pos, struct coh901318_desc, node); + + if (i == 0) { + struct coh901318_lli *lli; + dma_addr_t ladd; + + /* Read current transfer count value */ + left = readl(cohc->base->virtbase + + COH901318_CX_CTRL + + COH901318_CX_CTRL_SPACING * cohc->id) & + COH901318_CX_CTRL_TC_VALUE_MASK; + + /* See if the transfer is linked... */ + ladd = readl(cohc->base->virtbase + + COH901318_CX_LNK_ADDR + + COH901318_CX_LNK_ADDR_SPACING * + cohc->id) & + ~COH901318_CX_LNK_LINK_IMMEDIATE; + /* Single transaction */ + if (!ladd) + continue; + + /* + * Linked transaction, follow the lli, find the + * currently processing lli, and proceed to the next + */ + lli = cohd->lli; + while (lli && lli->link_addr != ladd) + lli = lli->virt_link_addr; + + if (lli) + lli = lli->virt_link_addr; + + /* + * Follow remaining lli links around to count the total + * number of bytes left + */ + left += coh901318_get_bytes_in_lli(lli); + } else { + left += coh901318_get_bytes_in_lli(cohd->lli); + } + i++; + } + + /* Also count bytes in the queued jobs */ + list_for_each(pos, &cohc->queue) { + cohd = list_entry(pos, struct coh901318_desc, node); + left += coh901318_get_bytes_in_lli(cohd->lli); + } + + spin_unlock_irqrestore(&cohc->lock, flags); + + return left; +} + +/* + * Pauses a transfer without losing data. Enables power save. + * Use this function in conjunction with coh901318_resume. + */ +static void coh901318_pause(struct dma_chan *chan) +{ + u32 val; + unsigned long flags; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + spin_lock_irqsave(&cohc->lock, flags); + + /* Disable channel in HW */ + val = readl(virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + /* Stopping infinite transfer */ + if ((val & COH901318_CX_CTRL_TC_ENABLE) == 0 && + (val & COH901318_CX_CFG_CH_ENABLE)) + cohc->stopped = 1; + + + val &= ~COH901318_CX_CFG_CH_ENABLE; + /* Enable twice, HW bug work around */ + writel(val, virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + writel(val, virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + /* Spin-wait for it to actually go inactive */ + while (readl(virtbase + COH901318_CX_STAT+COH901318_CX_STAT_SPACING * + channel) & COH901318_CX_STAT_ACTIVE) + cpu_relax(); + + /* Check if we stopped an active job */ + if ((readl(virtbase + COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING * + channel) & COH901318_CX_CTRL_TC_VALUE_MASK) > 0) + cohc->stopped = 1; + + enable_powersave(cohc); + + spin_unlock_irqrestore(&cohc->lock, flags); +} + +/* Resumes a transfer that has been stopped via 300_dma_stop(..). + Power save is handled. +*/ +static void coh901318_resume(struct dma_chan *chan) +{ + u32 val; + unsigned long flags; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + int channel = cohc->id; + + spin_lock_irqsave(&cohc->lock, flags); + + disable_powersave(cohc); + + if (cohc->stopped) { + /* Enable channel in HW */ + val = readl(cohc->base->virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + val |= COH901318_CX_CFG_CH_ENABLE; + + writel(val, cohc->base->virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING*channel); + + cohc->stopped = 0; + } + + spin_unlock_irqrestore(&cohc->lock, flags); +} + +bool coh901318_filter_id(struct dma_chan *chan, void *chan_id) +{ + unsigned int ch_nr = (unsigned int) chan_id; + + if (ch_nr == to_coh901318_chan(chan)->id) + return true; + + return false; +} +EXPORT_SYMBOL(coh901318_filter_id); + +/* + * DMA channel allocation + */ +static int coh901318_config(struct coh901318_chan *cohc, + struct coh901318_params *param) +{ + unsigned long flags; + const struct coh901318_params *p; + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + spin_lock_irqsave(&cohc->lock, flags); + + if (param) + p = param; + else + p = &cohc->base->platform->chan_conf[channel].param; + + /* Clear any pending BE or TC interrupt */ + if (channel < 32) { + writel(1 << channel, virtbase + COH901318_BE_INT_CLEAR1); + writel(1 << channel, virtbase + COH901318_TC_INT_CLEAR1); + } else { + writel(1 << (channel - 32), virtbase + + COH901318_BE_INT_CLEAR2); + writel(1 << (channel - 32), virtbase + + COH901318_TC_INT_CLEAR2); + } + + coh901318_set_conf(cohc, p->config); + coh901318_set_ctrl(cohc, p->ctrl_lli_last); + + spin_unlock_irqrestore(&cohc->lock, flags); + + return 0; +} + +/* must lock when calling this function + * start queued jobs, if any + * TODO: start all queued jobs in one go + * + * Returns descriptor if queued job is started otherwise NULL. + * If the queue is empty NULL is returned. + */ +static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc) +{ + struct coh901318_desc *cohd; + + /* + * start queued jobs, if any + * TODO: transmit all queued jobs in one go + */ + cohd = coh901318_first_queued(cohc); + + if (cohd != NULL) { + /* Remove from queue */ + coh901318_desc_remove(cohd); + /* initiate DMA job */ + cohc->busy = 1; + + coh901318_desc_submit(cohc, cohd); + + /* Program the transaction head */ + coh901318_set_conf(cohc, cohd->head_config); + coh901318_set_ctrl(cohc, cohd->head_ctrl); + coh901318_prep_linked_list(cohc, cohd->lli); + + /* start dma job on this channel */ + coh901318_start(cohc); + + } + + return cohd; +} + +/* + * This tasklet is called from the interrupt handler to + * handle each descriptor (DMA job) that is sent to a channel. + */ +static void dma_tasklet(unsigned long data) +{ + struct coh901318_chan *cohc = (struct coh901318_chan *) data; + struct coh901318_desc *cohd_fin; + unsigned long flags; + dma_async_tx_callback callback; + void *callback_param; + + dev_vdbg(COHC_2_DEV(cohc), "[%s] chan_id %d" + " nbr_active_done %ld\n", __func__, + cohc->id, cohc->nbr_active_done); + + spin_lock_irqsave(&cohc->lock, flags); + + /* get first active descriptor entry from list */ + cohd_fin = coh901318_first_active_get(cohc); + + if (cohd_fin == NULL) + goto err; + + /* locate callback to client */ + callback = cohd_fin->desc.callback; + callback_param = cohd_fin->desc.callback_param; + + /* sign this job as completed on the channel */ + dma_cookie_complete(&cohd_fin->desc); + + /* release the lli allocation and remove the descriptor */ + coh901318_lli_free(&cohc->base->pool, &cohd_fin->lli); + + /* return desc to free-list */ + coh901318_desc_remove(cohd_fin); + coh901318_desc_free(cohc, cohd_fin); + + spin_unlock_irqrestore(&cohc->lock, flags); + + /* Call the callback when we're done */ + if (callback) + callback(callback_param); + + spin_lock_irqsave(&cohc->lock, flags); + + /* + * If another interrupt fired while the tasklet was scheduling, + * we don't get called twice, so we have this number of active + * counter that keep track of the number of IRQs expected to + * be handled for this channel. If there happen to be more than + * one IRQ to be ack:ed, we simply schedule this tasklet again. + */ + cohc->nbr_active_done--; + if (cohc->nbr_active_done) { + dev_dbg(COHC_2_DEV(cohc), "scheduling tasklet again, new IRQs " + "came in while we were scheduling this tasklet\n"); + if (cohc_chan_conf(cohc)->priority_high) + tasklet_hi_schedule(&cohc->tasklet); + else + tasklet_schedule(&cohc->tasklet); + } + + spin_unlock_irqrestore(&cohc->lock, flags); + + return; + + err: + spin_unlock_irqrestore(&cohc->lock, flags); + dev_err(COHC_2_DEV(cohc), "[%s] No active dma desc\n", __func__); +} + + +/* called from interrupt context */ +static void dma_tc_handle(struct coh901318_chan *cohc) +{ + /* + * If the channel is not allocated, then we shouldn't have + * any TC interrupts on it. + */ + if (!cohc->allocated) { + dev_err(COHC_2_DEV(cohc), "spurious interrupt from " + "unallocated channel\n"); + return; + } + + spin_lock(&cohc->lock); + + /* + * When we reach this point, at least one queue item + * should have been moved over from cohc->queue to + * cohc->active and run to completion, that is why we're + * getting a terminal count interrupt is it not? + * If you get this BUG() the most probable cause is that + * the individual nodes in the lli chain have IRQ enabled, + * so check your platform config for lli chain ctrl. + */ + BUG_ON(list_empty(&cohc->active)); + + cohc->nbr_active_done++; + + /* + * This attempt to take a job from cohc->queue, put it + * into cohc->active and start it. + */ + if (coh901318_queue_start(cohc) == NULL) + cohc->busy = 0; + + spin_unlock(&cohc->lock); + + /* + * This tasklet will remove items from cohc->active + * and thus terminates them. + */ + if (cohc_chan_conf(cohc)->priority_high) + tasklet_hi_schedule(&cohc->tasklet); + else + tasklet_schedule(&cohc->tasklet); +} + + +static irqreturn_t dma_irq_handler(int irq, void *dev_id) +{ + u32 status1; + u32 status2; + int i; + int ch; + struct coh901318_base *base = dev_id; + struct coh901318_chan *cohc; + void __iomem *virtbase = base->virtbase; + + status1 = readl(virtbase + COH901318_INT_STATUS1); + status2 = readl(virtbase + COH901318_INT_STATUS2); + + if (unlikely(status1 == 0 && status2 == 0)) { + dev_warn(base->dev, "spurious DMA IRQ from no channel!\n"); + return IRQ_HANDLED; + } + + /* TODO: consider handle IRQ in tasklet here to + * minimize interrupt latency */ + + /* Check the first 32 DMA channels for IRQ */ + while (status1) { + /* Find first bit set, return as a number. */ + i = ffs(status1) - 1; + ch = i; + + cohc = &base->chans[ch]; + spin_lock(&cohc->lock); + + /* Mask off this bit */ + status1 &= ~(1 << i); + /* Check the individual channel bits */ + if (test_bit(i, virtbase + COH901318_BE_INT_STATUS1)) { + dev_crit(COHC_2_DEV(cohc), + "DMA bus error on channel %d!\n", ch); + BUG_ON(1); + /* Clear BE interrupt */ + __set_bit(i, virtbase + COH901318_BE_INT_CLEAR1); + } else { + /* Caused by TC, really? */ + if (unlikely(!test_bit(i, virtbase + + COH901318_TC_INT_STATUS1))) { + dev_warn(COHC_2_DEV(cohc), + "ignoring interrupt not caused by terminal count on channel %d\n", ch); + /* Clear TC interrupt */ + BUG_ON(1); + __set_bit(i, virtbase + COH901318_TC_INT_CLEAR1); + } else { + /* Enable powersave if transfer has finished */ + if (!(readl(virtbase + COH901318_CX_STAT + + COH901318_CX_STAT_SPACING*ch) & + COH901318_CX_STAT_ENABLED)) { + enable_powersave(cohc); + } + + /* Must clear TC interrupt before calling + * dma_tc_handle + * in case tc_handle initiate a new dma job + */ + __set_bit(i, virtbase + COH901318_TC_INT_CLEAR1); + + dma_tc_handle(cohc); + } + } + spin_unlock(&cohc->lock); + } + + /* Check the remaining 32 DMA channels for IRQ */ + while (status2) { + /* Find first bit set, return as a number. */ + i = ffs(status2) - 1; + ch = i + 32; + cohc = &base->chans[ch]; + spin_lock(&cohc->lock); + + /* Mask off this bit */ + status2 &= ~(1 << i); + /* Check the individual channel bits */ + if (test_bit(i, virtbase + COH901318_BE_INT_STATUS2)) { + dev_crit(COHC_2_DEV(cohc), + "DMA bus error on channel %d!\n", ch); + /* Clear BE interrupt */ + BUG_ON(1); + __set_bit(i, virtbase + COH901318_BE_INT_CLEAR2); + } else { + /* Caused by TC, really? */ + if (unlikely(!test_bit(i, virtbase + + COH901318_TC_INT_STATUS2))) { + dev_warn(COHC_2_DEV(cohc), + "ignoring interrupt not caused by terminal count on channel %d\n", ch); + /* Clear TC interrupt */ + __set_bit(i, virtbase + COH901318_TC_INT_CLEAR2); + BUG_ON(1); + } else { + /* Enable powersave if transfer has finished */ + if (!(readl(virtbase + COH901318_CX_STAT + + COH901318_CX_STAT_SPACING*ch) & + COH901318_CX_STAT_ENABLED)) { + enable_powersave(cohc); + } + /* Must clear TC interrupt before calling + * dma_tc_handle + * in case tc_handle initiate a new dma job + */ + __set_bit(i, virtbase + COH901318_TC_INT_CLEAR2); + + dma_tc_handle(cohc); + } + } + spin_unlock(&cohc->lock); + } + + return IRQ_HANDLED; +} + +static int coh901318_alloc_chan_resources(struct dma_chan *chan) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + unsigned long flags; + + dev_vdbg(COHC_2_DEV(cohc), "[%s] DMA channel %d\n", + __func__, cohc->id); + + if (chan->client_count > 1) + return -EBUSY; + + spin_lock_irqsave(&cohc->lock, flags); + + coh901318_config(cohc, NULL); + + cohc->allocated = 1; + dma_cookie_init(chan); + + spin_unlock_irqrestore(&cohc->lock, flags); + + return 1; +} + +static void +coh901318_free_chan_resources(struct dma_chan *chan) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + int channel = cohc->id; + unsigned long flags; + + spin_lock_irqsave(&cohc->lock, flags); + + /* Disable HW */ + writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING*channel); + writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CTRL + + COH901318_CX_CTRL_SPACING*channel); + + cohc->allocated = 0; + + spin_unlock_irqrestore(&cohc->lock, flags); + + chan->device->device_control(chan, DMA_TERMINATE_ALL, 0); +} + + +static dma_cookie_t +coh901318_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct coh901318_desc *cohd = container_of(tx, struct coh901318_desc, + desc); + struct coh901318_chan *cohc = to_coh901318_chan(tx->chan); + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&cohc->lock, flags); + cookie = dma_cookie_assign(tx); + + coh901318_desc_queue(cohc, cohd); + + spin_unlock_irqrestore(&cohc->lock, flags); + + return cookie; +} + +static struct dma_async_tx_descriptor * +coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t size, unsigned long flags) +{ + struct coh901318_lli *lli; + struct coh901318_desc *cohd; + unsigned long flg; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + int lli_len; + u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last; + int ret; + + spin_lock_irqsave(&cohc->lock, flg); + + dev_vdbg(COHC_2_DEV(cohc), + "[%s] channel %d src 0x%x dest 0x%x size %d\n", + __func__, cohc->id, src, dest, size); + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last lli */ + ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE; + + lli_len = size >> MAX_DMA_PACKET_SIZE_SHIFT; + if ((lli_len << MAX_DMA_PACKET_SIZE_SHIFT) < size) + lli_len++; + + lli = coh901318_lli_alloc(&cohc->base->pool, lli_len); + + if (lli == NULL) + goto err; + + ret = coh901318_lli_fill_memcpy( + &cohc->base->pool, lli, src, size, dest, + cohc_chan_param(cohc)->ctrl_lli_chained, + ctrl_last); + if (ret) + goto err; + + COH_DBG(coh901318_list_print(cohc, lli)); + + /* Pick a descriptor to handle this transfer */ + cohd = coh901318_desc_get(cohc); + cohd->lli = lli; + cohd->flags = flags; + cohd->desc.tx_submit = coh901318_tx_submit; + + spin_unlock_irqrestore(&cohc->lock, flg); + + return &cohd->desc; + err: + spin_unlock_irqrestore(&cohc->lock, flg); + return NULL; +} + +static struct dma_async_tx_descriptor * +coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + struct coh901318_lli *lli; + struct coh901318_desc *cohd; + const struct coh901318_params *params; + struct scatterlist *sg; + int len = 0; + int size; + int i; + u32 ctrl_chained = cohc_chan_param(cohc)->ctrl_lli_chained; + u32 ctrl = cohc_chan_param(cohc)->ctrl_lli; + u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last; + u32 config; + unsigned long flg; + int ret; + + if (!sgl) + goto out; + if (sgl->length == 0) + goto out; + + spin_lock_irqsave(&cohc->lock, flg); + + dev_vdbg(COHC_2_DEV(cohc), "[%s] sg_len %d dir %d\n", + __func__, sg_len, direction); + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last lli */ + ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE; + + params = cohc_chan_param(cohc); + config = params->config; + /* + * Add runtime-specific control on top, make + * sure the bits you set per peripheral channel are + * cleared in the default config from the platform. + */ + ctrl_chained |= cohc->runtime_ctrl; + ctrl_last |= cohc->runtime_ctrl; + ctrl |= cohc->runtime_ctrl; + + if (direction == DMA_MEM_TO_DEV) { + u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE | + COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE; + + config |= COH901318_CX_CFG_RM_MEMORY_TO_PRIMARY; + ctrl_chained |= tx_flags; + ctrl_last |= tx_flags; + ctrl |= tx_flags; + } else if (direction == DMA_DEV_TO_MEM) { + u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST | + COH901318_CX_CTRL_DST_ADDR_INC_ENABLE; + + config |= COH901318_CX_CFG_RM_PRIMARY_TO_MEMORY; + ctrl_chained |= rx_flags; + ctrl_last |= rx_flags; + ctrl |= rx_flags; + } else + goto err_direction; + + /* The dma only supports transmitting packages up to + * MAX_DMA_PACKET_SIZE. Calculate to total number of + * dma elemts required to send the entire sg list + */ + for_each_sg(sgl, sg, sg_len, i) { + unsigned int factor; + size = sg_dma_len(sg); + + if (size <= MAX_DMA_PACKET_SIZE) { + len++; + continue; + } + + factor = size >> MAX_DMA_PACKET_SIZE_SHIFT; + if ((factor << MAX_DMA_PACKET_SIZE_SHIFT) < size) + factor++; + + len += factor; + } + + pr_debug("Allocate %d lli:s for this transfer\n", len); + lli = coh901318_lli_alloc(&cohc->base->pool, len); + + if (lli == NULL) + goto err_dma_alloc; + + /* initiate allocated lli list */ + ret = coh901318_lli_fill_sg(&cohc->base->pool, lli, sgl, sg_len, + cohc_dev_addr(cohc), + ctrl_chained, + ctrl, + ctrl_last, + direction, COH901318_CX_CTRL_TC_IRQ_ENABLE); + if (ret) + goto err_lli_fill; + + + COH_DBG(coh901318_list_print(cohc, lli)); + + /* Pick a descriptor to handle this transfer */ + cohd = coh901318_desc_get(cohc); + cohd->head_config = config; + /* + * Set the default head ctrl for the channel to the one from the + * lli, things may have changed due to odd buffer alignment + * etc. + */ + cohd->head_ctrl = lli->control; + cohd->dir = direction; + cohd->flags = flags; + cohd->desc.tx_submit = coh901318_tx_submit; + cohd->lli = lli; + + spin_unlock_irqrestore(&cohc->lock, flg); + + return &cohd->desc; + err_lli_fill: + err_dma_alloc: + err_direction: + spin_unlock_irqrestore(&cohc->lock, flg); + out: + return NULL; +} + +static enum dma_status +coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + /* FIXME: should be conditional on ret != DMA_SUCCESS? */ + dma_set_residue(txstate, coh901318_get_bytes_left(chan)); + + if (ret == DMA_IN_PROGRESS && cohc->stopped) + ret = DMA_PAUSED; + + return ret; +} + +static void +coh901318_issue_pending(struct dma_chan *chan) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&cohc->lock, flags); + + /* + * Busy means that pending jobs are already being processed, + * and then there is no point in starting the queue: the + * terminal count interrupt on the channel will take the next + * job on the queue and execute it anyway. + */ + if (!cohc->busy) + coh901318_queue_start(cohc); + + spin_unlock_irqrestore(&cohc->lock, flags); +} + +/* + * Here we wrap in the runtime dma control interface + */ +struct burst_table { + int burst_8bit; + int burst_16bit; + int burst_32bit; + u32 reg; +}; + +static const struct burst_table burst_sizes[] = { + { + .burst_8bit = 64, + .burst_16bit = 32, + .burst_32bit = 16, + .reg = COH901318_CX_CTRL_BURST_COUNT_64_BYTES, + }, + { + .burst_8bit = 48, + .burst_16bit = 24, + .burst_32bit = 12, + .reg = COH901318_CX_CTRL_BURST_COUNT_48_BYTES, + }, + { + .burst_8bit = 32, + .burst_16bit = 16, + .burst_32bit = 8, + .reg = COH901318_CX_CTRL_BURST_COUNT_32_BYTES, + }, + { + .burst_8bit = 16, + .burst_16bit = 8, + .burst_32bit = 4, + .reg = COH901318_CX_CTRL_BURST_COUNT_16_BYTES, + }, + { + .burst_8bit = 8, + .burst_16bit = 4, + .burst_32bit = 2, + .reg = COH901318_CX_CTRL_BURST_COUNT_8_BYTES, + }, + { + .burst_8bit = 4, + .burst_16bit = 2, + .burst_32bit = 1, + .reg = COH901318_CX_CTRL_BURST_COUNT_4_BYTES, + }, + { + .burst_8bit = 2, + .burst_16bit = 1, + .burst_32bit = 0, + .reg = COH901318_CX_CTRL_BURST_COUNT_2_BYTES, + }, + { + .burst_8bit = 1, + .burst_16bit = 0, + .burst_32bit = 0, + .reg = COH901318_CX_CTRL_BURST_COUNT_1_BYTE, + }, +}; + +static void coh901318_dma_set_runtimeconfig(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + dma_addr_t addr; + enum dma_slave_buswidth addr_width; + u32 maxburst; + u32 runtime_ctrl = 0; + int i = 0; + + /* We only support mem to per or per to mem transfers */ + if (config->direction == DMA_DEV_TO_MEM) { + addr = config->src_addr; + addr_width = config->src_addr_width; + maxburst = config->src_maxburst; + } else if (config->direction == DMA_MEM_TO_DEV) { + addr = config->dst_addr; + addr_width = config->dst_addr_width; + maxburst = config->dst_maxburst; + } else { + dev_err(COHC_2_DEV(cohc), "illegal channel mode\n"); + return; + } + + dev_dbg(COHC_2_DEV(cohc), "configure channel for %d byte transfers\n", + addr_width); + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + runtime_ctrl |= + COH901318_CX_CTRL_SRC_BUS_SIZE_8_BITS | + COH901318_CX_CTRL_DST_BUS_SIZE_8_BITS; + + while (i < ARRAY_SIZE(burst_sizes)) { + if (burst_sizes[i].burst_8bit <= maxburst) + break; + i++; + } + + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + runtime_ctrl |= + COH901318_CX_CTRL_SRC_BUS_SIZE_16_BITS | + COH901318_CX_CTRL_DST_BUS_SIZE_16_BITS; + + while (i < ARRAY_SIZE(burst_sizes)) { + if (burst_sizes[i].burst_16bit <= maxburst) + break; + i++; + } + + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + /* Direction doesn't matter here, it's 32/32 bits */ + runtime_ctrl |= + COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS | + COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS; + + while (i < ARRAY_SIZE(burst_sizes)) { + if (burst_sizes[i].burst_32bit <= maxburst) + break; + i++; + } + + break; + default: + dev_err(COHC_2_DEV(cohc), + "bad runtimeconfig: alien address width\n"); + return; + } + + runtime_ctrl |= burst_sizes[i].reg; + dev_dbg(COHC_2_DEV(cohc), + "selected burst size %d bytes for address width %d bytes, maxburst %d\n", + burst_sizes[i].burst_8bit, addr_width, maxburst); + + cohc->runtime_addr = addr; + cohc->runtime_ctrl = runtime_ctrl; +} + +static int +coh901318_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + unsigned long flags; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + struct coh901318_desc *cohd; + void __iomem *virtbase = cohc->base->virtbase; + + if (cmd == DMA_SLAVE_CONFIG) { + struct dma_slave_config *config = + (struct dma_slave_config *) arg; + + coh901318_dma_set_runtimeconfig(chan, config); + return 0; + } + + if (cmd == DMA_PAUSE) { + coh901318_pause(chan); + return 0; + } + + if (cmd == DMA_RESUME) { + coh901318_resume(chan); + return 0; + } + + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + /* The remainder of this function terminates the transfer */ + coh901318_pause(chan); + spin_lock_irqsave(&cohc->lock, flags); + + /* Clear any pending BE or TC interrupt */ + if (cohc->id < 32) { + writel(1 << cohc->id, virtbase + COH901318_BE_INT_CLEAR1); + writel(1 << cohc->id, virtbase + COH901318_TC_INT_CLEAR1); + } else { + writel(1 << (cohc->id - 32), virtbase + + COH901318_BE_INT_CLEAR2); + writel(1 << (cohc->id - 32), virtbase + + COH901318_TC_INT_CLEAR2); + } + + enable_powersave(cohc); + + while ((cohd = coh901318_first_active_get(cohc))) { + /* release the lli allocation*/ + coh901318_lli_free(&cohc->base->pool, &cohd->lli); + + /* return desc to free-list */ + coh901318_desc_remove(cohd); + coh901318_desc_free(cohc, cohd); + } + + while ((cohd = coh901318_first_queued(cohc))) { + /* release the lli allocation*/ + coh901318_lli_free(&cohc->base->pool, &cohd->lli); + + /* return desc to free-list */ + coh901318_desc_remove(cohd); + coh901318_desc_free(cohc, cohd); + } + + + cohc->nbr_active_done = 0; + cohc->busy = 0; + + spin_unlock_irqrestore(&cohc->lock, flags); + + return 0; +} + +void coh901318_base_init(struct dma_device *dma, const int *pick_chans, + struct coh901318_base *base) +{ + int chans_i; + int i = 0; + struct coh901318_chan *cohc; + + INIT_LIST_HEAD(&dma->channels); + + for (chans_i = 0; pick_chans[chans_i] != -1; chans_i += 2) { + for (i = pick_chans[chans_i]; i <= pick_chans[chans_i+1]; i++) { + cohc = &base->chans[i]; + + cohc->base = base; + cohc->chan.device = dma; + cohc->id = i; + + /* TODO: do we really need this lock if only one + * client is connected to each channel? + */ + + spin_lock_init(&cohc->lock); + + cohc->nbr_active_done = 0; + cohc->busy = 0; + INIT_LIST_HEAD(&cohc->free); + INIT_LIST_HEAD(&cohc->active); + INIT_LIST_HEAD(&cohc->queue); + + tasklet_init(&cohc->tasklet, dma_tasklet, + (unsigned long) cohc); + + list_add_tail(&cohc->chan.device_node, + &dma->channels); + } + } +} + +static int __init coh901318_probe(struct platform_device *pdev) +{ + int err = 0; + struct coh901318_platform *pdata; + struct coh901318_base *base; + int irq; + struct resource *io; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + goto err_get_resource; + + /* Map DMA controller registers to virtual memory */ + if (request_mem_region(io->start, + resource_size(io), + pdev->dev.driver->name) == NULL) { + err = -EBUSY; + goto err_request_mem; + } + + pdata = pdev->dev.platform_data; + if (!pdata) + goto err_no_platformdata; + + base = kmalloc(ALIGN(sizeof(struct coh901318_base), 4) + + pdata->max_channels * + sizeof(struct coh901318_chan), + GFP_KERNEL); + if (!base) + goto err_alloc_coh_dma_channels; + + base->chans = ((void *)base) + ALIGN(sizeof(struct coh901318_base), 4); + + base->virtbase = ioremap(io->start, resource_size(io)); + if (!base->virtbase) { + err = -ENOMEM; + goto err_no_ioremap; + } + + base->dev = &pdev->dev; + base->platform = pdata; + spin_lock_init(&base->pm.lock); + base->pm.started_channels = 0; + + COH901318_DEBUGFS_ASSIGN(debugfs_dma_base, base); + + platform_set_drvdata(pdev, base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + goto err_no_irq; + + err = request_irq(irq, dma_irq_handler, IRQF_DISABLED, + "coh901318", base); + if (err) { + dev_crit(&pdev->dev, + "Cannot allocate IRQ for DMA controller!\n"); + goto err_request_irq; + } + + err = coh901318_pool_create(&base->pool, &pdev->dev, + sizeof(struct coh901318_lli), + 32); + if (err) + goto err_pool_create; + + /* init channels for device transfers */ + coh901318_base_init(&base->dma_slave, base->platform->chans_slave, + base); + + dma_cap_zero(base->dma_slave.cap_mask); + dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask); + + base->dma_slave.device_alloc_chan_resources = coh901318_alloc_chan_resources; + base->dma_slave.device_free_chan_resources = coh901318_free_chan_resources; + base->dma_slave.device_prep_slave_sg = coh901318_prep_slave_sg; + base->dma_slave.device_tx_status = coh901318_tx_status; + base->dma_slave.device_issue_pending = coh901318_issue_pending; + base->dma_slave.device_control = coh901318_control; + base->dma_slave.dev = &pdev->dev; + + err = dma_async_device_register(&base->dma_slave); + + if (err) + goto err_register_slave; + + /* init channels for memcpy */ + coh901318_base_init(&base->dma_memcpy, base->platform->chans_memcpy, + base); + + dma_cap_zero(base->dma_memcpy.cap_mask); + dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask); + + base->dma_memcpy.device_alloc_chan_resources = coh901318_alloc_chan_resources; + base->dma_memcpy.device_free_chan_resources = coh901318_free_chan_resources; + base->dma_memcpy.device_prep_dma_memcpy = coh901318_prep_memcpy; + base->dma_memcpy.device_tx_status = coh901318_tx_status; + base->dma_memcpy.device_issue_pending = coh901318_issue_pending; + base->dma_memcpy.device_control = coh901318_control; + base->dma_memcpy.dev = &pdev->dev; + /* + * This controller can only access address at even 32bit boundaries, + * i.e. 2^2 + */ + base->dma_memcpy.copy_align = 2; + err = dma_async_device_register(&base->dma_memcpy); + + if (err) + goto err_register_memcpy; + + dev_info(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%08x\n", + (u32) base->virtbase); + + return err; + + err_register_memcpy: + dma_async_device_unregister(&base->dma_slave); + err_register_slave: + coh901318_pool_destroy(&base->pool); + err_pool_create: + free_irq(platform_get_irq(pdev, 0), base); + err_request_irq: + err_no_irq: + iounmap(base->virtbase); + err_no_ioremap: + kfree(base); + err_alloc_coh_dma_channels: + err_no_platformdata: + release_mem_region(pdev->resource->start, + resource_size(pdev->resource)); + err_request_mem: + err_get_resource: + return err; +} + +static int __exit coh901318_remove(struct platform_device *pdev) +{ + struct coh901318_base *base = platform_get_drvdata(pdev); + + dma_async_device_unregister(&base->dma_memcpy); + dma_async_device_unregister(&base->dma_slave); + coh901318_pool_destroy(&base->pool); + free_irq(platform_get_irq(pdev, 0), base); + iounmap(base->virtbase); + kfree(base); + release_mem_region(pdev->resource->start, + resource_size(pdev->resource)); + return 0; +} + + +static struct platform_driver coh901318_driver = { + .remove = __exit_p(coh901318_remove), + .driver = { + .name = "coh901318", + }, +}; + +int __init coh901318_init(void) +{ + return platform_driver_probe(&coh901318_driver, coh901318_probe); +} +subsys_initcall(coh901318_init); + +void __exit coh901318_exit(void) +{ + platform_driver_unregister(&coh901318_driver); +} +module_exit(coh901318_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Per Friden"); diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c new file mode 100644 index 00000000..6c0e2d4c --- /dev/null +++ b/drivers/dma/coh901318_lli.c @@ -0,0 +1,313 @@ +/* + * driver/dma/coh901318_lli.c + * + * Copyright (C) 2007-2009 ST-Ericsson + * License terms: GNU General Public License (GPL) version 2 + * Support functions for handling lli for dma + * Author: Per Friden <per.friden@stericsson.com> + */ + +#include <linux/spinlock.h> +#include <linux/memory.h> +#include <linux/gfp.h> +#include <linux/dmapool.h> +#include <mach/coh901318.h> + +#include "coh901318_lli.h" + +#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG)) +#define DEBUGFS_POOL_COUNTER_RESET(pool) (pool->debugfs_pool_counter = 0) +#define DEBUGFS_POOL_COUNTER_ADD(pool, add) (pool->debugfs_pool_counter += add) +#else +#define DEBUGFS_POOL_COUNTER_RESET(pool) +#define DEBUGFS_POOL_COUNTER_ADD(pool, add) +#endif + +static struct coh901318_lli * +coh901318_lli_next(struct coh901318_lli *data) +{ + if (data == NULL || data->link_addr == 0) + return NULL; + + return (struct coh901318_lli *) data->virt_link_addr; +} + +int coh901318_pool_create(struct coh901318_pool *pool, + struct device *dev, + size_t size, size_t align) +{ + spin_lock_init(&pool->lock); + pool->dev = dev; + pool->dmapool = dma_pool_create("lli_pool", dev, size, align, 0); + + DEBUGFS_POOL_COUNTER_RESET(pool); + return 0; +} + +int coh901318_pool_destroy(struct coh901318_pool *pool) +{ + + dma_pool_destroy(pool->dmapool); + return 0; +} + +struct coh901318_lli * +coh901318_lli_alloc(struct coh901318_pool *pool, unsigned int len) +{ + int i; + struct coh901318_lli *head; + struct coh901318_lli *lli; + struct coh901318_lli *lli_prev; + dma_addr_t phy; + + if (len == 0) + goto err; + + spin_lock(&pool->lock); + + head = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy); + + if (head == NULL) + goto err; + + DEBUGFS_POOL_COUNTER_ADD(pool, 1); + + lli = head; + lli->phy_this = phy; + lli->link_addr = 0x00000000; + lli->virt_link_addr = 0x00000000U; + + for (i = 1; i < len; i++) { + lli_prev = lli; + + lli = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy); + + if (lli == NULL) + goto err_clean_up; + + DEBUGFS_POOL_COUNTER_ADD(pool, 1); + lli->phy_this = phy; + lli->link_addr = 0x00000000; + lli->virt_link_addr = 0x00000000U; + + lli_prev->link_addr = phy; + lli_prev->virt_link_addr = lli; + } + + spin_unlock(&pool->lock); + + return head; + + err: + spin_unlock(&pool->lock); + return NULL; + + err_clean_up: + lli_prev->link_addr = 0x00000000U; + spin_unlock(&pool->lock); + coh901318_lli_free(pool, &head); + return NULL; +} + +void coh901318_lli_free(struct coh901318_pool *pool, + struct coh901318_lli **lli) +{ + struct coh901318_lli *l; + struct coh901318_lli *next; + + if (lli == NULL) + return; + + l = *lli; + + if (l == NULL) + return; + + spin_lock(&pool->lock); + + while (l->link_addr) { + next = l->virt_link_addr; + dma_pool_free(pool->dmapool, l, l->phy_this); + DEBUGFS_POOL_COUNTER_ADD(pool, -1); + l = next; + } + dma_pool_free(pool->dmapool, l, l->phy_this); + DEBUGFS_POOL_COUNTER_ADD(pool, -1); + + spin_unlock(&pool->lock); + *lli = NULL; +} + +int +coh901318_lli_fill_memcpy(struct coh901318_pool *pool, + struct coh901318_lli *lli, + dma_addr_t source, unsigned int size, + dma_addr_t destination, u32 ctrl_chained, + u32 ctrl_eom) +{ + int s = size; + dma_addr_t src = source; + dma_addr_t dst = destination; + + lli->src_addr = src; + lli->dst_addr = dst; + + while (lli->link_addr) { + lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE; + lli->src_addr = src; + lli->dst_addr = dst; + + s -= MAX_DMA_PACKET_SIZE; + lli = coh901318_lli_next(lli); + + src += MAX_DMA_PACKET_SIZE; + dst += MAX_DMA_PACKET_SIZE; + } + + lli->control = ctrl_eom | s; + lli->src_addr = src; + lli->dst_addr = dst; + + return 0; +} + +int +coh901318_lli_fill_single(struct coh901318_pool *pool, + struct coh901318_lli *lli, + dma_addr_t buf, unsigned int size, + dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom, + enum dma_transfer_direction dir) +{ + int s = size; + dma_addr_t src; + dma_addr_t dst; + + + if (dir == DMA_MEM_TO_DEV) { + src = buf; + dst = dev_addr; + + } else if (dir == DMA_DEV_TO_MEM) { + + src = dev_addr; + dst = buf; + } else { + return -EINVAL; + } + + while (lli->link_addr) { + size_t block_size = MAX_DMA_PACKET_SIZE; + lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE; + + /* If we are on the next-to-final block and there will + * be less than half a DMA packet left for the last + * block, then we want to make this block a little + * smaller to balance the sizes. This is meant to + * avoid too small transfers if the buffer size is + * (MAX_DMA_PACKET_SIZE*N + 1) */ + if (s < (MAX_DMA_PACKET_SIZE + MAX_DMA_PACKET_SIZE/2)) + block_size = MAX_DMA_PACKET_SIZE/2; + + s -= block_size; + lli->src_addr = src; + lli->dst_addr = dst; + + lli = coh901318_lli_next(lli); + + if (dir == DMA_MEM_TO_DEV) + src += block_size; + else if (dir == DMA_DEV_TO_MEM) + dst += block_size; + } + + lli->control = ctrl_eom | s; + lli->src_addr = src; + lli->dst_addr = dst; + + return 0; +} + +int +coh901318_lli_fill_sg(struct coh901318_pool *pool, + struct coh901318_lli *lli, + struct scatterlist *sgl, unsigned int nents, + dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl, + u32 ctrl_last, + enum dma_transfer_direction dir, u32 ctrl_irq_mask) +{ + int i; + struct scatterlist *sg; + u32 ctrl_sg; + dma_addr_t src = 0; + dma_addr_t dst = 0; + u32 bytes_to_transfer; + u32 elem_size; + + if (lli == NULL) + goto err; + + spin_lock(&pool->lock); + + if (dir == DMA_MEM_TO_DEV) + dst = dev_addr; + else if (dir == DMA_DEV_TO_MEM) + src = dev_addr; + else + goto err; + + for_each_sg(sgl, sg, nents, i) { + if (sg_is_chain(sg)) { + /* sg continues to the next sg-element don't + * send ctrl_finish until the last + * sg-element in the chain + */ + ctrl_sg = ctrl_chained; + } else if (i == nents - 1) + ctrl_sg = ctrl_last; + else + ctrl_sg = ctrl ? ctrl : ctrl_last; + + + if (dir == DMA_MEM_TO_DEV) + /* increment source address */ + src = sg_phys(sg); + else + /* increment destination address */ + dst = sg_phys(sg); + + bytes_to_transfer = sg_dma_len(sg); + + while (bytes_to_transfer) { + u32 val; + + if (bytes_to_transfer > MAX_DMA_PACKET_SIZE) { + elem_size = MAX_DMA_PACKET_SIZE; + val = ctrl_chained; + } else { + elem_size = bytes_to_transfer; + val = ctrl_sg; + } + + lli->control = val | elem_size; + lli->src_addr = src; + lli->dst_addr = dst; + + if (dir == DMA_DEV_TO_MEM) + dst += elem_size; + else + src += elem_size; + + BUG_ON(lli->link_addr & 3); + + bytes_to_transfer -= elem_size; + lli = coh901318_lli_next(lli); + } + + } + spin_unlock(&pool->lock); + + return 0; + err: + spin_unlock(&pool->lock); + return -EINVAL; +} diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h new file mode 100644 index 00000000..abff3714 --- /dev/null +++ b/drivers/dma/coh901318_lli.h @@ -0,0 +1,124 @@ +/* + * driver/dma/coh901318_lli.h + * + * Copyright (C) 2007-2009 ST-Ericsson + * License terms: GNU General Public License (GPL) version 2 + * Support functions for handling lli for coh901318 + * Author: Per Friden <per.friden@stericsson.com> + */ + +#ifndef COH901318_LLI_H +#define COH901318_LLI_H + +#include <mach/coh901318.h> + +struct device; + +struct coh901318_pool { + spinlock_t lock; + struct dma_pool *dmapool; + struct device *dev; + +#ifdef CONFIG_DEBUG_FS + int debugfs_pool_counter; +#endif +}; + +struct device; +/** + * coh901318_pool_create() - Creates an dma pool for lli:s + * @pool: pool handle + * @dev: dma device + * @lli_nbr: number of lli:s in the pool + * @algin: address alignemtn of lli:s + * returns 0 on success otherwise none zero + */ +int coh901318_pool_create(struct coh901318_pool *pool, + struct device *dev, + size_t lli_nbr, size_t align); + +/** + * coh901318_pool_destroy() - Destroys the dma pool + * @pool: pool handle + * returns 0 on success otherwise none zero + */ +int coh901318_pool_destroy(struct coh901318_pool *pool); + +/** + * coh901318_lli_alloc() - Allocates a linked list + * + * @pool: pool handle + * @len: length to list + * return: none NULL if success otherwise NULL + */ +struct coh901318_lli * +coh901318_lli_alloc(struct coh901318_pool *pool, + unsigned int len); + +/** + * coh901318_lli_free() - Returns the linked list items to the pool + * @pool: pool handle + * @lli: reference to lli pointer to be freed + */ +void coh901318_lli_free(struct coh901318_pool *pool, + struct coh901318_lli **lli); + +/** + * coh901318_lli_fill_memcpy() - Prepares the lli:s for dma memcpy + * @pool: pool handle + * @lli: allocated lli + * @src: src address + * @size: transfer size + * @dst: destination address + * @ctrl_chained: ctrl for chained lli + * @ctrl_last: ctrl for the last lli + * returns number of CPU interrupts for the lli, negative on error. + */ +int +coh901318_lli_fill_memcpy(struct coh901318_pool *pool, + struct coh901318_lli *lli, + dma_addr_t src, unsigned int size, + dma_addr_t dst, u32 ctrl_chained, u32 ctrl_last); + +/** + * coh901318_lli_fill_single() - Prepares the lli:s for dma single transfer + * @pool: pool handle + * @lli: allocated lli + * @buf: transfer buffer + * @size: transfer size + * @dev_addr: address of periphal + * @ctrl_chained: ctrl for chained lli + * @ctrl_last: ctrl for the last lli + * @dir: direction of transfer (to or from device) + * returns number of CPU interrupts for the lli, negative on error. + */ +int +coh901318_lli_fill_single(struct coh901318_pool *pool, + struct coh901318_lli *lli, + dma_addr_t buf, unsigned int size, + dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last, + enum dma_transfer_direction dir); + +/** + * coh901318_lli_fill_single() - Prepares the lli:s for dma scatter list transfer + * @pool: pool handle + * @lli: allocated lli + * @sg: scatter gather list + * @nents: number of entries in sg + * @dev_addr: address of periphal + * @ctrl_chained: ctrl for chained lli + * @ctrl: ctrl of middle lli + * @ctrl_last: ctrl for the last lli + * @dir: direction of transfer (to or from device) + * @ctrl_irq_mask: ctrl mask for CPU interrupt + * returns number of CPU interrupts for the lli, negative on error. + */ +int +coh901318_lli_fill_sg(struct coh901318_pool *pool, + struct coh901318_lli *lli, + struct scatterlist *sg, unsigned int nents, + dma_addr_t dev_addr, u32 ctrl_chained, + u32 ctrl, u32 ctrl_last, + enum dma_transfer_direction dir, u32 ctrl_irq_mask); + +#endif /* COH901318_LLI_H */ diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c new file mode 100644 index 00000000..2397f6f4 --- /dev/null +++ b/drivers/dma/dmaengine.c @@ -0,0 +1,1071 @@ +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* + * This code implements the DMA subsystem. It provides a HW-neutral interface + * for other kernel code to use asynchronous memory copy capabilities, + * if present, and allows different HW DMA drivers to register as providing + * this capability. + * + * Due to the fact we are accelerating what is already a relatively fast + * operation, the code goes to great lengths to avoid additional overhead, + * such as locking. + * + * LOCKING: + * + * The subsystem keeps a global list of dma_device structs it is protected by a + * mutex, dma_list_mutex. + * + * A subsystem can get access to a channel by calling dmaengine_get() followed + * by dma_find_channel(), or if it has need for an exclusive channel it can call + * dma_request_channel(). Once a channel is allocated a reference is taken + * against its corresponding driver to disable removal. + * + * Each device has a channels list, which runs unlocked but is never modified + * once the device is registered, it's just setup by the driver. + * + * See Documentation/dmaengine.txt for more details + */ + +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/hardirq.h> +#include <linux/spinlock.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> +#include <linux/mutex.h> +#include <linux/jiffies.h> +#include <linux/rculist.h> +#include <linux/idr.h> +#include <linux/slab.h> + +static DEFINE_MUTEX(dma_list_mutex); +static DEFINE_IDR(dma_idr); +static LIST_HEAD(dma_device_list); +static long dmaengine_ref_count; + +/* --- sysfs implementation --- */ + +/** + * dev_to_dma_chan - convert a device pointer to the its sysfs container object + * @dev - device node + * + * Must be called under dma_list_mutex + */ +static struct dma_chan *dev_to_dma_chan(struct device *dev) +{ + struct dma_chan_dev *chan_dev; + + chan_dev = container_of(dev, typeof(*chan_dev), device); + return chan_dev->chan; +} + +static ssize_t show_memcpy_count(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct dma_chan *chan; + unsigned long count = 0; + int i; + int err; + + mutex_lock(&dma_list_mutex); + chan = dev_to_dma_chan(dev); + if (chan) { + for_each_possible_cpu(i) + count += per_cpu_ptr(chan->local, i)->memcpy_count; + err = sprintf(buf, "%lu\n", count); + } else + err = -ENODEV; + mutex_unlock(&dma_list_mutex); + + return err; +} + +static ssize_t show_bytes_transferred(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dma_chan *chan; + unsigned long count = 0; + int i; + int err; + + mutex_lock(&dma_list_mutex); + chan = dev_to_dma_chan(dev); + if (chan) { + for_each_possible_cpu(i) + count += per_cpu_ptr(chan->local, i)->bytes_transferred; + err = sprintf(buf, "%lu\n", count); + } else + err = -ENODEV; + mutex_unlock(&dma_list_mutex); + + return err; +} + +static ssize_t show_in_use(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct dma_chan *chan; + int err; + + mutex_lock(&dma_list_mutex); + chan = dev_to_dma_chan(dev); + if (chan) + err = sprintf(buf, "%d\n", chan->client_count); + else + err = -ENODEV; + mutex_unlock(&dma_list_mutex); + + return err; +} + +static struct device_attribute dma_attrs[] = { + __ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL), + __ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL), + __ATTR(in_use, S_IRUGO, show_in_use, NULL), + __ATTR_NULL +}; + +static void chan_dev_release(struct device *dev) +{ + struct dma_chan_dev *chan_dev; + + chan_dev = container_of(dev, typeof(*chan_dev), device); + if (atomic_dec_and_test(chan_dev->idr_ref)) { + mutex_lock(&dma_list_mutex); + idr_remove(&dma_idr, chan_dev->dev_id); + mutex_unlock(&dma_list_mutex); + kfree(chan_dev->idr_ref); + } + kfree(chan_dev); +} + +static struct class dma_devclass = { + .name = "dma", + .dev_attrs = dma_attrs, + .dev_release = chan_dev_release, +}; + +/* --- client and device registration --- */ + +#define dma_device_satisfies_mask(device, mask) \ + __dma_device_satisfies_mask((device), &(mask)) +static int +__dma_device_satisfies_mask(struct dma_device *device, dma_cap_mask_t *want) +{ + dma_cap_mask_t has; + + bitmap_and(has.bits, want->bits, device->cap_mask.bits, + DMA_TX_TYPE_END); + return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END); +} + +static struct module *dma_chan_to_owner(struct dma_chan *chan) +{ + return chan->device->dev->driver->owner; +} + +/** + * balance_ref_count - catch up the channel reference count + * @chan - channel to balance ->client_count versus dmaengine_ref_count + * + * balance_ref_count must be called under dma_list_mutex + */ +static void balance_ref_count(struct dma_chan *chan) +{ + struct module *owner = dma_chan_to_owner(chan); + + while (chan->client_count < dmaengine_ref_count) { + __module_get(owner); + chan->client_count++; + } +} + +/** + * dma_chan_get - try to grab a dma channel's parent driver module + * @chan - channel to grab + * + * Must be called under dma_list_mutex + */ +static int dma_chan_get(struct dma_chan *chan) +{ + int err = -ENODEV; + struct module *owner = dma_chan_to_owner(chan); + + if (chan->client_count) { + __module_get(owner); + err = 0; + } else if (try_module_get(owner)) + err = 0; + + if (err == 0) + chan->client_count++; + + /* allocate upon first client reference */ + if (chan->client_count == 1 && err == 0) { + int desc_cnt = chan->device->device_alloc_chan_resources(chan); + + if (desc_cnt < 0) { + err = desc_cnt; + chan->client_count = 0; + module_put(owner); + } else if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask)) + balance_ref_count(chan); + } + + return err; +} + +/** + * dma_chan_put - drop a reference to a dma channel's parent driver module + * @chan - channel to release + * + * Must be called under dma_list_mutex + */ +static void dma_chan_put(struct dma_chan *chan) +{ + if (!chan->client_count) + return; /* this channel failed alloc_chan_resources */ + chan->client_count--; + module_put(dma_chan_to_owner(chan)); + if (chan->client_count == 0) + chan->device->device_free_chan_resources(chan); +} + +enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) +{ + enum dma_status status; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); + + dma_async_issue_pending(chan); + do { + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + printk(KERN_ERR "dma_sync_wait_timeout!\n"); + return DMA_ERROR; + } + } while (status == DMA_IN_PROGRESS); + + return status; +} +EXPORT_SYMBOL(dma_sync_wait); + +/** + * dma_cap_mask_all - enable iteration over all operation types + */ +static dma_cap_mask_t dma_cap_mask_all; + +/** + * dma_chan_tbl_ent - tracks channel allocations per core/operation + * @chan - associated channel for this entry + */ +struct dma_chan_tbl_ent { + struct dma_chan *chan; +}; + +/** + * channel_table - percpu lookup table for memory-to-memory offload providers + */ +static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END]; + +static int __init dma_channel_table_init(void) +{ + enum dma_transaction_type cap; + int err = 0; + + bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END); + + /* 'interrupt', 'private', and 'slave' are channel capabilities, + * but are not associated with an operation so they do not need + * an entry in the channel_table + */ + clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits); + clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits); + clear_bit(DMA_SLAVE, dma_cap_mask_all.bits); + + for_each_dma_cap_mask(cap, dma_cap_mask_all) { + channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent); + if (!channel_table[cap]) { + err = -ENOMEM; + break; + } + } + + if (err) { + pr_err("dmaengine: initialization failure\n"); + for_each_dma_cap_mask(cap, dma_cap_mask_all) + if (channel_table[cap]) + free_percpu(channel_table[cap]); + } + + return err; +} +arch_initcall(dma_channel_table_init); + +/** + * dma_find_channel - find a channel to carry out the operation + * @tx_type: transaction type + */ +struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type) +{ + return this_cpu_read(channel_table[tx_type]->chan); +} +EXPORT_SYMBOL(dma_find_channel); + +/* + * net_dma_find_channel - find a channel for net_dma + * net_dma has alignment requirements + */ +struct dma_chan *net_dma_find_channel(void) +{ + struct dma_chan *chan = dma_find_channel(DMA_MEMCPY); + if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1)) + return NULL; + + return chan; +} +EXPORT_SYMBOL(net_dma_find_channel); + +/** + * dma_issue_pending_all - flush all pending operations across all channels + */ +void dma_issue_pending_all(void) +{ + struct dma_device *device; + struct dma_chan *chan; + + rcu_read_lock(); + list_for_each_entry_rcu(device, &dma_device_list, global_node) { + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) + if (chan->client_count) + device->device_issue_pending(chan); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(dma_issue_pending_all); + +/** + * nth_chan - returns the nth channel of the given capability + * @cap: capability to match + * @n: nth channel desired + * + * Defaults to returning the channel with the desired capability and the + * lowest reference count when 'n' cannot be satisfied. Must be called + * under dma_list_mutex. + */ +static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n) +{ + struct dma_device *device; + struct dma_chan *chan; + struct dma_chan *ret = NULL; + struct dma_chan *min = NULL; + + list_for_each_entry(device, &dma_device_list, global_node) { + if (!dma_has_cap(cap, device->cap_mask) || + dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) { + if (!chan->client_count) + continue; + if (!min) + min = chan; + else if (chan->table_count < min->table_count) + min = chan; + + if (n-- == 0) { + ret = chan; + break; /* done */ + } + } + if (ret) + break; /* done */ + } + + if (!ret) + ret = min; + + if (ret) + ret->table_count++; + + return ret; +} + +/** + * dma_channel_rebalance - redistribute the available channels + * + * Optimize for cpu isolation (each cpu gets a dedicated channel for an + * operation type) in the SMP case, and operation isolation (avoid + * multi-tasking channels) in the non-SMP case. Must be called under + * dma_list_mutex. + */ +static void dma_channel_rebalance(void) +{ + struct dma_chan *chan; + struct dma_device *device; + int cpu; + int cap; + int n; + + /* undo the last distribution */ + for_each_dma_cap_mask(cap, dma_cap_mask_all) + for_each_possible_cpu(cpu) + per_cpu_ptr(channel_table[cap], cpu)->chan = NULL; + + list_for_each_entry(device, &dma_device_list, global_node) { + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) + chan->table_count = 0; + } + + /* don't populate the channel_table if no clients are available */ + if (!dmaengine_ref_count) + return; + + /* redistribute available channels */ + n = 0; + for_each_dma_cap_mask(cap, dma_cap_mask_all) + for_each_online_cpu(cpu) { + if (num_possible_cpus() > 1) + chan = nth_chan(cap, n++); + else + chan = nth_chan(cap, -1); + + per_cpu_ptr(channel_table[cap], cpu)->chan = chan; + } +} + +static struct dma_chan *private_candidate(dma_cap_mask_t *mask, struct dma_device *dev, + dma_filter_fn fn, void *fn_param) +{ + struct dma_chan *chan; + + if (!__dma_device_satisfies_mask(dev, mask)) { + pr_debug("%s: wrong capabilities\n", __func__); + return NULL; + } + /* devices with multiple channels need special handling as we need to + * ensure that all channels are either private or public. + */ + if (dev->chancnt > 1 && !dma_has_cap(DMA_PRIVATE, dev->cap_mask)) + list_for_each_entry(chan, &dev->channels, device_node) { + /* some channels are already publicly allocated */ + if (chan->client_count) + return NULL; + } + + list_for_each_entry(chan, &dev->channels, device_node) { + if (chan->client_count) { + pr_debug("%s: %s busy\n", + __func__, dma_chan_name(chan)); + continue; + } + if (fn && !fn(chan, fn_param)) { + pr_debug("%s: %s filter said false\n", + __func__, dma_chan_name(chan)); + continue; + } + return chan; + } + + return NULL; +} + +/** + * dma_request_channel - try to allocate an exclusive channel + * @mask: capabilities that the channel must satisfy + * @fn: optional callback to disposition available channels + * @fn_param: opaque parameter to pass to dma_filter_fn + */ +struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param) +{ + struct dma_device *device, *_d; + struct dma_chan *chan = NULL; + int err; + + /* Find a channel */ + mutex_lock(&dma_list_mutex); + list_for_each_entry_safe(device, _d, &dma_device_list, global_node) { + chan = private_candidate(mask, device, fn, fn_param); + if (chan) { + /* Found a suitable channel, try to grab, prep, and + * return it. We first set DMA_PRIVATE to disable + * balance_ref_count as this channel will not be + * published in the general-purpose allocator + */ + dma_cap_set(DMA_PRIVATE, device->cap_mask); + device->privatecnt++; + err = dma_chan_get(chan); + + if (err == -ENODEV) { + pr_debug("%s: %s module removed\n", __func__, + dma_chan_name(chan)); + list_del_rcu(&device->global_node); + } else if (err) + pr_debug("%s: failed to get %s: (%d)\n", + __func__, dma_chan_name(chan), err); + else + break; + if (--device->privatecnt == 0) + dma_cap_clear(DMA_PRIVATE, device->cap_mask); + chan = NULL; + } + } + mutex_unlock(&dma_list_mutex); + + pr_debug("%s: %s (%s)\n", __func__, chan ? "success" : "fail", + chan ? dma_chan_name(chan) : NULL); + + return chan; +} +EXPORT_SYMBOL_GPL(__dma_request_channel); + +void dma_release_channel(struct dma_chan *chan) +{ + mutex_lock(&dma_list_mutex); + WARN_ONCE(chan->client_count != 1, + "chan reference count %d != 1\n", chan->client_count); + dma_chan_put(chan); + /* drop PRIVATE cap enabled by __dma_request_channel() */ + if (--chan->device->privatecnt == 0) + dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask); + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL_GPL(dma_release_channel); + +/** + * dmaengine_get - register interest in dma_channels + */ +void dmaengine_get(void) +{ + struct dma_device *device, *_d; + struct dma_chan *chan; + int err; + + mutex_lock(&dma_list_mutex); + dmaengine_ref_count++; + + /* try to grab channels */ + list_for_each_entry_safe(device, _d, &dma_device_list, global_node) { + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) { + err = dma_chan_get(chan); + if (err == -ENODEV) { + /* module removed before we could use it */ + list_del_rcu(&device->global_node); + break; + } else if (err) + pr_err("%s: failed to get %s: (%d)\n", + __func__, dma_chan_name(chan), err); + } + } + + /* if this is the first reference and there were channels + * waiting we need to rebalance to get those channels + * incorporated into the channel table + */ + if (dmaengine_ref_count == 1) + dma_channel_rebalance(); + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL(dmaengine_get); + +/** + * dmaengine_put - let dma drivers be removed when ref_count == 0 + */ +void dmaengine_put(void) +{ + struct dma_device *device; + struct dma_chan *chan; + + mutex_lock(&dma_list_mutex); + dmaengine_ref_count--; + BUG_ON(dmaengine_ref_count < 0); + /* drop channel references */ + list_for_each_entry(device, &dma_device_list, global_node) { + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + continue; + list_for_each_entry(chan, &device->channels, device_node) + dma_chan_put(chan); + } + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL(dmaengine_put); + +static bool device_has_all_tx_types(struct dma_device *device) +{ + /* A device that satisfies this test has channels that will never cause + * an async_tx channel switch event as all possible operation types can + * be handled. + */ + #ifdef CONFIG_ASYNC_TX_DMA + if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE) + if (!dma_has_cap(DMA_MEMSET, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + if (!dma_has_cap(DMA_XOR, device->cap_mask)) + return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask)) + return false; + #endif + #endif + + #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + if (!dma_has_cap(DMA_PQ, device->cap_mask)) + return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask)) + return false; + #endif + #endif + + return true; +} + +static int get_dma_id(struct dma_device *device) +{ + int rc; + + idr_retry: + if (!idr_pre_get(&dma_idr, GFP_KERNEL)) + return -ENOMEM; + mutex_lock(&dma_list_mutex); + rc = idr_get_new(&dma_idr, NULL, &device->dev_id); + mutex_unlock(&dma_list_mutex); + if (rc == -EAGAIN) + goto idr_retry; + else if (rc != 0) + return rc; + + return 0; +} + +/** + * dma_async_device_register - registers DMA devices found + * @device: &dma_device + */ +int dma_async_device_register(struct dma_device *device) +{ + int chancnt = 0, rc; + struct dma_chan* chan; + atomic_t *idr_ref; + + if (!device) + return -ENODEV; + + /* validate device routines */ + BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) && + !device->device_prep_dma_memcpy); + BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && + !device->device_prep_dma_xor); + BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && + !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && + !device->device_prep_dma_memset); + BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && + !device->device_prep_dma_interrupt); + BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && + !device->device_prep_dma_sg); + BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) && + !device->device_prep_dma_cyclic); + BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) && + !device->device_control); + BUG_ON(dma_has_cap(DMA_INTERLEAVE, device->cap_mask) && + !device->device_prep_interleaved_dma); + + BUG_ON(!device->device_alloc_chan_resources); + BUG_ON(!device->device_free_chan_resources); + BUG_ON(!device->device_tx_status); + BUG_ON(!device->device_issue_pending); + BUG_ON(!device->dev); + + /* note: this only matters in the + * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case + */ + if (device_has_all_tx_types(device)) + dma_cap_set(DMA_ASYNC_TX, device->cap_mask); + + idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); + if (!idr_ref) + return -ENOMEM; + rc = get_dma_id(device); + if (rc != 0) { + kfree(idr_ref); + return rc; + } + + atomic_set(idr_ref, 0); + + /* represent channels in sysfs. Probably want devs too */ + list_for_each_entry(chan, &device->channels, device_node) { + rc = -ENOMEM; + chan->local = alloc_percpu(typeof(*chan->local)); + if (chan->local == NULL) + goto err_out; + chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL); + if (chan->dev == NULL) { + free_percpu(chan->local); + chan->local = NULL; + goto err_out; + } + + chan->chan_id = chancnt++; + chan->dev->device.class = &dma_devclass; + chan->dev->device.parent = device->dev; + chan->dev->chan = chan; + chan->dev->idr_ref = idr_ref; + chan->dev->dev_id = device->dev_id; + atomic_inc(idr_ref); + dev_set_name(&chan->dev->device, "dma%dchan%d", + device->dev_id, chan->chan_id); + + rc = device_register(&chan->dev->device); + if (rc) { + free_percpu(chan->local); + chan->local = NULL; + kfree(chan->dev); + atomic_dec(idr_ref); + goto err_out; + } + chan->client_count = 0; + } + device->chancnt = chancnt; + + mutex_lock(&dma_list_mutex); + /* take references on public channels */ + if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask)) + list_for_each_entry(chan, &device->channels, device_node) { + /* if clients are already waiting for channels we need + * to take references on their behalf + */ + if (dma_chan_get(chan) == -ENODEV) { + /* note we can only get here for the first + * channel as the remaining channels are + * guaranteed to get a reference + */ + rc = -ENODEV; + mutex_unlock(&dma_list_mutex); + goto err_out; + } + } + list_add_tail_rcu(&device->global_node, &dma_device_list); + if (dma_has_cap(DMA_PRIVATE, device->cap_mask)) + device->privatecnt++; /* Always private */ + dma_channel_rebalance(); + mutex_unlock(&dma_list_mutex); + + return 0; + +err_out: + /* if we never registered a channel just release the idr */ + if (atomic_read(idr_ref) == 0) { + mutex_lock(&dma_list_mutex); + idr_remove(&dma_idr, device->dev_id); + mutex_unlock(&dma_list_mutex); + kfree(idr_ref); + return rc; + } + + list_for_each_entry(chan, &device->channels, device_node) { + if (chan->local == NULL) + continue; + mutex_lock(&dma_list_mutex); + chan->dev->chan = NULL; + mutex_unlock(&dma_list_mutex); + device_unregister(&chan->dev->device); + free_percpu(chan->local); + } + return rc; +} +EXPORT_SYMBOL(dma_async_device_register); + +/** + * dma_async_device_unregister - unregister a DMA device + * @device: &dma_device + * + * This routine is called by dma driver exit routines, dmaengine holds module + * references to prevent it being called while channels are in use. + */ +void dma_async_device_unregister(struct dma_device *device) +{ + struct dma_chan *chan; + + mutex_lock(&dma_list_mutex); + list_del_rcu(&device->global_node); + dma_channel_rebalance(); + mutex_unlock(&dma_list_mutex); + + list_for_each_entry(chan, &device->channels, device_node) { + WARN_ONCE(chan->client_count, + "%s called while %d clients hold a reference\n", + __func__, chan->client_count); + mutex_lock(&dma_list_mutex); + chan->dev->chan = NULL; + mutex_unlock(&dma_list_mutex); + device_unregister(&chan->dev->device); + free_percpu(chan->local); + } +} +EXPORT_SYMBOL(dma_async_device_unregister); + +/** + * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses + * @chan: DMA channel to offload copy to + * @dest: destination address (virtual) + * @src: source address (virtual) + * @len: length + * + * Both @dest and @src must be mappable to a bus address according to the + * DMA mapping API rules for streaming mappings. + * Both @dest and @src must stay memory resident (kernel memory or locked + * user space pages). + */ +dma_cookie_t +dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, + void *src, size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + unsigned long flags; + + dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); + dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); + flags = DMA_CTRL_ACK | + DMA_COMPL_SRC_UNMAP_SINGLE | + DMA_COMPL_DEST_UNMAP_SINGLE; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); + + if (!tx) { + dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); + dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE); + return -ENOMEM; + } + + tx->callback = NULL; + cookie = tx->tx_submit(tx); + + preempt_disable(); + __this_cpu_add(chan->local->bytes_transferred, len); + __this_cpu_inc(chan->local->memcpy_count); + preempt_enable(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); + +/** + * dma_async_memcpy_buf_to_pg - offloaded copy from address to page + * @chan: DMA channel to offload copy to + * @page: destination page + * @offset: offset in page to copy to + * @kdata: source address (virtual) + * @len: length + * + * Both @page/@offset and @kdata must be mappable to a bus address according + * to the DMA mapping API rules for streaming mappings. + * Both @page/@offset and @kdata must stay memory resident (kernel memory or + * locked user space pages) + */ +dma_cookie_t +dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, + unsigned int offset, void *kdata, size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + unsigned long flags; + + dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); + dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); + flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); + + if (!tx) { + dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); + dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE); + return -ENOMEM; + } + + tx->callback = NULL; + cookie = tx->tx_submit(tx); + + preempt_disable(); + __this_cpu_add(chan->local->bytes_transferred, len); + __this_cpu_inc(chan->local->memcpy_count); + preempt_enable(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); + +/** + * dma_async_memcpy_pg_to_pg - offloaded copy from page to page + * @chan: DMA channel to offload copy to + * @dest_pg: destination page + * @dest_off: offset in page to copy to + * @src_pg: source page + * @src_off: offset in page to copy from + * @len: length + * + * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus + * address according to the DMA mapping API rules for streaming mappings. + * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident + * (kernel memory or locked user space pages). + */ +dma_cookie_t +dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, + unsigned int dest_off, struct page *src_pg, unsigned int src_off, + size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + unsigned long flags; + + dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); + dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len, + DMA_FROM_DEVICE); + flags = DMA_CTRL_ACK; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); + + if (!tx) { + dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE); + dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE); + return -ENOMEM; + } + + tx->callback = NULL; + cookie = tx->tx_submit(tx); + + preempt_disable(); + __this_cpu_add(chan->local->bytes_transferred, len); + __this_cpu_inc(chan->local->memcpy_count); + preempt_enable(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg); + +void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, + struct dma_chan *chan) +{ + tx->chan = chan; + #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH + spin_lock_init(&tx->lock); + #endif +} +EXPORT_SYMBOL(dma_async_tx_descriptor_init); + +/* dma_wait_for_async_tx - spin wait for a transaction to complete + * @tx: in-flight transaction to wait on + */ +enum dma_status +dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) +{ + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); + + if (!tx) + return DMA_SUCCESS; + + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); +} +EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); + +/* dma_run_dependencies - helper routine for dma drivers to process + * (start) dependent operations on their target channel + * @tx: transaction with dependencies + */ +void dma_run_dependencies(struct dma_async_tx_descriptor *tx) +{ + struct dma_async_tx_descriptor *dep = txd_next(tx); + struct dma_async_tx_descriptor *dep_next; + struct dma_chan *chan; + + if (!dep) + return; + + /* we'll submit tx->next now, so clear the link */ + txd_clear_next(tx); + chan = dep->chan; + + /* keep submitting up until a channel switch is detected + * in that case we will be called again as a result of + * processing the interrupt from async_tx_channel_switch + */ + for (; dep; dep = dep_next) { + txd_lock(dep); + txd_clear_parent(dep); + dep_next = txd_next(dep); + if (dep_next && dep_next->chan == chan) + txd_clear_next(dep); /* ->next will be submitted */ + else + dep_next = NULL; /* submit current dep and terminate */ + txd_unlock(dep); + + dep->tx_submit(dep); + } + + chan->device->device_issue_pending(chan); +} +EXPORT_SYMBOL_GPL(dma_run_dependencies); + +static int __init dma_bus_init(void) +{ + return class_register(&dma_devclass); +} +arch_initcall(dma_bus_init); + + diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h new file mode 100644 index 00000000..17f983a4 --- /dev/null +++ b/drivers/dma/dmaengine.h @@ -0,0 +1,89 @@ +/* + * The contents of this file are private to DMA engine drivers, and is not + * part of the API to be used by DMA engine users. + */ +#ifndef DMAENGINE_H +#define DMAENGINE_H + +#include <linux/bug.h> +#include <linux/dmaengine.h> + +/** + * dma_cookie_init - initialize the cookies for a DMA channel + * @chan: dma channel to initialize + */ +static inline void dma_cookie_init(struct dma_chan *chan) +{ + chan->cookie = DMA_MIN_COOKIE; + chan->completed_cookie = DMA_MIN_COOKIE; +} + +/** + * dma_cookie_assign - assign a DMA engine cookie to the descriptor + * @tx: descriptor needing cookie + * + * Assign a unique non-zero per-channel cookie to the descriptor. + * Note: caller is expected to hold a lock to prevent concurrency. + */ +static inline dma_cookie_t dma_cookie_assign(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *chan = tx->chan; + dma_cookie_t cookie; + + cookie = chan->cookie + 1; + if (cookie < DMA_MIN_COOKIE) + cookie = DMA_MIN_COOKIE; + tx->cookie = chan->cookie = cookie; + + return cookie; +} + +/** + * dma_cookie_complete - complete a descriptor + * @tx: descriptor to complete + * + * Mark this descriptor complete by updating the channels completed + * cookie marker. Zero the descriptors cookie to prevent accidental + * repeated completions. + * + * Note: caller is expected to hold a lock to prevent concurrency. + */ +static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx) +{ + BUG_ON(tx->cookie < DMA_MIN_COOKIE); + tx->chan->completed_cookie = tx->cookie; + tx->cookie = 0; +} + +/** + * dma_cookie_status - report cookie status + * @chan: dma channel + * @cookie: cookie we are interested in + * @state: dma_tx_state structure to return last/used cookies + * + * Report the status of the cookie, filling in the state structure if + * non-NULL. No locking is required. + */ +static inline enum dma_status dma_cookie_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + dma_cookie_t used, complete; + + used = chan->cookie; + complete = chan->completed_cookie; + barrier(); + if (state) { + state->last = complete; + state->used = used; + state->residue = 0; + } + return dma_async_is_complete(cookie, complete, used); +} + +static inline void dma_set_residue(struct dma_tx_state *state, u32 residue) +{ + if (state) + state->residue = residue; +} + +#endif diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c new file mode 100644 index 00000000..24225f0f --- /dev/null +++ b/drivers/dma/dmatest.c @@ -0,0 +1,666 @@ +/* + * DMA Engine test module + * + * Copyright (C) 2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/freezer.h> +#include <linux/init.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/wait.h> + +static unsigned int test_buf_size = 16384; +module_param(test_buf_size, uint, S_IRUGO); +MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer"); + +static char test_channel[20]; +module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO); +MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)"); + +static char test_device[20]; +module_param_string(device, test_device, sizeof(test_device), S_IRUGO); +MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)"); + +static unsigned int threads_per_chan = 1; +module_param(threads_per_chan, uint, S_IRUGO); +MODULE_PARM_DESC(threads_per_chan, + "Number of threads to start per channel (default: 1)"); + +static unsigned int max_channels; +module_param(max_channels, uint, S_IRUGO); +MODULE_PARM_DESC(max_channels, + "Maximum number of channels to use (default: all)"); + +static unsigned int iterations; +module_param(iterations, uint, S_IRUGO); +MODULE_PARM_DESC(iterations, + "Iterations before stopping test (default: infinite)"); + +static unsigned int xor_sources = 3; +module_param(xor_sources, uint, S_IRUGO); +MODULE_PARM_DESC(xor_sources, + "Number of xor source buffers (default: 3)"); + +static unsigned int pq_sources = 3; +module_param(pq_sources, uint, S_IRUGO); +MODULE_PARM_DESC(pq_sources, + "Number of p+q source buffers (default: 3)"); + +static int timeout = 3000; +module_param(timeout, uint, S_IRUGO); +MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), " + "Pass -1 for infinite timeout"); + +/* + * Initialization patterns. All bytes in the source buffer has bit 7 + * set, all bytes in the destination buffer has bit 7 cleared. + * + * Bit 6 is set for all bytes which are to be copied by the DMA + * engine. Bit 5 is set for all bytes which are to be overwritten by + * the DMA engine. + * + * The remaining bits are the inverse of a counter which increments by + * one for each byte address. + */ +#define PATTERN_SRC 0x80 +#define PATTERN_DST 0x00 +#define PATTERN_COPY 0x40 +#define PATTERN_OVERWRITE 0x20 +#define PATTERN_COUNT_MASK 0x1f + +struct dmatest_thread { + struct list_head node; + struct task_struct *task; + struct dma_chan *chan; + u8 **srcs; + u8 **dsts; + enum dma_transaction_type type; +}; + +struct dmatest_chan { + struct list_head node; + struct dma_chan *chan; + struct list_head threads; +}; + +/* + * These are protected by dma_list_mutex since they're only used by + * the DMA filter function callback + */ +static LIST_HEAD(dmatest_channels); +static unsigned int nr_channels; + +static bool dmatest_match_channel(struct dma_chan *chan) +{ + if (test_channel[0] == '\0') + return true; + return strcmp(dma_chan_name(chan), test_channel) == 0; +} + +static bool dmatest_match_device(struct dma_device *device) +{ + if (test_device[0] == '\0') + return true; + return strcmp(dev_name(device->dev), test_device) == 0; +} + +static unsigned long dmatest_random(void) +{ + unsigned long buf; + + get_random_bytes(&buf, sizeof(buf)); + return buf; +} + +static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len) +{ + unsigned int i; + u8 *buf; + + for (; (buf = *bufs); bufs++) { + for (i = 0; i < start; i++) + buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); + for ( ; i < start + len; i++) + buf[i] = PATTERN_SRC | PATTERN_COPY + | (~i & PATTERN_COUNT_MASK); + for ( ; i < test_buf_size; i++) + buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); + buf++; + } +} + +static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len) +{ + unsigned int i; + u8 *buf; + + for (; (buf = *bufs); bufs++) { + for (i = 0; i < start; i++) + buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK); + for ( ; i < start + len; i++) + buf[i] = PATTERN_DST | PATTERN_OVERWRITE + | (~i & PATTERN_COUNT_MASK); + for ( ; i < test_buf_size; i++) + buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK); + } +} + +static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index, + unsigned int counter, bool is_srcbuf) +{ + u8 diff = actual ^ pattern; + u8 expected = pattern | (~counter & PATTERN_COUNT_MASK); + const char *thread_name = current->comm; + + if (is_srcbuf) + pr_warning("%s: srcbuf[0x%x] overwritten!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if ((pattern & PATTERN_COPY) + && (diff & (PATTERN_COPY | PATTERN_OVERWRITE))) + pr_warning("%s: dstbuf[0x%x] not copied!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if (diff & PATTERN_SRC) + pr_warning("%s: dstbuf[0x%x] was copied!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else + pr_warning("%s: dstbuf[0x%x] mismatch!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); +} + +static unsigned int dmatest_verify(u8 **bufs, unsigned int start, + unsigned int end, unsigned int counter, u8 pattern, + bool is_srcbuf) +{ + unsigned int i; + unsigned int error_count = 0; + u8 actual; + u8 expected; + u8 *buf; + unsigned int counter_orig = counter; + + for (; (buf = *bufs); bufs++) { + counter = counter_orig; + for (i = start; i < end; i++) { + actual = buf[i]; + expected = pattern | (~counter & PATTERN_COUNT_MASK); + if (actual != expected) { + if (error_count < 32) + dmatest_mismatch(actual, pattern, i, + counter, is_srcbuf); + error_count++; + } + counter++; + } + } + + if (error_count > 32) + pr_warning("%s: %u errors suppressed\n", + current->comm, error_count - 32); + + return error_count; +} + +/* poor man's completion - we want to use wait_event_freezable() on it */ +struct dmatest_done { + bool done; + wait_queue_head_t *wait; +}; + +static void dmatest_callback(void *arg) +{ + struct dmatest_done *done = arg; + + done->done = true; + wake_up_all(done->wait); +} + +/* + * This function repeatedly tests DMA transfers of various lengths and + * offsets for a given operation type until it is told to exit by + * kthread_stop(). There may be multiple threads running this function + * in parallel for a single channel, and there may be multiple channels + * being tested in parallel. + * + * Before each test, the source and destination buffer is initialized + * with a known pattern. This pattern is different depending on + * whether it's in an area which is supposed to be copied or + * overwritten, and different in the source and destination buffers. + * So if the DMA engine doesn't copy exactly what we tell it to copy, + * we'll notice. + */ +static int dmatest_func(void *data) +{ + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait); + struct dmatest_thread *thread = data; + struct dmatest_done done = { .wait = &done_wait }; + struct dma_chan *chan; + const char *thread_name; + unsigned int src_off, dst_off, len; + unsigned int error_count; + unsigned int failed_tests = 0; + unsigned int total_tests = 0; + dma_cookie_t cookie; + enum dma_status status; + enum dma_ctrl_flags flags; + u8 pq_coefs[pq_sources + 1]; + int ret; + int src_cnt; + int dst_cnt; + int i; + + thread_name = current->comm; + set_freezable(); + + ret = -ENOMEM; + + smp_rmb(); + chan = thread->chan; + if (thread->type == DMA_MEMCPY) + src_cnt = dst_cnt = 1; + else if (thread->type == DMA_XOR) { + src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ + dst_cnt = 1; + } else if (thread->type == DMA_PQ) { + src_cnt = pq_sources | 1; /* force odd to ensure dst = src */ + dst_cnt = 2; + for (i = 0; i < src_cnt; i++) + pq_coefs[i] = 1; + } else + goto err_srcs; + + thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL); + if (!thread->srcs) + goto err_srcs; + for (i = 0; i < src_cnt; i++) { + thread->srcs[i] = kmalloc(test_buf_size, GFP_KERNEL); + if (!thread->srcs[i]) + goto err_srcbuf; + } + thread->srcs[i] = NULL; + + thread->dsts = kcalloc(dst_cnt+1, sizeof(u8 *), GFP_KERNEL); + if (!thread->dsts) + goto err_dsts; + for (i = 0; i < dst_cnt; i++) { + thread->dsts[i] = kmalloc(test_buf_size, GFP_KERNEL); + if (!thread->dsts[i]) + goto err_dstbuf; + } + thread->dsts[i] = NULL; + + set_user_nice(current, 10); + + /* + * src buffers are freed by the DMAEngine code with dma_unmap_single() + * dst buffers are freed by ourselves below + */ + flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT + | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE; + + while (!kthread_should_stop() + && !(iterations && total_tests >= iterations)) { + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx = NULL; + dma_addr_t dma_srcs[src_cnt]; + dma_addr_t dma_dsts[dst_cnt]; + u8 align = 0; + + total_tests++; + + /* honor alignment restrictions */ + if (thread->type == DMA_MEMCPY) + align = dev->copy_align; + else if (thread->type == DMA_XOR) + align = dev->xor_align; + else if (thread->type == DMA_PQ) + align = dev->pq_align; + + if (1 << align > test_buf_size) { + pr_err("%u-byte buffer too small for %d-byte alignment\n", + test_buf_size, 1 << align); + break; + } + + len = dmatest_random() % test_buf_size + 1; + len = (len >> align) << align; + if (!len) + len = 1 << align; + src_off = dmatest_random() % (test_buf_size - len + 1); + dst_off = dmatest_random() % (test_buf_size - len + 1); + + src_off = (src_off >> align) << align; + dst_off = (dst_off >> align) << align; + + dmatest_init_srcs(thread->srcs, src_off, len); + dmatest_init_dsts(thread->dsts, dst_off, len); + + for (i = 0; i < src_cnt; i++) { + u8 *buf = thread->srcs[i] + src_off; + + dma_srcs[i] = dma_map_single(dev->dev, buf, len, + DMA_TO_DEVICE); + } + /* map with DMA_BIDIRECTIONAL to force writeback/invalidate */ + for (i = 0; i < dst_cnt; i++) { + dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i], + test_buf_size, + DMA_BIDIRECTIONAL); + } + + + if (thread->type == DMA_MEMCPY) + tx = dev->device_prep_dma_memcpy(chan, + dma_dsts[0] + dst_off, + dma_srcs[0], len, + flags); + else if (thread->type == DMA_XOR) + tx = dev->device_prep_dma_xor(chan, + dma_dsts[0] + dst_off, + dma_srcs, src_cnt, + len, flags); + else if (thread->type == DMA_PQ) { + dma_addr_t dma_pq[dst_cnt]; + + for (i = 0; i < dst_cnt; i++) + dma_pq[i] = dma_dsts[i] + dst_off; + tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs, + src_cnt, pq_coefs, + len, flags); + } + + if (!tx) { + for (i = 0; i < src_cnt; i++) + dma_unmap_single(dev->dev, dma_srcs[i], len, + DMA_TO_DEVICE); + for (i = 0; i < dst_cnt; i++) + dma_unmap_single(dev->dev, dma_dsts[i], + test_buf_size, + DMA_BIDIRECTIONAL); + pr_warning("%s: #%u: prep error with src_off=0x%x " + "dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, + src_off, dst_off, len); + msleep(100); + failed_tests++; + continue; + } + + done.done = false; + tx->callback = dmatest_callback; + tx->callback_param = &done; + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + pr_warning("%s: #%u: submit error %d with src_off=0x%x " + "dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, cookie, + src_off, dst_off, len); + msleep(100); + failed_tests++; + continue; + } + dma_async_issue_pending(chan); + + wait_event_freezable_timeout(done_wait, done.done, + msecs_to_jiffies(timeout)); + + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); + + if (!done.done) { + /* + * We're leaving the timed out dma operation with + * dangling pointer to done_wait. To make this + * correct, we'll need to allocate wait_done for + * each test iteration and perform "who's gonna + * free it this time?" dancing. For now, just + * leave it dangling. + */ + pr_warning("%s: #%u: test timed out\n", + thread_name, total_tests - 1); + failed_tests++; + continue; + } else if (status != DMA_SUCCESS) { + pr_warning("%s: #%u: got completion callback," + " but status is \'%s\'\n", + thread_name, total_tests - 1, + status == DMA_ERROR ? "error" : "in progress"); + failed_tests++; + continue; + } + + /* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */ + for (i = 0; i < dst_cnt; i++) + dma_unmap_single(dev->dev, dma_dsts[i], test_buf_size, + DMA_BIDIRECTIONAL); + + error_count = 0; + + pr_debug("%s: verifying source buffer...\n", thread_name); + error_count += dmatest_verify(thread->srcs, 0, src_off, + 0, PATTERN_SRC, true); + error_count += dmatest_verify(thread->srcs, src_off, + src_off + len, src_off, + PATTERN_SRC | PATTERN_COPY, true); + error_count += dmatest_verify(thread->srcs, src_off + len, + test_buf_size, src_off + len, + PATTERN_SRC, true); + + pr_debug("%s: verifying dest buffer...\n", + thread->task->comm); + error_count += dmatest_verify(thread->dsts, 0, dst_off, + 0, PATTERN_DST, false); + error_count += dmatest_verify(thread->dsts, dst_off, + dst_off + len, src_off, + PATTERN_SRC | PATTERN_COPY, false); + error_count += dmatest_verify(thread->dsts, dst_off + len, + test_buf_size, dst_off + len, + PATTERN_DST, false); + + if (error_count) { + pr_warning("%s: #%u: %u errors with " + "src_off=0x%x dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, error_count, + src_off, dst_off, len); + failed_tests++; + } else { + pr_debug("%s: #%u: No errors with " + "src_off=0x%x dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, + src_off, dst_off, len); + } + } + + ret = 0; + for (i = 0; thread->dsts[i]; i++) + kfree(thread->dsts[i]); +err_dstbuf: + kfree(thread->dsts); +err_dsts: + for (i = 0; thread->srcs[i]; i++) + kfree(thread->srcs[i]); +err_srcbuf: + kfree(thread->srcs); +err_srcs: + pr_notice("%s: terminating after %u tests, %u failures (status %d)\n", + thread_name, total_tests, failed_tests, ret); + + /* terminate all transfers on specified channels */ + chan->device->device_control(chan, DMA_TERMINATE_ALL, 0); + if (iterations > 0) + while (!kthread_should_stop()) { + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit); + interruptible_sleep_on(&wait_dmatest_exit); + } + + return ret; +} + +static void dmatest_cleanup_channel(struct dmatest_chan *dtc) +{ + struct dmatest_thread *thread; + struct dmatest_thread *_thread; + int ret; + + list_for_each_entry_safe(thread, _thread, &dtc->threads, node) { + ret = kthread_stop(thread->task); + pr_debug("dmatest: thread %s exited with status %d\n", + thread->task->comm, ret); + list_del(&thread->node); + kfree(thread); + } + + /* terminate all transfers on specified channels */ + dtc->chan->device->device_control(dtc->chan, DMA_TERMINATE_ALL, 0); + + kfree(dtc); +} + +static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_type type) +{ + struct dmatest_thread *thread; + struct dma_chan *chan = dtc->chan; + char *op; + unsigned int i; + + if (type == DMA_MEMCPY) + op = "copy"; + else if (type == DMA_XOR) + op = "xor"; + else if (type == DMA_PQ) + op = "pq"; + else + return -EINVAL; + + for (i = 0; i < threads_per_chan; i++) { + thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL); + if (!thread) { + pr_warning("dmatest: No memory for %s-%s%u\n", + dma_chan_name(chan), op, i); + + break; + } + thread->chan = dtc->chan; + thread->type = type; + smp_wmb(); + thread->task = kthread_run(dmatest_func, thread, "%s-%s%u", + dma_chan_name(chan), op, i); + if (IS_ERR(thread->task)) { + pr_warning("dmatest: Failed to run thread %s-%s%u\n", + dma_chan_name(chan), op, i); + kfree(thread); + break; + } + + /* srcbuf and dstbuf are allocated by the thread itself */ + + list_add_tail(&thread->node, &dtc->threads); + } + + return i; +} + +static int dmatest_add_channel(struct dma_chan *chan) +{ + struct dmatest_chan *dtc; + struct dma_device *dma_dev = chan->device; + unsigned int thread_count = 0; + int cnt; + + dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL); + if (!dtc) { + pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan)); + return -ENOMEM; + } + + dtc->chan = chan; + INIT_LIST_HEAD(&dtc->threads); + + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { + cnt = dmatest_add_threads(dtc, DMA_MEMCPY); + thread_count += cnt > 0 ? cnt : 0; + } + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + cnt = dmatest_add_threads(dtc, DMA_XOR); + thread_count += cnt > 0 ? cnt : 0; + } + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + cnt = dmatest_add_threads(dtc, DMA_PQ); + thread_count += cnt > 0 ? cnt : 0; + } + + pr_info("dmatest: Started %u threads using %s\n", + thread_count, dma_chan_name(chan)); + + list_add_tail(&dtc->node, &dmatest_channels); + nr_channels++; + + return 0; +} + +static bool filter(struct dma_chan *chan, void *param) +{ + if (!dmatest_match_channel(chan) || !dmatest_match_device(chan->device)) + return false; + else + return true; +} + +static int __init dmatest_init(void) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + int err = 0; + + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + for (;;) { + chan = dma_request_channel(mask, filter, NULL); + if (chan) { + err = dmatest_add_channel(chan); + if (err) { + dma_release_channel(chan); + break; /* add_channel failed, punt */ + } + } else + break; /* no more channels available */ + if (max_channels && nr_channels >= max_channels) + break; /* we have all we need */ + } + + return err; +} +/* when compiled-in wait for drivers to load first */ +late_initcall(dmatest_init); + +static void __exit dmatest_exit(void) +{ + struct dmatest_chan *dtc, *_dtc; + struct dma_chan *chan; + + list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) { + list_del(&dtc->node); + chan = dtc->chan; + dmatest_cleanup_channel(dtc); + pr_debug("dmatest: dropped channel %s\n", + dma_chan_name(chan)); + dma_release_channel(chan); + } +} +module_exit(dmatest_exit); + +MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c new file mode 100644 index 00000000..7439079f --- /dev/null +++ b/drivers/dma/dw_dmac.c @@ -0,0 +1,1619 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on + * AVR32 systems.) + * + * Copyright (C) 2007-2008 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "dw_dmac_regs.h" +#include "dmaengine.h" + +/* + * This supports the Synopsys "DesignWare AHB Central DMA Controller", + * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all + * of which use ARM any more). See the "Databook" from Synopsys for + * information beyond what licensees probably provide. + * + * The driver has currently been tested only with the Atmel AT32AP7000, + * which does not support descriptor writeback. + */ + +#define DWC_DEFAULT_CTLLO(_chan) ({ \ + struct dw_dma_slave *__slave = (_chan->private); \ + struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan); \ + struct dma_slave_config *_sconfig = &_dwc->dma_sconfig; \ + int _dms = __slave ? __slave->dst_master : 0; \ + int _sms = __slave ? __slave->src_master : 1; \ + u8 _smsize = __slave ? _sconfig->src_maxburst : \ + DW_DMA_MSIZE_16; \ + u8 _dmsize = __slave ? _sconfig->dst_maxburst : \ + DW_DMA_MSIZE_16; \ + \ + (DWC_CTLL_DST_MSIZE(_dmsize) \ + | DWC_CTLL_SRC_MSIZE(_smsize) \ + | DWC_CTLL_LLP_D_EN \ + | DWC_CTLL_LLP_S_EN \ + | DWC_CTLL_DMS(_dms) \ + | DWC_CTLL_SMS(_sms)); \ + }) + +/* + * This is configuration-dependent and usually a funny size like 4095. + * + * Note that this is a transfer count, i.e. if we transfer 32-bit + * words, we can do 16380 bytes per descriptor. + * + * This parameter is also system-specific. + */ +#define DWC_MAX_COUNT 4095U + +/* + * Number of descriptors to allocate for each channel. This should be + * made configurable somehow; preferably, the clients (at least the + * ones using slave transfers) should be able to give us a hint. + */ +#define NR_DESCS_PER_CHANNEL 64 + +/*----------------------------------------------------------------------*/ + +/* + * Because we're not relying on writeback from the controller (it may not + * even be configured into the core!) we don't need to use dma_pool. These + * descriptors -- and associated data -- are cacheable. We do need to make + * sure their dcache entries are written back before handing them off to + * the controller, though. + */ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} +static struct device *chan2parent(struct dma_chan *chan) +{ + return chan->dev->device.parent; +} + +static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) +{ + return list_entry(dwc->active_list.next, struct dw_desc, desc_node); +} + +static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +{ + struct dw_desc *desc, *_desc; + struct dw_desc *ret = NULL; + unsigned int i = 0; + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc); + i++; + } + spin_unlock_irqrestore(&dwc->lock, flags); + + dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i); + + return ret; +} + +static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + struct dw_desc *child; + + list_for_each_entry(child, &desc->tx_list, desc_node) + dma_sync_single_for_cpu(chan2parent(&dwc->chan), + child->txd.phys, sizeof(child->lli), + DMA_TO_DEVICE); + dma_sync_single_for_cpu(chan2parent(&dwc->chan), + desc->txd.phys, sizeof(desc->lli), + DMA_TO_DEVICE); +} + +/* + * Move a descriptor, including any children, to the free list. + * `desc' must not be on any lists. + */ +static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + unsigned long flags; + + if (desc) { + struct dw_desc *child; + + dwc_sync_desc_for_cpu(dwc, desc); + + spin_lock_irqsave(&dwc->lock, flags); + list_for_each_entry(child, &desc->tx_list, desc_node) + dev_vdbg(chan2dev(&dwc->chan), + "moving child desc %p to freelist\n", + child); + list_splice_init(&desc->tx_list, &dwc->free_list); + dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); + list_add(&desc->desc_node, &dwc->free_list); + spin_unlock_irqrestore(&dwc->lock, flags); + } +} + +static void dwc_initialize(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_dma_slave *dws = dwc->chan.private; + u32 cfghi = DWC_CFGH_FIFO_MODE; + u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); + + if (dwc->initialized == true) + return; + + if (dws) { + /* + * We need controller-specific data to set up slave + * transfers. + */ + BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); + + cfghi = dws->cfg_hi; + cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK; + } + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); + + /* Enable interrupts */ + channel_set_bit(dw, MASK.XFER, dwc->mask); + channel_set_bit(dw, MASK.ERROR, dwc->mask); + + dwc->initialized = true; +} + +/*----------------------------------------------------------------------*/ + +/* Called with dwc->lock held and bh disabled */ +static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(chan2dev(&dwc->chan), + "BUG: Attempted to start non-idle channel\n"); + dev_err(chan2dev(&dwc->chan), + " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", + channel_readl(dwc, SAR), + channel_readl(dwc, DAR), + channel_readl(dwc, LLP), + channel_readl(dwc, CTL_HI), + channel_readl(dwc, CTL_LO)); + + /* The tasklet will hopefully advance the queue... */ + return; + } + + dwc_initialize(dwc); + + channel_writel(dwc, LLP, first->txd.phys); + channel_writel(dwc, CTL_LO, + DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, CTL_HI, 0); + channel_set_bit(dw, CH_EN, dwc->mask); +} + +/*----------------------------------------------------------------------*/ + +static void +dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, + bool callback_required) +{ + dma_async_tx_callback callback = NULL; + void *param = NULL; + struct dma_async_tx_descriptor *txd = &desc->txd; + struct dw_desc *child; + unsigned long flags; + + dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie); + + spin_lock_irqsave(&dwc->lock, flags); + dma_cookie_complete(txd); + if (callback_required) { + callback = txd->callback; + param = txd->callback_param; + } + + dwc_sync_desc_for_cpu(dwc, desc); + + /* async_tx_ack */ + list_for_each_entry(child, &desc->tx_list, desc_node) + async_tx_ack(&child->txd); + async_tx_ack(&desc->txd); + + list_splice_init(&desc->tx_list, &dwc->free_list); + list_move(&desc->desc_node, &dwc->free_list); + + if (!dwc->chan.private) { + struct device *parent = chan2parent(&dwc->chan); + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(parent, desc->lli.dar, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(parent, desc->lli.dar, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(parent, desc->lli.sar, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(parent, desc->lli.sar, + desc->len, DMA_TO_DEVICE); + } + } + + spin_unlock_irqrestore(&dwc->lock, flags); + + if (callback_required && callback) + callback(param); +} + +static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + struct dw_desc *desc, *_desc; + LIST_HEAD(list); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(chan2dev(&dwc->chan), + "BUG: XFER bit set, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + } + + /* + * Submit queued descriptors ASAP, i.e. before we go through + * the completed ones. + */ + list_splice_init(&dwc->active_list, &list); + if (!list_empty(&dwc->queue)) { + list_move(dwc->queue.next, &dwc->active_list); + dwc_dostart(dwc, dwc_first_active(dwc)); + } + + spin_unlock_irqrestore(&dwc->lock, flags); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc, true); +} + +static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + dma_addr_t llp; + struct dw_desc *desc, *_desc; + struct dw_desc *child; + u32 status_xfer; + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + llp = channel_readl(dwc, LLP); + status_xfer = dma_readl(dw, RAW.XFER); + + if (status_xfer & dwc->mask) { + /* Everything we've submitted is done */ + dma_writel(dw, CLEAR.XFER, dwc->mask); + spin_unlock_irqrestore(&dwc->lock, flags); + + dwc_complete_all(dw, dwc); + return; + } + + if (list_empty(&dwc->active_list)) { + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp); + + list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + /* check first descriptors addr */ + if (desc->txd.phys == llp) { + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + /* check first descriptors llp */ + if (desc->lli.llp == llp) { + /* This one is currently in progress */ + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + list_for_each_entry(child, &desc->tx_list, desc_node) + if (child->lli.llp == llp) { + /* Currently in progress */ + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + /* + * No descriptors so far seem to be in progress, i.e. + * this one must be done. + */ + spin_unlock_irqrestore(&dwc->lock, flags); + dwc_descriptor_complete(dwc, desc, true); + spin_lock_irqsave(&dwc->lock, flags); + } + + dev_err(chan2dev(&dwc->chan), + "BUG: All descriptors done, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + if (!list_empty(&dwc->queue)) { + list_move(dwc->queue.next, &dwc->active_list); + dwc_dostart(dwc, dwc_first_active(dwc)); + } + spin_unlock_irqrestore(&dwc->lock, flags); +} + +static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) +{ + dev_printk(KERN_CRIT, chan2dev(&dwc->chan), + " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", + lli->sar, lli->dar, lli->llp, + lli->ctlhi, lli->ctllo); +} + +static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + struct dw_desc *bad_desc; + struct dw_desc *child; + unsigned long flags; + + dwc_scan_descriptors(dw, dwc); + + spin_lock_irqsave(&dwc->lock, flags); + + /* + * The descriptor currently at the head of the active list is + * borked. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to carry on. + */ + bad_desc = dwc_first_active(dwc); + list_del_init(&bad_desc->desc_node); + list_move(dwc->queue.next, dwc->active_list.prev); + + /* Clear the error flag and try to restart the controller */ + dma_writel(dw, CLEAR.ERROR, dwc->mask); + if (!list_empty(&dwc->active_list)) + dwc_dostart(dwc, dwc_first_active(dwc)); + + /* + * KERN_CRITICAL may seem harsh, but since this only happens + * when someone submits a bad physical address in a + * descriptor, we should consider ourselves lucky that the + * controller flagged an error instead of scribbling over + * random memory locations. + */ + dev_printk(KERN_CRIT, chan2dev(&dwc->chan), + "Bad descriptor submitted for DMA!\n"); + dev_printk(KERN_CRIT, chan2dev(&dwc->chan), + " cookie: %d\n", bad_desc->txd.cookie); + dwc_dump_lli(dwc, &bad_desc->lli); + list_for_each_entry(child, &bad_desc->tx_list, desc_node) + dwc_dump_lli(dwc, &child->lli); + + spin_unlock_irqrestore(&dwc->lock, flags); + + /* Pretend the descriptor completed successfully */ + dwc_descriptor_complete(dwc, bad_desc, true); +} + +/* --------------------- Cyclic DMA API extensions -------------------- */ + +inline dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + return channel_readl(dwc, SAR); +} +EXPORT_SYMBOL(dw_dma_get_src_addr); + +inline dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + return channel_readl(dwc, DAR); +} +EXPORT_SYMBOL(dw_dma_get_dst_addr); + +/* called with dwc->lock held and all DMAC interrupts disabled */ +static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, + u32 status_err, u32 status_xfer) +{ + unsigned long flags; + + if (dwc->mask) { + void (*callback)(void *param); + void *callback_param; + + dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n", + channel_readl(dwc, LLP)); + + callback = dwc->cdesc->period_callback; + callback_param = dwc->cdesc->period_callback_param; + + if (callback) + callback(callback_param); + } + + /* + * Error and transfer complete are highly unlikely, and will most + * likely be due to a configuration error by the user. + */ + if (unlikely(status_err & dwc->mask) || + unlikely(status_xfer & dwc->mask)) { + int i; + + dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s " + "interrupt, stopping DMA transfer\n", + status_xfer ? "xfer" : "error"); + + spin_lock_irqsave(&dwc->lock, flags); + + dev_err(chan2dev(&dwc->chan), + " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", + channel_readl(dwc, SAR), + channel_readl(dwc, DAR), + channel_readl(dwc, LLP), + channel_readl(dwc, CTL_HI), + channel_readl(dwc, CTL_LO)); + + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + /* make sure DMA does not restart by loading a new list */ + channel_writel(dwc, LLP, 0); + channel_writel(dwc, CTL_LO, 0); + channel_writel(dwc, CTL_HI, 0); + + dma_writel(dw, CLEAR.ERROR, dwc->mask); + dma_writel(dw, CLEAR.XFER, dwc->mask); + + for (i = 0; i < dwc->cdesc->periods; i++) + dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli); + + spin_unlock_irqrestore(&dwc->lock, flags); + } +} + +/* ------------------------------------------------------------------------- */ + +static void dw_dma_tasklet(unsigned long data) +{ + struct dw_dma *dw = (struct dw_dma *)data; + struct dw_dma_chan *dwc; + u32 status_xfer; + u32 status_err; + int i; + + status_xfer = dma_readl(dw, RAW.XFER); + status_err = dma_readl(dw, RAW.ERROR); + + dev_vdbg(dw->dma.dev, "tasklet: status_err=%x\n", status_err); + + for (i = 0; i < dw->dma.chancnt; i++) { + dwc = &dw->chan[i]; + if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) + dwc_handle_cyclic(dw, dwc, status_err, status_xfer); + else if (status_err & (1 << i)) + dwc_handle_error(dw, dwc); + else if (status_xfer & (1 << i)) + dwc_scan_descriptors(dw, dwc); + } + + /* + * Re-enable interrupts. + */ + channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); +} + +static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) +{ + struct dw_dma *dw = dev_id; + u32 status; + + dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n", + dma_readl(dw, STATUS_INT)); + + /* + * Just disable the interrupts. We'll turn them back on in the + * softirq handler. + */ + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + status = dma_readl(dw, STATUS_INT); + if (status) { + dev_err(dw->dma.dev, + "BUG: Unexpected interrupts pending: 0x%x\n", + status); + + /* Try to recover */ + channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); + channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); + channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); + } + + tasklet_schedule(&dw->tasklet); + + return IRQ_HANDLED; +} + +/*----------------------------------------------------------------------*/ + +static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dw_desc *desc = txd_to_dw_desc(tx); + struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + cookie = dma_cookie_assign(tx); + + /* + * REVISIT: We should attempt to chain as many descriptors as + * possible, perhaps even appending to those already submitted + * for DMA. But this is hard to do in a race-free manner. + */ + if (list_empty(&dwc->active_list)) { + dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", + desc->txd.cookie); + list_add_tail(&desc->desc_node, &dwc->active_list); + dwc_dostart(dwc, dwc_first_active(dwc)); + } else { + dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", + desc->txd.cookie); + + list_add_tail(&desc->desc_node, &dwc->queue); + } + + spin_unlock_irqrestore(&dwc->lock, flags); + + return cookie; +} + +static struct dma_async_tx_descriptor * +dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_desc *desc; + struct dw_desc *first; + struct dw_desc *prev; + size_t xfer_count; + size_t offset; + unsigned int src_width; + unsigned int dst_width; + u32 ctllo; + + dev_vdbg(chan2dev(chan), "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n", + dest, src, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + /* + * We can be a lot more clever here, but this should take care + * of the most common optimization. + */ + if (!((src | dest | len) & 7)) + src_width = dst_width = 3; + else if (!((src | dest | len) & 3)) + src_width = dst_width = 2; + else if (!((src | dest | len) & 1)) + src_width = dst_width = 1; + else + src_width = dst_width = 0; + + ctllo = DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_DST_WIDTH(dst_width) + | DWC_CTLL_SRC_WIDTH(src_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_INC + | DWC_CTLL_FC_M2M; + prev = first = NULL; + + for (offset = 0; offset < len; offset += xfer_count << src_width) { + xfer_count = min_t(size_t, (len - offset) >> src_width, + DWC_MAX_COUNT); + + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + + desc->lli.sar = src + offset; + desc->lli.dar = dest + offset; + desc->lli.ctllo = ctllo; + desc->lli.ctlhi = xfer_count; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan2parent(chan), + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->tx_list); + } + prev = desc; + } + + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ + prev->lli.ctllo |= DWC_CTLL_INT_EN; + + prev->lli.llp = 0; + dma_sync_single_for_device(chan2parent(chan), + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + + first->txd.flags = flags; + first->len = len; + + return &first->txd; + +err_desc_get: + dwc_desc_put(dwc, first); + return NULL; +} + +static struct dma_async_tx_descriptor * +dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma_slave *dws = chan->private; + struct dma_slave_config *sconfig = &dwc->dma_sconfig; + struct dw_desc *prev; + struct dw_desc *first; + u32 ctllo; + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; + + dev_vdbg(chan2dev(chan), "prep_dma_slave\n"); + + if (unlikely(!dws || !sg_len)) + return NULL; + + prev = first = NULL; + + switch (direction) { + case DMA_MEM_TO_DEV: + reg_width = __fls(sconfig->dst_addr_width); + reg = sconfig->dst_addr; + ctllo = (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_DST_FIX + | DWC_CTLL_SRC_INC); + + ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) : + DWC_CTLL_FC(DW_DMA_FC_D_M2P); + + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len, dlen, mem; + + mem = sg_phys(sg); + len = sg_dma_len(sg); + + if (!((mem | len) & 7)) + mem_width = 3; + else if (!((mem | len) & 3)) + mem_width = 2; + else if (!((mem | len) & 1)) + mem_width = 1; + else + mem_width = 0; + +slave_sg_todev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) { + dev_err(chan2dev(chan), + "not enough descriptors available\n"); + goto err_desc_get; + } + + desc->lli.sar = mem; + desc->lli.dar = reg; + desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); + if ((len >> mem_width) > DWC_MAX_COUNT) { + dlen = DWC_MAX_COUNT << mem_width; + mem += dlen; + len -= dlen; + } else { + dlen = len; + len = 0; + } + + desc->lli.ctlhi = dlen >> mem_width; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan2parent(chan), + prev->txd.phys, + sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->tx_list); + } + prev = desc; + total_len += dlen; + + if (len) + goto slave_sg_todev_fill_desc; + } + break; + case DMA_DEV_TO_MEM: + reg_width = __fls(sconfig->src_addr_width); + reg = sconfig->src_addr; + ctllo = (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_FIX); + + ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) : + DWC_CTLL_FC(DW_DMA_FC_D_P2M); + + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len, dlen, mem; + + mem = sg_phys(sg); + len = sg_dma_len(sg); + + if (!((mem | len) & 7)) + mem_width = 3; + else if (!((mem | len) & 3)) + mem_width = 2; + else if (!((mem | len) & 1)) + mem_width = 1; + else + mem_width = 0; + +slave_sg_fromdev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) { + dev_err(chan2dev(chan), + "not enough descriptors available\n"); + goto err_desc_get; + } + + desc->lli.sar = reg; + desc->lli.dar = mem; + desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); + if ((len >> reg_width) > DWC_MAX_COUNT) { + dlen = DWC_MAX_COUNT << reg_width; + mem += dlen; + len -= dlen; + } else { + dlen = len; + len = 0; + } + desc->lli.ctlhi = dlen >> reg_width; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan2parent(chan), + prev->txd.phys, + sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->tx_list); + } + prev = desc; + total_len += dlen; + + if (len) + goto slave_sg_fromdev_fill_desc; + } + break; + default: + return NULL; + } + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ + prev->lli.ctllo |= DWC_CTLL_INT_EN; + + prev->lli.llp = 0; + dma_sync_single_for_device(chan2parent(chan), + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + + first->len = total_len; + + return &first->txd; + +err_desc_get: + dwc_desc_put(dwc, first); + return NULL; +} + +/* + * Fix sconfig's burst size according to dw_dmac. We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. + * + * NOTE: burst size 2 is not supported by controller. + * + * This can be done by finding least significant bit set: n & (n - 1) + */ +static inline void convert_burst(u32 *maxburst) +{ + if (*maxburst > 1) + *maxburst = fls(*maxburst) - 2; + else + *maxburst = 0; +} + +static int +set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + /* Check if it is chan is configured for slave transfers */ + if (!chan->private) + return -EINVAL; + + memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig)); + + convert_burst(&dwc->dma_sconfig.src_maxburst); + convert_burst(&dwc->dma_sconfig.dst_maxburst); + + return 0; +} + +static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc, *_desc; + unsigned long flags; + u32 cfglo; + LIST_HEAD(list); + + if (cmd == DMA_PAUSE) { + spin_lock_irqsave(&dwc->lock, flags); + + cfglo = channel_readl(dwc, CFG_LO); + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); + while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY)) + cpu_relax(); + + dwc->paused = true; + spin_unlock_irqrestore(&dwc->lock, flags); + } else if (cmd == DMA_RESUME) { + if (!dwc->paused) + return 0; + + spin_lock_irqsave(&dwc->lock, flags); + + cfglo = channel_readl(dwc, CFG_LO); + channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); + dwc->paused = false; + + spin_unlock_irqrestore(&dwc->lock, flags); + } else if (cmd == DMA_TERMINATE_ALL) { + spin_lock_irqsave(&dwc->lock, flags); + + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + dwc->paused = false; + + /* active_list entries will end up before queued entries */ + list_splice_init(&dwc->queue, &list); + list_splice_init(&dwc->active_list, &list); + + spin_unlock_irqrestore(&dwc->lock, flags); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc, false); + } else if (cmd == DMA_SLAVE_CONFIG) { + return set_runtime_config(chan, (struct dma_slave_config *)arg); + } else { + return -ENXIO; + } + + return 0; +} + +static enum dma_status +dwc_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret != DMA_SUCCESS) { + dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + + ret = dma_cookie_status(chan, cookie, txstate); + } + + if (ret != DMA_SUCCESS) + dma_set_residue(txstate, dwc_first_active(dwc)->len); + + if (dwc->paused) + return DMA_PAUSED; + + return ret; +} + +static void dwc_issue_pending(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + if (!list_empty(&dwc->queue)) + dwc_scan_descriptors(to_dw_dma(chan->device), dwc); +} + +static int dwc_alloc_chan_resources(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc; + int i; + unsigned long flags; + + dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_dbg(chan2dev(chan), "DMA channel not idle?\n"); + return -EIO; + } + + dma_cookie_init(chan); + + /* + * NOTE: some controllers may have additional features that we + * need to initialize here, like "scatter-gather" (which + * doesn't mean what you think it means), and status writeback. + */ + + spin_lock_irqsave(&dwc->lock, flags); + i = dwc->descs_allocated; + while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { + spin_unlock_irqrestore(&dwc->lock, flags); + + desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL); + if (!desc) { + dev_info(chan2dev(chan), + "only allocated %d descriptors\n", i); + spin_lock_irqsave(&dwc->lock, flags); + break; + } + + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = dwc_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = dma_map_single(chan2parent(chan), &desc->lli, + sizeof(desc->lli), DMA_TO_DEVICE); + dwc_desc_put(dwc, desc); + + spin_lock_irqsave(&dwc->lock, flags); + i = ++dwc->descs_allocated; + } + + spin_unlock_irqrestore(&dwc->lock, flags); + + dev_dbg(chan2dev(chan), + "alloc_chan_resources allocated %d descriptors\n", i); + + return i; +} + +static void dwc_free_chan_resources(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc, *_desc; + unsigned long flags; + LIST_HEAD(list); + + dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n", + dwc->descs_allocated); + + /* ASSERT: channel is idle */ + BUG_ON(!list_empty(&dwc->active_list)); + BUG_ON(!list_empty(&dwc->queue)); + BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); + + spin_lock_irqsave(&dwc->lock, flags); + list_splice_init(&dwc->free_list, &list); + dwc->descs_allocated = 0; + dwc->initialized = false; + + /* Disable interrupts */ + channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.ERROR, dwc->mask); + + spin_unlock_irqrestore(&dwc->lock, flags); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) { + dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); + dma_unmap_single(chan2parent(chan), desc->txd.phys, + sizeof(desc->lli), DMA_TO_DEVICE); + kfree(desc); + } + + dev_vdbg(chan2dev(chan), "free_chan_resources done\n"); +} + +/* --------------------- Cyclic DMA API extensions -------------------- */ + +/** + * dw_dma_cyclic_start - start the cyclic DMA transfer + * @chan: the DMA channel to start + * + * Must be called with soft interrupts disabled. Returns zero on success or + * -errno on failure. + */ +int dw_dma_cyclic_start(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + unsigned long flags; + + if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { + dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n"); + return -ENODEV; + } + + spin_lock_irqsave(&dwc->lock, flags); + + /* assert channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(chan2dev(&dwc->chan), + "BUG: Attempted to start non-idle channel\n"); + dev_err(chan2dev(&dwc->chan), + " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", + channel_readl(dwc, SAR), + channel_readl(dwc, DAR), + channel_readl(dwc, LLP), + channel_readl(dwc, CTL_HI), + channel_readl(dwc, CTL_LO)); + spin_unlock_irqrestore(&dwc->lock, flags); + return -EBUSY; + } + + dma_writel(dw, CLEAR.ERROR, dwc->mask); + dma_writel(dw, CLEAR.XFER, dwc->mask); + + /* setup DMAC channel registers */ + channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys); + channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, CTL_HI, 0); + + channel_set_bit(dw, CH_EN, dwc->mask); + + spin_unlock_irqrestore(&dwc->lock, flags); + + return 0; +} +EXPORT_SYMBOL(dw_dma_cyclic_start); + +/** + * dw_dma_cyclic_stop - stop the cyclic DMA transfer + * @chan: the DMA channel to stop + * + * Must be called with soft interrupts disabled. + */ +void dw_dma_cyclic_stop(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + spin_unlock_irqrestore(&dwc->lock, flags); +} +EXPORT_SYMBOL(dw_dma_cyclic_stop); + +/** + * dw_dma_cyclic_prep - prepare the cyclic DMA transfer + * @chan: the DMA channel to prepare + * @buf_addr: physical DMA address where the buffer starts + * @buf_len: total number of bytes for the entire buffer + * @period_len: number of bytes for each period + * @direction: transfer direction, to or from device + * + * Must be called before trying to start the transfer. Returns a valid struct + * dw_cyclic_desc if successful or an ERR_PTR(-errno) if not successful. + */ +struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, + dma_addr_t buf_addr, size_t buf_len, size_t period_len, + enum dma_transfer_direction direction) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dma_slave_config *sconfig = &dwc->dma_sconfig; + struct dw_cyclic_desc *cdesc; + struct dw_cyclic_desc *retval = NULL; + struct dw_desc *desc; + struct dw_desc *last = NULL; + unsigned long was_cyclic; + unsigned int reg_width; + unsigned int periods; + unsigned int i; + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) { + spin_unlock_irqrestore(&dwc->lock, flags); + dev_dbg(chan2dev(&dwc->chan), + "queue and/or active list are not empty\n"); + return ERR_PTR(-EBUSY); + } + + was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags); + spin_unlock_irqrestore(&dwc->lock, flags); + if (was_cyclic) { + dev_dbg(chan2dev(&dwc->chan), + "channel already prepared for cyclic DMA\n"); + return ERR_PTR(-EBUSY); + } + + retval = ERR_PTR(-EINVAL); + + if (direction == DMA_MEM_TO_DEV) + reg_width = __ffs(sconfig->dst_addr_width); + else + reg_width = __ffs(sconfig->src_addr_width); + + periods = buf_len / period_len; + + /* Check for too big/unaligned periods and unaligned DMA buffer. */ + if (period_len > (DWC_MAX_COUNT << reg_width)) + goto out_err; + if (unlikely(period_len & ((1 << reg_width) - 1))) + goto out_err; + if (unlikely(buf_addr & ((1 << reg_width) - 1))) + goto out_err; + if (unlikely(!(direction & (DMA_MEM_TO_DEV | DMA_DEV_TO_MEM)))) + goto out_err; + + retval = ERR_PTR(-ENOMEM); + + if (periods > NR_DESCS_PER_CHANNEL) + goto out_err; + + cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL); + if (!cdesc) + goto out_err; + + cdesc->desc = kzalloc(sizeof(struct dw_desc *) * periods, GFP_KERNEL); + if (!cdesc->desc) + goto out_err_alloc; + + for (i = 0; i < periods; i++) { + desc = dwc_desc_get(dwc); + if (!desc) + goto out_err_desc_get; + + switch (direction) { + case DMA_MEM_TO_DEV: + desc->lli.dar = sconfig->dst_addr; + desc->lli.sar = buf_addr + (period_len * i); + desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_FIX + | DWC_CTLL_SRC_INC + | DWC_CTLL_INT_EN); + + desc->lli.ctllo |= sconfig->device_fc ? + DWC_CTLL_FC(DW_DMA_FC_P_M2P) : + DWC_CTLL_FC(DW_DMA_FC_D_M2P); + + break; + case DMA_DEV_TO_MEM: + desc->lli.dar = buf_addr + (period_len * i); + desc->lli.sar = sconfig->src_addr; + desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_FIX + | DWC_CTLL_INT_EN); + + desc->lli.ctllo |= sconfig->device_fc ? + DWC_CTLL_FC(DW_DMA_FC_P_P2M) : + DWC_CTLL_FC(DW_DMA_FC_D_P2M); + + break; + default: + break; + } + + desc->lli.ctlhi = (period_len >> reg_width); + cdesc->desc[i] = desc; + + if (last) { + last->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan2parent(chan), + last->txd.phys, sizeof(last->lli), + DMA_TO_DEVICE); + } + + last = desc; + } + + /* lets make a cyclic list */ + last->lli.llp = cdesc->desc[0]->txd.phys; + dma_sync_single_for_device(chan2parent(chan), last->txd.phys, + sizeof(last->lli), DMA_TO_DEVICE); + + dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf 0x%08x len %zu " + "period %zu periods %d\n", buf_addr, buf_len, + period_len, periods); + + cdesc->periods = periods; + dwc->cdesc = cdesc; + + return cdesc; + +out_err_desc_get: + while (i--) + dwc_desc_put(dwc, cdesc->desc[i]); +out_err_alloc: + kfree(cdesc); +out_err: + clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); + return (struct dw_cyclic_desc *)retval; +} +EXPORT_SYMBOL(dw_dma_cyclic_prep); + +/** + * dw_dma_cyclic_free - free a prepared cyclic DMA transfer + * @chan: the DMA channel to free + */ +void dw_dma_cyclic_free(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_cyclic_desc *cdesc = dwc->cdesc; + int i; + unsigned long flags; + + dev_dbg(chan2dev(&dwc->chan), "cyclic free\n"); + + if (!cdesc) + return; + + spin_lock_irqsave(&dwc->lock, flags); + + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + dma_writel(dw, CLEAR.ERROR, dwc->mask); + dma_writel(dw, CLEAR.XFER, dwc->mask); + + spin_unlock_irqrestore(&dwc->lock, flags); + + for (i = 0; i < cdesc->periods; i++) + dwc_desc_put(dwc, cdesc->desc[i]); + + kfree(cdesc->desc); + kfree(cdesc); + + clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); +} +EXPORT_SYMBOL(dw_dma_cyclic_free); + +/*----------------------------------------------------------------------*/ + +static void dw_dma_off(struct dw_dma *dw) +{ + int i; + + dma_writel(dw, CFG, 0); + + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + while (dma_readl(dw, CFG) & DW_CFG_DMA_EN) + cpu_relax(); + + for (i = 0; i < dw->dma.chancnt; i++) + dw->chan[i].initialized = false; +} + +static int __init dw_probe(struct platform_device *pdev) +{ + struct dw_dma_platform_data *pdata; + struct resource *io; + struct dw_dma *dw; + size_t size; + int irq; + int err; + int i; + + pdata = dev_get_platdata(&pdev->dev); + if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) + return -EINVAL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + size = sizeof(struct dw_dma); + size += pdata->nr_channels * sizeof(struct dw_dma_chan); + dw = kzalloc(size, GFP_KERNEL); + if (!dw) + return -ENOMEM; + + if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) { + err = -EBUSY; + goto err_kfree; + } + + dw->regs = ioremap(io->start, DW_REGLEN); + if (!dw->regs) { + err = -ENOMEM; + goto err_release_r; + } + + dw->clk = clk_get(&pdev->dev, "hclk"); + if (IS_ERR(dw->clk)) { + err = PTR_ERR(dw->clk); + goto err_clk; + } + clk_enable(dw->clk); + + /* force dma off, just in case */ + dw_dma_off(dw); + + err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw); + if (err) + goto err_irq; + + platform_set_drvdata(pdev, dw); + + tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw); + + dw->all_chan_mask = (1 << pdata->nr_channels) - 1; + + INIT_LIST_HEAD(&dw->dma.channels); + for (i = 0; i < pdata->nr_channels; i++) { + struct dw_dma_chan *dwc = &dw->chan[i]; + + dwc->chan.device = &dw->dma; + dma_cookie_init(&dwc->chan); + if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING) + list_add_tail(&dwc->chan.device_node, + &dw->dma.channels); + else + list_add(&dwc->chan.device_node, &dw->dma.channels); + + /* 7 is highest priority & 0 is lowest. */ + if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) + dwc->priority = pdata->nr_channels - i - 1; + else + dwc->priority = i; + + dwc->ch_regs = &__dw_regs(dw)->CHAN[i]; + spin_lock_init(&dwc->lock); + dwc->mask = 1 << i; + + INIT_LIST_HEAD(&dwc->active_list); + INIT_LIST_HEAD(&dwc->queue); + INIT_LIST_HEAD(&dwc->free_list); + + channel_clear_bit(dw, CH_EN, dwc->mask); + } + + /* Clear/disable all interrupts on all channels. */ + dma_writel(dw, CLEAR.XFER, dw->all_chan_mask); + dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask); + dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask); + dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask); + + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); + if (pdata->is_private) + dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); + dw->dma.dev = &pdev->dev; + dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; + dw->dma.device_free_chan_resources = dwc_free_chan_resources; + + dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy; + + dw->dma.device_prep_slave_sg = dwc_prep_slave_sg; + dw->dma.device_control = dwc_control; + + dw->dma.device_tx_status = dwc_tx_status; + dw->dma.device_issue_pending = dwc_issue_pending; + + dma_writel(dw, CFG, DW_CFG_DMA_EN); + + printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n", + dev_name(&pdev->dev), pdata->nr_channels); + + dma_async_device_register(&dw->dma); + + return 0; + +err_irq: + clk_disable(dw->clk); + clk_put(dw->clk); +err_clk: + iounmap(dw->regs); + dw->regs = NULL; +err_release_r: + release_resource(io); +err_kfree: + kfree(dw); + return err; +} + +static int __exit dw_remove(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + struct dw_dma_chan *dwc, *_dwc; + struct resource *io; + + dw_dma_off(dw); + dma_async_device_unregister(&dw->dma); + + free_irq(platform_get_irq(pdev, 0), dw); + tasklet_kill(&dw->tasklet); + + list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels, + chan.device_node) { + list_del(&dwc->chan.device_node); + channel_clear_bit(dw, CH_EN, dwc->mask); + } + + clk_disable(dw->clk); + clk_put(dw->clk); + + iounmap(dw->regs); + dw->regs = NULL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(io->start, DW_REGLEN); + + kfree(dw); + + return 0; +} + +static void dw_shutdown(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + dw_dma_off(platform_get_drvdata(pdev)); + clk_disable(dw->clk); +} + +static int dw_suspend_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_dma *dw = platform_get_drvdata(pdev); + + dw_dma_off(platform_get_drvdata(pdev)); + clk_disable(dw->clk); + + return 0; +} + +static int dw_resume_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_dma *dw = platform_get_drvdata(pdev); + + clk_enable(dw->clk); + dma_writel(dw, CFG, DW_CFG_DMA_EN); + return 0; +} + +static const struct dev_pm_ops dw_dev_pm_ops = { + .suspend_noirq = dw_suspend_noirq, + .resume_noirq = dw_resume_noirq, + .freeze_noirq = dw_suspend_noirq, + .thaw_noirq = dw_resume_noirq, + .restore_noirq = dw_resume_noirq, + .poweroff_noirq = dw_suspend_noirq, +}; + +static struct platform_driver dw_driver = { + .remove = __exit_p(dw_remove), + .shutdown = dw_shutdown, + .driver = { + .name = "dw_dmac", + .pm = &dw_dev_pm_ops, + }, +}; + +static int __init dw_init(void) +{ + return platform_driver_probe(&dw_driver, dw_probe); +} +subsys_initcall(dw_init); + +static void __exit dw_exit(void) +{ + platform_driver_unregister(&dw_driver); +} +module_exit(dw_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver"); +MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); +MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>"); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h new file mode 100644 index 00000000..f298f69e --- /dev/null +++ b/drivers/dma/dw_dmac_regs.h @@ -0,0 +1,250 @@ +/* + * Driver for the Synopsys DesignWare AHB DMA Controller + * + * Copyright (C) 2005-2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/dw_dmac.h> + +#define DW_DMA_MAX_NR_CHANNELS 8 + +/* flow controller */ +enum dw_dma_fc { + DW_DMA_FC_D_M2M, + DW_DMA_FC_D_M2P, + DW_DMA_FC_D_P2M, + DW_DMA_FC_D_P2P, + DW_DMA_FC_P_P2M, + DW_DMA_FC_SP_P2P, + DW_DMA_FC_P_M2P, + DW_DMA_FC_DP_P2P, +}; + +/* + * Redefine this macro to handle differences between 32- and 64-bit + * addressing, big vs. little endian, etc. + */ +#define DW_REG(name) u32 name; u32 __pad_##name + +/* Hardware register definitions. */ +struct dw_dma_chan_regs { + DW_REG(SAR); /* Source Address Register */ + DW_REG(DAR); /* Destination Address Register */ + DW_REG(LLP); /* Linked List Pointer */ + u32 CTL_LO; /* Control Register Low */ + u32 CTL_HI; /* Control Register High */ + DW_REG(SSTAT); + DW_REG(DSTAT); + DW_REG(SSTATAR); + DW_REG(DSTATAR); + u32 CFG_LO; /* Configuration Register Low */ + u32 CFG_HI; /* Configuration Register High */ + DW_REG(SGR); + DW_REG(DSR); +}; + +struct dw_dma_irq_regs { + DW_REG(XFER); + DW_REG(BLOCK); + DW_REG(SRC_TRAN); + DW_REG(DST_TRAN); + DW_REG(ERROR); +}; + +struct dw_dma_regs { + /* per-channel registers */ + struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS]; + + /* irq handling */ + struct dw_dma_irq_regs RAW; /* r */ + struct dw_dma_irq_regs STATUS; /* r (raw & mask) */ + struct dw_dma_irq_regs MASK; /* rw (set = irq enabled) */ + struct dw_dma_irq_regs CLEAR; /* w (ack, affects "raw") */ + + DW_REG(STATUS_INT); /* r */ + + /* software handshaking */ + DW_REG(REQ_SRC); + DW_REG(REQ_DST); + DW_REG(SGL_REQ_SRC); + DW_REG(SGL_REQ_DST); + DW_REG(LAST_SRC); + DW_REG(LAST_DST); + + /* miscellaneous */ + DW_REG(CFG); + DW_REG(CH_EN); + DW_REG(ID); + DW_REG(TEST); + + /* optional encoded params, 0x3c8..0x3 */ +}; + +/* Bitfields in CTL_LO */ +#define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */ +#define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ +#define DWC_CTLL_SRC_WIDTH(n) ((n)<<4) +#define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */ +#define DWC_CTLL_DST_DEC (1<<7) +#define DWC_CTLL_DST_FIX (2<<7) +#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */ +#define DWC_CTLL_SRC_DEC (1<<9) +#define DWC_CTLL_SRC_FIX (2<<9) +#define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */ +#define DWC_CTLL_SRC_MSIZE(n) ((n)<<14) +#define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */ +#define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */ +#define DWC_CTLL_FC(n) ((n) << 20) +#define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */ +#define DWC_CTLL_FC_M2P (1 << 20) /* mem-to-periph */ +#define DWC_CTLL_FC_P2M (2 << 20) /* periph-to-mem */ +#define DWC_CTLL_FC_P2P (3 << 20) /* periph-to-periph */ +/* plus 4 transfer types for peripheral-as-flow-controller */ +#define DWC_CTLL_DMS(n) ((n)<<23) /* dst master select */ +#define DWC_CTLL_SMS(n) ((n)<<25) /* src master select */ +#define DWC_CTLL_LLP_D_EN (1 << 27) /* dest block chain */ +#define DWC_CTLL_LLP_S_EN (1 << 28) /* src block chain */ + +/* Bitfields in CTL_HI */ +#define DWC_CTLH_DONE 0x00001000 +#define DWC_CTLH_BLOCK_TS_MASK 0x00000fff + +/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */ +#define DWC_CFGL_CH_PRIOR_MASK (0x7 << 5) /* priority mask */ +#define DWC_CFGL_CH_PRIOR(x) ((x) << 5) /* priority */ +#define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ +#define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */ +#define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */ +#define DWC_CFGL_HS_SRC (1 << 11) /* handshake w/src */ +#define DWC_CFGL_MAX_BURST(x) ((x) << 20) +#define DWC_CFGL_RELOAD_SAR (1 << 30) +#define DWC_CFGL_RELOAD_DAR (1 << 31) + +/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */ +#define DWC_CFGH_DS_UPD_EN (1 << 5) +#define DWC_CFGH_SS_UPD_EN (1 << 6) + +/* Bitfields in SGR */ +#define DWC_SGR_SGI(x) ((x) << 0) +#define DWC_SGR_SGC(x) ((x) << 20) + +/* Bitfields in DSR */ +#define DWC_DSR_DSI(x) ((x) << 0) +#define DWC_DSR_DSC(x) ((x) << 20) + +/* Bitfields in CFG */ +#define DW_CFG_DMA_EN (1 << 0) + +#define DW_REGLEN 0x400 + +enum dw_dmac_flags { + DW_DMA_IS_CYCLIC = 0, +}; + +struct dw_dma_chan { + struct dma_chan chan; + void __iomem *ch_regs; + u8 mask; + u8 priority; + bool paused; + bool initialized; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + unsigned long flags; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + struct dw_cyclic_desc *cdesc; + + unsigned int descs_allocated; + + /* configuration passed via DMA_SLAVE_CONFIG */ + struct dma_slave_config dma_sconfig; +}; + +static inline struct dw_dma_chan_regs __iomem * +__dwc_regs(struct dw_dma_chan *dwc) +{ + return dwc->ch_regs; +} + +#define channel_readl(dwc, name) \ + readl(&(__dwc_regs(dwc)->name)) +#define channel_writel(dwc, name, val) \ + writel((val), &(__dwc_regs(dwc)->name)) + +static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct dw_dma_chan, chan); +} + +struct dw_dma { + struct dma_device dma; + void __iomem *regs; + struct tasklet_struct tasklet; + struct clk *clk; + + u8 all_chan_mask; + + struct dw_dma_chan chan[0]; +}; + +static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) +{ + return dw->regs; +} + +#define dma_readl(dw, name) \ + readl(&(__dw_regs(dw)->name)) +#define dma_writel(dw, name, val) \ + writel((val), &(__dw_regs(dw)->name)) + +#define channel_set_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | (mask)) +#define channel_clear_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | 0) + +static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct dw_dma, dma); +} + +/* LLI == Linked List Item; a.k.a. DMA block descriptor */ +struct dw_lli { + /* values that are not changed by hardware */ + dma_addr_t sar; + dma_addr_t dar; + dma_addr_t llp; /* chain to next lli */ + u32 ctllo; + /* values that may get written back: */ + u32 ctlhi; + /* sstat and dstat can snapshot peripheral register state. + * silicon config may discard either or both... + */ + u32 sstat; + u32 dstat; +}; + +struct dw_desc { + /* FIRST values the hardware uses */ + struct dw_lli lli; + + /* THEN values for driver housekeeping */ + struct list_head desc_node; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + size_t len; +}; + +static inline struct dw_desc * +txd_to_dw_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct dw_desc, txd); +} diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c new file mode 100644 index 00000000..f6e9b572 --- /dev/null +++ b/drivers/dma/ep93xx_dma.c @@ -0,0 +1,1383 @@ +/* + * Driver for the Cirrus Logic EP93xx DMA Controller + * + * Copyright (C) 2011 Mika Westerberg + * + * DMA M2P implementation is based on the original + * arch/arm/mach-ep93xx/dma-m2p.c which has following copyrights: + * + * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> + * Copyright (C) 2006 Applied Data Systems + * Copyright (C) 2009 Ryan Mallon <rmallon@gmail.com> + * + * This driver is based on dw_dmac and amba-pl08x drivers. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <mach/dma.h> + +#include "dmaengine.h" + +/* M2P registers */ +#define M2P_CONTROL 0x0000 +#define M2P_CONTROL_STALLINT BIT(0) +#define M2P_CONTROL_NFBINT BIT(1) +#define M2P_CONTROL_CH_ERROR_INT BIT(3) +#define M2P_CONTROL_ENABLE BIT(4) +#define M2P_CONTROL_ICE BIT(6) + +#define M2P_INTERRUPT 0x0004 +#define M2P_INTERRUPT_STALL BIT(0) +#define M2P_INTERRUPT_NFB BIT(1) +#define M2P_INTERRUPT_ERROR BIT(3) + +#define M2P_PPALLOC 0x0008 +#define M2P_STATUS 0x000c + +#define M2P_MAXCNT0 0x0020 +#define M2P_BASE0 0x0024 +#define M2P_MAXCNT1 0x0030 +#define M2P_BASE1 0x0034 + +#define M2P_STATE_IDLE 0 +#define M2P_STATE_STALL 1 +#define M2P_STATE_ON 2 +#define M2P_STATE_NEXT 3 + +/* M2M registers */ +#define M2M_CONTROL 0x0000 +#define M2M_CONTROL_DONEINT BIT(2) +#define M2M_CONTROL_ENABLE BIT(3) +#define M2M_CONTROL_START BIT(4) +#define M2M_CONTROL_DAH BIT(11) +#define M2M_CONTROL_SAH BIT(12) +#define M2M_CONTROL_PW_SHIFT 9 +#define M2M_CONTROL_PW_8 (0 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_16 (1 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_32 (2 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_MASK (3 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_TM_SHIFT 13 +#define M2M_CONTROL_TM_TX (1 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_TM_RX (2 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_RSS_SHIFT 22 +#define M2M_CONTROL_RSS_SSPRX (1 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_SSPTX (2 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_IDE (3 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_NO_HDSK BIT(24) +#define M2M_CONTROL_PWSC_SHIFT 25 + +#define M2M_INTERRUPT 0x0004 +#define M2M_INTERRUPT_DONEINT BIT(1) + +#define M2M_BCR0 0x0010 +#define M2M_BCR1 0x0014 +#define M2M_SAR_BASE0 0x0018 +#define M2M_SAR_BASE1 0x001c +#define M2M_DAR_BASE0 0x002c +#define M2M_DAR_BASE1 0x0030 + +#define DMA_MAX_CHAN_BYTES 0xffff +#define DMA_MAX_CHAN_DESCRIPTORS 32 + +struct ep93xx_dma_engine; + +/** + * struct ep93xx_dma_desc - EP93xx specific transaction descriptor + * @src_addr: source address of the transaction + * @dst_addr: destination address of the transaction + * @size: size of the transaction (in bytes) + * @complete: this descriptor is completed + * @txd: dmaengine API descriptor + * @tx_list: list of linked descriptors + * @node: link used for putting this into a channel queue + */ +struct ep93xx_dma_desc { + u32 src_addr; + u32 dst_addr; + size_t size; + bool complete; + struct dma_async_tx_descriptor txd; + struct list_head tx_list; + struct list_head node; +}; + +/** + * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel + * @chan: dmaengine API channel + * @edma: pointer to to the engine device + * @regs: memory mapped registers + * @irq: interrupt number of the channel + * @clk: clock used by this channel + * @tasklet: channel specific tasklet used for callbacks + * @lock: lock protecting the fields following + * @flags: flags for the channel + * @buffer: which buffer to use next (0/1) + * @active: flattened chain of descriptors currently being processed + * @queue: pending descriptors which are handled next + * @free_list: list of free descriptors which can be used + * @runtime_addr: physical address currently used as dest/src (M2M only). This + * is set via %DMA_SLAVE_CONFIG before slave operation is + * prepared + * @runtime_ctrl: M2M runtime values for the control register. + * + * As EP93xx DMA controller doesn't support real chained DMA descriptors we + * will have slightly different scheme here: @active points to a head of + * flattened DMA descriptor chain. + * + * @queue holds pending transactions. These are linked through the first + * descriptor in the chain. When a descriptor is moved to the @active queue, + * the first and chained descriptors are flattened into a single list. + * + * @chan.private holds pointer to &struct ep93xx_dma_data which contains + * necessary channel configuration information. For memcpy channels this must + * be %NULL. + */ +struct ep93xx_dma_chan { + struct dma_chan chan; + const struct ep93xx_dma_engine *edma; + void __iomem *regs; + int irq; + struct clk *clk; + struct tasklet_struct tasklet; + /* protects the fields following */ + spinlock_t lock; + unsigned long flags; +/* Channel is configured for cyclic transfers */ +#define EP93XX_DMA_IS_CYCLIC 0 + + int buffer; + struct list_head active; + struct list_head queue; + struct list_head free_list; + u32 runtime_addr; + u32 runtime_ctrl; +}; + +/** + * struct ep93xx_dma_engine - the EP93xx DMA engine instance + * @dma_dev: holds the dmaengine device + * @m2m: is this an M2M or M2P device + * @hw_setup: method which sets the channel up for operation + * @hw_shutdown: shuts the channel down and flushes whatever is left + * @hw_submit: pushes active descriptor(s) to the hardware + * @hw_interrupt: handle the interrupt + * @num_channels: number of channels for this instance + * @channels: array of channels + * + * There is one instance of this struct for the M2P channels and one for the + * M2M channels. hw_xxx() methods are used to perform operations which are + * different on M2M and M2P channels. These methods are called with channel + * lock held and interrupts disabled so they cannot sleep. + */ +struct ep93xx_dma_engine { + struct dma_device dma_dev; + bool m2m; + int (*hw_setup)(struct ep93xx_dma_chan *); + void (*hw_shutdown)(struct ep93xx_dma_chan *); + void (*hw_submit)(struct ep93xx_dma_chan *); + int (*hw_interrupt)(struct ep93xx_dma_chan *); +#define INTERRUPT_UNKNOWN 0 +#define INTERRUPT_DONE 1 +#define INTERRUPT_NEXT_BUFFER 2 + + size_t num_channels; + struct ep93xx_dma_chan channels[]; +}; + +static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac) +{ + return &edmac->chan.dev->device; +} + +static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct ep93xx_dma_chan, chan); +} + +/** + * ep93xx_dma_set_active - set new active descriptor chain + * @edmac: channel + * @desc: head of the new active descriptor chain + * + * Sets @desc to be the head of the new active descriptor chain. This is the + * chain which is processed next. The active list must be empty before calling + * this function. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + BUG_ON(!list_empty(&edmac->active)); + + list_add_tail(&desc->node, &edmac->active); + + /* Flatten the @desc->tx_list chain into @edmac->active list */ + while (!list_empty(&desc->tx_list)) { + struct ep93xx_dma_desc *d = list_first_entry(&desc->tx_list, + struct ep93xx_dma_desc, node); + + /* + * We copy the callback parameters from the first descriptor + * to all the chained descriptors. This way we can call the + * callback without having to find out the first descriptor in + * the chain. Useful for cyclic transfers. + */ + d->txd.callback = desc->txd.callback; + d->txd.callback_param = desc->txd.callback_param; + + list_move_tail(&d->node, &edmac->active); + } +} + +/* Called with @edmac->lock held and interrupts disabled */ +static struct ep93xx_dma_desc * +ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac) +{ + if (list_empty(&edmac->active)) + return NULL; + + return list_first_entry(&edmac->active, struct ep93xx_dma_desc, node); +} + +/** + * ep93xx_dma_advance_active - advances to the next active descriptor + * @edmac: channel + * + * Function advances active descriptor to the next in the @edmac->active and + * returns %true if we still have descriptors in the chain to process. + * Otherwise returns %false. + * + * When the channel is in cyclic mode always returns %true. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc; + + list_rotate_left(&edmac->active); + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + return true; + + desc = ep93xx_dma_get_active(edmac); + if (!desc) + return false; + + /* + * If txd.cookie is set it means that we are back in the first + * descriptor in the chain and hence done with it. + */ + return !desc->txd.cookie; +} + +/* + * M2P DMA implementation + */ + +static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control) +{ + writel(control, edmac->regs + M2P_CONTROL); + /* + * EP93xx User's Guide states that we must perform a dummy read after + * write to the control register. + */ + readl(edmac->regs + M2P_CONTROL); +} + +static int m2p_hw_setup(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control; + + writel(data->port & 0xf, edmac->regs + M2P_PPALLOC); + + control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE + | M2P_CONTROL_ENABLE; + m2p_set_control(edmac, control); + + return 0; +} + +static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac) +{ + return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3; +} + +static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + u32 control; + + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + while (m2p_channel_state(edmac) >= M2P_STATE_ON) + cpu_relax(); + + m2p_set_control(edmac, 0); + + while (m2p_channel_state(edmac) == M2P_STATE_STALL) + cpu_relax(); +} + +static void m2p_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc; + u32 bus_addr; + + desc = ep93xx_dma_get_active(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "M2P: empty descriptor list\n"); + return; + } + + if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV) + bus_addr = desc->src_addr; + else + bus_addr = desc->dst_addr; + + if (edmac->buffer == 0) { + writel(desc->size, edmac->regs + M2P_MAXCNT0); + writel(bus_addr, edmac->regs + M2P_BASE0); + } else { + writel(desc->size, edmac->regs + M2P_MAXCNT1); + writel(bus_addr, edmac->regs + M2P_BASE1); + } + + edmac->buffer ^= 1; +} + +static void m2p_hw_submit(struct ep93xx_dma_chan *edmac) +{ + u32 control = readl(edmac->regs + M2P_CONTROL); + + m2p_fill_desc(edmac); + control |= M2P_CONTROL_STALLINT; + + if (ep93xx_dma_advance_active(edmac)) { + m2p_fill_desc(edmac); + control |= M2P_CONTROL_NFBINT; + } + + m2p_set_control(edmac, control); +} + +static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 irq_status = readl(edmac->regs + M2P_INTERRUPT); + u32 control; + + if (irq_status & M2P_INTERRUPT_ERROR) { + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + + /* Clear the error interrupt */ + writel(1, edmac->regs + M2P_INTERRUPT); + + /* + * It seems that there is no easy way of reporting errors back + * to client so we just report the error here and continue as + * usual. + * + * Revisit this when there is a mechanism to report back the + * errors. + */ + dev_err(chan2dev(edmac), + "DMA transfer failed! Details:\n" + "\tcookie : %d\n" + "\tsrc_addr : 0x%08x\n" + "\tdst_addr : 0x%08x\n" + "\tsize : %zu\n", + desc->txd.cookie, desc->src_addr, desc->dst_addr, + desc->size); + } + + switch (irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) { + case M2P_INTERRUPT_STALL: + /* Disable interrupts */ + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + return INTERRUPT_DONE; + + case M2P_INTERRUPT_NFB: + if (ep93xx_dma_advance_active(edmac)) + m2p_fill_desc(edmac); + + return INTERRUPT_NEXT_BUFFER; + } + + return INTERRUPT_UNKNOWN; +} + +/* + * M2M DMA implementation + * + * For the M2M transfers we don't use NFB at all. This is because it simply + * doesn't work well with memcpy transfers. When you submit both buffers it is + * extremely unlikely that you get an NFB interrupt, but it instead reports + * DONE interrupt and both buffers are already transferred which means that we + * weren't able to update the next buffer. + * + * So for now we "simulate" NFB by just submitting buffer after buffer + * without double buffering. + */ + +static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) +{ + const struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = 0; + + if (!data) { + /* This is memcpy channel, nothing to configure */ + writel(control, edmac->regs + M2M_CONTROL); + return 0; + } + + switch (data->port) { + case EP93XX_DMA_SSP: + /* + * This was found via experimenting - anything less than 5 + * causes the channel to perform only a partial transfer which + * leads to problems since we don't get DONE interrupt then. + */ + control = (5 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_NO_HDSK; + + if (data->direction == DMA_MEM_TO_DEV) { + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + control |= M2M_CONTROL_RSS_SSPTX; + } else { + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + control |= M2M_CONTROL_RSS_SSPRX; + } + break; + + case EP93XX_DMA_IDE: + /* + * This IDE part is totally untested. Values below are taken + * from the EP93xx Users's Guide and might not be correct. + */ + if (data->direction == DMA_MEM_TO_DEV) { + /* Worst case from the UG */ + control = (3 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + } else { + control = (2 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + } + + control |= M2M_CONTROL_NO_HDSK; + control |= M2M_CONTROL_RSS_IDE; + control |= M2M_CONTROL_PW_16; + break; + + default: + return -EINVAL; + } + + writel(control, edmac->regs + M2M_CONTROL); + return 0; +} + +static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + /* Just disable the channel */ + writel(0, edmac->regs + M2M_CONTROL); +} + +static void m2m_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc; + + desc = ep93xx_dma_get_active(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "M2M: empty descriptor list\n"); + return; + } + + if (edmac->buffer == 0) { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE0); + writel(desc->size, edmac->regs + M2M_BCR0); + } else { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE1); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE1); + writel(desc->size, edmac->regs + M2M_BCR1); + } + + edmac->buffer ^= 1; +} + +static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = readl(edmac->regs + M2M_CONTROL); + + /* + * Since we allow clients to configure PW (peripheral width) we always + * clear PW bits here and then set them according what is given in + * the runtime configuration. + */ + control &= ~M2M_CONTROL_PW_MASK; + control |= edmac->runtime_ctrl; + + m2m_fill_desc(edmac); + control |= M2M_CONTROL_DONEINT; + + /* + * Now we can finally enable the channel. For M2M channel this must be + * done _after_ the BCRx registers are programmed. + */ + control |= M2M_CONTROL_ENABLE; + writel(control, edmac->regs + M2M_CONTROL); + + if (!data) { + /* + * For memcpy channels the software trigger must be asserted + * in order to start the memcpy operation. + */ + control |= M2M_CONTROL_START; + writel(control, edmac->regs + M2M_CONTROL); + } +} + +static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 control; + + if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_DONEINT)) + return INTERRUPT_UNKNOWN; + + /* Clear the DONE bit */ + writel(0, edmac->regs + M2M_INTERRUPT); + + /* Disable interrupts and the channel */ + control = readl(edmac->regs + M2M_CONTROL); + control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_ENABLE); + writel(control, edmac->regs + M2M_CONTROL); + + /* + * Since we only get DONE interrupt we have to find out ourselves + * whether there still is something to process. So we try to advance + * the chain an see whether it succeeds. + */ + if (ep93xx_dma_advance_active(edmac)) { + edmac->edma->hw_submit(edmac); + return INTERRUPT_NEXT_BUFFER; + } + + return INTERRUPT_DONE; +} + +/* + * DMA engine API implementation + */ + +static struct ep93xx_dma_desc * +ep93xx_dma_desc_get(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_desc; + struct ep93xx_dma_desc *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_for_each_entry_safe(desc, _desc, &edmac->free_list, node) { + if (async_tx_test_ack(&desc->txd)) { + list_del_init(&desc->node); + + /* Re-initialize the descriptor */ + desc->src_addr = 0; + desc->dst_addr = 0; + desc->size = 0; + desc->complete = false; + desc->txd.cookie = 0; + desc->txd.callback = NULL; + desc->txd.callback_param = NULL; + + ret = desc; + break; + } + } + spin_unlock_irqrestore(&edmac->lock, flags); + return ret; +} + +static void ep93xx_dma_desc_put(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + if (desc) { + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_splice_init(&desc->tx_list, &edmac->free_list); + list_add(&desc->node, &edmac->free_list); + spin_unlock_irqrestore(&edmac->lock, flags); + } +} + +/** + * ep93xx_dma_advance_work - start processing the next pending transaction + * @edmac: channel + * + * If we have pending transactions queued and we are currently idling, this + * function takes the next queued transaction from the @edmac->queue and + * pushes it to the hardware for execution. + */ +static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *new; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + if (!list_empty(&edmac->active) || list_empty(&edmac->queue)) { + spin_unlock_irqrestore(&edmac->lock, flags); + return; + } + + /* Take the next descriptor from the pending queue */ + new = list_first_entry(&edmac->queue, struct ep93xx_dma_desc, node); + list_del_init(&new->node); + + ep93xx_dma_set_active(edmac, new); + + /* Push it to the hardware */ + edmac->edma->hw_submit(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); +} + +static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc) +{ + struct device *dev = desc->txd.chan->device->dev; + + if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(dev, desc->src_addr, desc->size, + DMA_TO_DEVICE); + else + dma_unmap_page(dev, desc->src_addr, desc->size, + DMA_TO_DEVICE); + } + if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(dev, desc->dst_addr, desc->size, + DMA_FROM_DEVICE); + else + dma_unmap_page(dev, desc->dst_addr, desc->size, + DMA_FROM_DEVICE); + } +} + +static void ep93xx_dma_tasklet(unsigned long data) +{ + struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data; + struct ep93xx_dma_desc *desc, *d; + dma_async_tx_callback callback = NULL; + void *callback_param = NULL; + LIST_HEAD(list); + + spin_lock_irq(&edmac->lock); + /* + * If dma_terminate_all() was called before we get to run, the active + * list has become empty. If that happens we aren't supposed to do + * anything more than call ep93xx_dma_advance_work(). + */ + desc = ep93xx_dma_get_active(edmac); + if (desc) { + if (desc->complete) { + /* mark descriptor complete for non cyclic case only */ + if (!test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + dma_cookie_complete(&desc->txd); + list_splice_init(&edmac->active, &list); + } + callback = desc->txd.callback; + callback_param = desc->txd.callback_param; + } + spin_unlock_irq(&edmac->lock); + + /* Pick up the next descriptor from the queue */ + ep93xx_dma_advance_work(edmac); + + /* Now we can release all the chained descriptors */ + list_for_each_entry_safe(desc, d, &list, node) { + /* + * For the memcpy channels the API requires us to unmap the + * buffers unless requested otherwise. + */ + if (!edmac->chan.private) + ep93xx_dma_unmap_buffers(desc); + + ep93xx_dma_desc_put(edmac, desc); + } + + if (callback) + callback(callback_param); +} + +static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id) +{ + struct ep93xx_dma_chan *edmac = dev_id; + struct ep93xx_dma_desc *desc; + irqreturn_t ret = IRQ_HANDLED; + + spin_lock(&edmac->lock); + + desc = ep93xx_dma_get_active(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), + "got interrupt while active list is empty\n"); + spin_unlock(&edmac->lock); + return IRQ_NONE; + } + + switch (edmac->edma->hw_interrupt(edmac)) { + case INTERRUPT_DONE: + desc->complete = true; + tasklet_schedule(&edmac->tasklet); + break; + + case INTERRUPT_NEXT_BUFFER: + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + tasklet_schedule(&edmac->tasklet); + break; + + default: + dev_warn(chan2dev(edmac), "unknown interrupt!\n"); + ret = IRQ_NONE; + break; + } + + spin_unlock(&edmac->lock); + return ret; +} + +/** + * ep93xx_dma_tx_submit - set the prepared descriptor(s) to be executed + * @tx: descriptor to be executed + * + * Function will execute given descriptor on the hardware or if the hardware + * is busy, queue the descriptor to be executed later on. Returns cookie which + * can be used to poll the status of the descriptor. + */ +static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(tx->chan); + struct ep93xx_dma_desc *desc; + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + cookie = dma_cookie_assign(tx); + + desc = container_of(tx, struct ep93xx_dma_desc, txd); + + /* + * If nothing is currently prosessed, we push this descriptor + * directly to the hardware. Otherwise we put the descriptor + * to the pending queue. + */ + if (list_empty(&edmac->active)) { + ep93xx_dma_set_active(edmac, desc); + edmac->edma->hw_submit(edmac); + } else { + list_add_tail(&desc->node, &edmac->queue); + } + + spin_unlock_irqrestore(&edmac->lock, flags); + return cookie; +} + +/** + * ep93xx_dma_alloc_chan_resources - allocate resources for the channel + * @chan: channel to allocate resources + * + * Function allocates necessary resources for the given DMA channel and + * returns number of allocated descriptors for the channel. Negative errno + * is returned in case of failure. + */ +static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_data *data = chan->private; + const char *name = dma_chan_name(chan); + int ret, i; + + /* Sanity check the channel parameters */ + if (!edmac->edma->m2m) { + if (!data) + return -EINVAL; + if (data->port < EP93XX_DMA_I2S1 || + data->port > EP93XX_DMA_IRDA) + return -EINVAL; + if (data->direction != ep93xx_dma_chan_direction(chan)) + return -EINVAL; + } else { + if (data) { + switch (data->port) { + case EP93XX_DMA_SSP: + case EP93XX_DMA_IDE: + if (data->direction != DMA_MEM_TO_DEV && + data->direction != DMA_DEV_TO_MEM) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + } + + if (data && data->name) + name = data->name; + + ret = clk_enable(edmac->clk); + if (ret) + return ret; + + ret = request_irq(edmac->irq, ep93xx_dma_interrupt, 0, name, edmac); + if (ret) + goto fail_clk_disable; + + spin_lock_irq(&edmac->lock); + dma_cookie_init(&edmac->chan); + ret = edmac->edma->hw_setup(edmac); + spin_unlock_irq(&edmac->lock); + + if (ret) + goto fail_free_irq; + + for (i = 0; i < DMA_MAX_CHAN_DESCRIPTORS; i++) { + struct ep93xx_dma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + dev_warn(chan2dev(edmac), "not enough descriptors\n"); + break; + } + + INIT_LIST_HEAD(&desc->tx_list); + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.tx_submit = ep93xx_dma_tx_submit; + + ep93xx_dma_desc_put(edmac, desc); + } + + return i; + +fail_free_irq: + free_irq(edmac->irq, edmac); +fail_clk_disable: + clk_disable(edmac->clk); + + return ret; +} + +/** + * ep93xx_dma_free_chan_resources - release resources for the channel + * @chan: channel + * + * Function releases all the resources allocated for the given channel. + * The channel must be idle when this is called. + */ +static void ep93xx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *d; + unsigned long flags; + LIST_HEAD(list); + + BUG_ON(!list_empty(&edmac->active)); + BUG_ON(!list_empty(&edmac->queue)); + + spin_lock_irqsave(&edmac->lock, flags); + edmac->edma->hw_shutdown(edmac); + edmac->runtime_addr = 0; + edmac->runtime_ctrl = 0; + edmac->buffer = 0; + list_splice_init(&edmac->free_list, &list); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, d, &list, node) + kfree(desc); + + clk_disable(edmac->clk); + free_irq(edmac->irq, edmac); +} + +/** + * ep93xx_dma_prep_dma_memcpy - prepare a memcpy DMA operation + * @chan: channel + * @dest: destination bus address + * @src: source bus address + * @len: size of the transaction + * @flags: flags for the descriptor + * + * Returns a valid DMA descriptor or %NULL in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + size_t bytes, offset; + + first = NULL; + for (offset = 0; offset < len; offset += bytes) { + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couln't get descriptor\n"); + goto fail; + } + + bytes = min_t(size_t, len - offset, DMA_MAX_CHAN_BYTES); + + desc->src_addr = src + offset; + desc->dst_addr = dest + offset; + desc->size = bytes; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + first->txd.flags = flags; + + return &first->txd; +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_prep_slave_sg - prepare a slave DMA operation + * @chan: channel + * @sgl: list of buffers to transfer + * @sg_len: number of entries in @sgl + * @dir: direction of tha DMA transfer + * @flags: flags for the descriptor + * @context: operation context (ignored) + * + * Returns a valid DMA descriptor or %NULL in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + struct scatterlist *sg; + int i; + + if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) { + dev_warn(chan2dev(edmac), + "channel was configured with different direction\n"); + return NULL; + } + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) { + dev_warn(chan2dev(edmac), + "channel is already used for cyclic transfers\n"); + return NULL; + } + + first = NULL; + for_each_sg(sgl, sg, sg_len, i) { + size_t sg_len = sg_dma_len(sg); + + if (sg_len > DMA_MAX_CHAN_BYTES) { + dev_warn(chan2dev(edmac), "too big transfer size %d\n", + sg_len); + goto fail; + } + + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couln't get descriptor\n"); + goto fail; + } + + if (dir == DMA_MEM_TO_DEV) { + desc->src_addr = sg_dma_address(sg); + desc->dst_addr = edmac->runtime_addr; + } else { + desc->src_addr = edmac->runtime_addr; + desc->dst_addr = sg_dma_address(sg); + } + desc->size = sg_len; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + first->txd.flags = flags; + + return &first->txd; + +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_prep_dma_cyclic - prepare a cyclic DMA operation + * @chan: channel + * @dma_addr: DMA mapped address of the buffer + * @buf_len: length of the buffer (in bytes) + * @period_len: lenght of a single period + * @dir: direction of the operation + * @context: operation context (ignored) + * + * Prepares a descriptor for cyclic DMA operation. This means that once the + * descriptor is submitted, we will be submitting in a @period_len sized + * buffers and calling callback once the period has been elapsed. Transfer + * terminates only when client calls dmaengine_terminate_all() for this + * channel. + * + * Returns a valid DMA descriptor or %NULL in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction dir, void *context) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + size_t offset = 0; + + if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) { + dev_warn(chan2dev(edmac), + "channel was configured with different direction\n"); + return NULL; + } + + if (test_and_set_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) { + dev_warn(chan2dev(edmac), + "channel is already used for cyclic transfers\n"); + return NULL; + } + + if (period_len > DMA_MAX_CHAN_BYTES) { + dev_warn(chan2dev(edmac), "too big period length %d\n", + period_len); + return NULL; + } + + /* Split the buffer into period size chunks */ + first = NULL; + for (offset = 0; offset < buf_len; offset += period_len) { + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couln't get descriptor\n"); + goto fail; + } + + if (dir == DMA_MEM_TO_DEV) { + desc->src_addr = dma_addr + offset; + desc->dst_addr = edmac->runtime_addr; + } else { + desc->src_addr = edmac->runtime_addr; + desc->dst_addr = dma_addr + offset; + } + + desc->size = period_len; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + + return &first->txd; + +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_terminate_all - terminate all transactions + * @edmac: channel + * + * Stops all DMA transactions. All descriptors are put back to the + * @edmac->free_list and callbacks are _not_ called. + */ +static int ep93xx_dma_terminate_all(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_d; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&edmac->lock, flags); + /* First we disable and flush the DMA channel */ + edmac->edma->hw_shutdown(edmac); + clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags); + list_splice_init(&edmac->active, &list); + list_splice_init(&edmac->queue, &list); + /* + * We then re-enable the channel. This way we can continue submitting + * the descriptors by just calling ->hw_submit() again. + */ + edmac->edma->hw_setup(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, _d, &list, node) + ep93xx_dma_desc_put(edmac, desc); + + return 0; +} + +static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac, + struct dma_slave_config *config) +{ + enum dma_slave_buswidth width; + unsigned long flags; + u32 addr, ctrl; + + if (!edmac->edma->m2m) + return -EINVAL; + + switch (config->direction) { + case DMA_DEV_TO_MEM: + width = config->src_addr_width; + addr = config->src_addr; + break; + + case DMA_MEM_TO_DEV: + width = config->dst_addr_width; + addr = config->dst_addr; + break; + + default: + return -EINVAL; + } + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + ctrl = 0; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + ctrl = M2M_CONTROL_PW_16; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + ctrl = M2M_CONTROL_PW_32; + break; + default: + return -EINVAL; + } + + spin_lock_irqsave(&edmac->lock, flags); + edmac->runtime_addr = addr; + edmac->runtime_ctrl = ctrl; + spin_unlock_irqrestore(&edmac->lock, flags); + + return 0; +} + +/** + * ep93xx_dma_control - manipulate all pending operations on a channel + * @chan: channel + * @cmd: control command to perform + * @arg: optional argument + * + * Controls the channel. Function returns %0 in case of success or negative + * error in case of failure. + */ +static int ep93xx_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct dma_slave_config *config; + + switch (cmd) { + case DMA_TERMINATE_ALL: + return ep93xx_dma_terminate_all(edmac); + + case DMA_SLAVE_CONFIG: + config = (struct dma_slave_config *)arg; + return ep93xx_dma_slave_config(edmac, config); + + default: + break; + } + + return -ENOSYS; +} + +/** + * ep93xx_dma_tx_status - check if a transaction is completed + * @chan: channel + * @cookie: transaction specific cookie + * @state: state of the transaction is stored here if given + * + * This function can be used to query state of a given transaction. + */ +static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + enum dma_status ret; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + ret = dma_cookie_status(chan, cookie, state); + spin_unlock_irqrestore(&edmac->lock, flags); + + return ret; +} + +/** + * ep93xx_dma_issue_pending - push pending transactions to the hardware + * @chan: channel + * + * When this function is called, all pending transactions are pushed to the + * hardware and executed. + */ +static void ep93xx_dma_issue_pending(struct dma_chan *chan) +{ + ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan)); +} + +static int __init ep93xx_dma_probe(struct platform_device *pdev) +{ + struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct ep93xx_dma_engine *edma; + struct dma_device *dma_dev; + size_t edma_size; + int ret, i; + + edma_size = pdata->num_channels * sizeof(struct ep93xx_dma_chan); + edma = kzalloc(sizeof(*edma) + edma_size, GFP_KERNEL); + if (!edma) + return -ENOMEM; + + dma_dev = &edma->dma_dev; + edma->m2m = platform_get_device_id(pdev)->driver_data; + edma->num_channels = pdata->num_channels; + + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < pdata->num_channels; i++) { + const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i]; + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + + edmac->chan.device = dma_dev; + edmac->regs = cdata->base; + edmac->irq = cdata->irq; + edmac->edma = edma; + + edmac->clk = clk_get(NULL, cdata->name); + if (IS_ERR(edmac->clk)) { + dev_warn(&pdev->dev, "failed to get clock for %s\n", + cdata->name); + continue; + } + + spin_lock_init(&edmac->lock); + INIT_LIST_HEAD(&edmac->active); + INIT_LIST_HEAD(&edmac->queue); + INIT_LIST_HEAD(&edmac->free_list); + tasklet_init(&edmac->tasklet, ep93xx_dma_tasklet, + (unsigned long)edmac); + + list_add_tail(&edmac->chan.device_node, + &dma_dev->channels); + } + + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + + dma_dev->dev = &pdev->dev; + dma_dev->device_alloc_chan_resources = ep93xx_dma_alloc_chan_resources; + dma_dev->device_free_chan_resources = ep93xx_dma_free_chan_resources; + dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic; + dma_dev->device_control = ep93xx_dma_control; + dma_dev->device_issue_pending = ep93xx_dma_issue_pending; + dma_dev->device_tx_status = ep93xx_dma_tx_status; + + dma_set_max_seg_size(dma_dev->dev, DMA_MAX_CHAN_BYTES); + + if (edma->m2m) { + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_dev->device_prep_dma_memcpy = ep93xx_dma_prep_dma_memcpy; + + edma->hw_setup = m2m_hw_setup; + edma->hw_shutdown = m2m_hw_shutdown; + edma->hw_submit = m2m_hw_submit; + edma->hw_interrupt = m2m_hw_interrupt; + } else { + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + + edma->hw_setup = m2p_hw_setup; + edma->hw_shutdown = m2p_hw_shutdown; + edma->hw_submit = m2p_hw_submit; + edma->hw_interrupt = m2p_hw_interrupt; + } + + ret = dma_async_device_register(dma_dev); + if (unlikely(ret)) { + for (i = 0; i < edma->num_channels; i++) { + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + if (!IS_ERR_OR_NULL(edmac->clk)) + clk_put(edmac->clk); + } + kfree(edma); + } else { + dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n", + edma->m2m ? "M" : "P"); + } + + return ret; +} + +static struct platform_device_id ep93xx_dma_driver_ids[] = { + { "ep93xx-dma-m2p", 0 }, + { "ep93xx-dma-m2m", 1 }, + { }, +}; + +static struct platform_driver ep93xx_dma_driver = { + .driver = { + .name = "ep93xx-dma", + }, + .id_table = ep93xx_dma_driver_ids, +}; + +static int __init ep93xx_dma_module_init(void) +{ + return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe); +} +subsys_initcall(ep93xx_dma_module_init); + +MODULE_AUTHOR("Mika Westerberg <mika.westerberg@iki.fi>"); +MODULE_DESCRIPTION("EP93xx DMA driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c new file mode 100644 index 00000000..8f84761f --- /dev/null +++ b/drivers/dma/fsldma.c @@ -0,0 +1,1472 @@ +/* + * Freescale MPC85xx, MPC83xx DMA Engine support + * + * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: + * Zhang Wei <wei.zhang@freescale.com>, Jul 2007 + * Ebony Zhu <ebony.zhu@freescale.com>, May 2007 + * + * Description: + * DMA engine driver for Freescale MPC8540 DMA controller, which is + * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc. + * The support for MPC8349 DMA controller is also added. + * + * This driver instructs the DMA controller to issue the PCI Read Multiple + * command for PCI read operations, instead of using the default PCI Read Line + * command. Please be aware that this setting may result in read pre-fetching + * on some platforms. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/of_platform.h> + +#include "dmaengine.h" +#include "fsldma.h" + +#define chan_dbg(chan, fmt, arg...) \ + dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg) +#define chan_err(chan, fmt, arg...) \ + dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) + +static const char msg_ld_oom[] = "No free memory for link descriptor"; + +/* + * Register Helpers + */ + +static void set_sr(struct fsldma_chan *chan, u32 val) +{ + DMA_OUT(chan, &chan->regs->sr, val, 32); +} + +static u32 get_sr(struct fsldma_chan *chan) +{ + return DMA_IN(chan, &chan->regs->sr, 32); +} + +static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr) +{ + DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64); +} + +static dma_addr_t get_cdar(struct fsldma_chan *chan) +{ + return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN; +} + +static u32 get_bcr(struct fsldma_chan *chan) +{ + return DMA_IN(chan, &chan->regs->bcr, 32); +} + +/* + * Descriptor Helpers + */ + +static void set_desc_cnt(struct fsldma_chan *chan, + struct fsl_dma_ld_hw *hw, u32 count) +{ + hw->count = CPU_TO_DMA(chan, count, 32); +} + +static u32 get_desc_cnt(struct fsldma_chan *chan, struct fsl_desc_sw *desc) +{ + return DMA_TO_CPU(chan, desc->hw.count, 32); +} + +static void set_desc_src(struct fsldma_chan *chan, + struct fsl_dma_ld_hw *hw, dma_addr_t src) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0; + hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64); +} + +static dma_addr_t get_desc_src(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0; + return DMA_TO_CPU(chan, desc->hw.src_addr, 64) & ~snoop_bits; +} + +static void set_desc_dst(struct fsldma_chan *chan, + struct fsl_dma_ld_hw *hw, dma_addr_t dst) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0; + hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64); +} + +static dma_addr_t get_desc_dst(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0; + return DMA_TO_CPU(chan, desc->hw.dst_addr, 64) & ~snoop_bits; +} + +static void set_desc_next(struct fsldma_chan *chan, + struct fsl_dma_ld_hw *hw, dma_addr_t next) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX) + ? FSL_DMA_SNEN : 0; + hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64); +} + +static void set_ld_eol(struct fsldma_chan *chan, struct fsl_desc_sw *desc) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX) + ? FSL_DMA_SNEN : 0; + + desc->hw.next_ln_addr = CPU_TO_DMA(chan, + DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL + | snoop_bits, 64); +} + +/* + * DMA Engine Hardware Control Helpers + */ + +static void dma_init(struct fsldma_chan *chan) +{ + /* Reset the channel */ + DMA_OUT(chan, &chan->regs->mr, 0, 32); + + switch (chan->feature & FSL_DMA_IP_MASK) { + case FSL_DMA_IP_85XX: + /* Set the channel to below modes: + * EIE - Error interrupt enable + * EOLNIE - End of links interrupt enable + * BWC - Bandwidth sharing among channels + */ + DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC + | FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE, 32); + break; + case FSL_DMA_IP_83XX: + /* Set the channel to below modes: + * EOTIE - End-of-transfer interrupt enable + * PRC_RM - PCI read multiple + */ + DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE + | FSL_DMA_MR_PRC_RM, 32); + break; + } +} + +static int dma_is_idle(struct fsldma_chan *chan) +{ + u32 sr = get_sr(chan); + return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH); +} + +/* + * Start the DMA controller + * + * Preconditions: + * - the CDAR register must point to the start descriptor + * - the MRn[CS] bit must be cleared + */ +static void dma_start(struct fsldma_chan *chan) +{ + u32 mode; + + mode = DMA_IN(chan, &chan->regs->mr, 32); + + if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { + DMA_OUT(chan, &chan->regs->bcr, 0, 32); + mode |= FSL_DMA_MR_EMP_EN; + } else { + mode &= ~FSL_DMA_MR_EMP_EN; + } + + if (chan->feature & FSL_DMA_CHAN_START_EXT) { + mode |= FSL_DMA_MR_EMS_EN; + } else { + mode &= ~FSL_DMA_MR_EMS_EN; + mode |= FSL_DMA_MR_CS; + } + + DMA_OUT(chan, &chan->regs->mr, mode, 32); +} + +static void dma_halt(struct fsldma_chan *chan) +{ + u32 mode; + int i; + + /* read the mode register */ + mode = DMA_IN(chan, &chan->regs->mr, 32); + + /* + * The 85xx controller supports channel abort, which will stop + * the current transfer. On 83xx, this bit is the transfer error + * mask bit, which should not be changed. + */ + if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { + mode |= FSL_DMA_MR_CA; + DMA_OUT(chan, &chan->regs->mr, mode, 32); + + mode &= ~FSL_DMA_MR_CA; + } + + /* stop the DMA controller */ + mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN); + DMA_OUT(chan, &chan->regs->mr, mode, 32); + + /* wait for the DMA controller to become idle */ + for (i = 0; i < 100; i++) { + if (dma_is_idle(chan)) + return; + + udelay(10); + } + + if (!dma_is_idle(chan)) + chan_err(chan, "DMA halt timeout!\n"); +} + +/** + * fsl_chan_set_src_loop_size - Set source address hold transfer size + * @chan : Freescale DMA channel + * @size : Address loop size, 0 for disable loop + * + * The set source address hold transfer size. The source + * address hold or loop transfer size is when the DMA transfer + * data from source address (SA), if the loop size is 4, the DMA will + * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA, + * SA + 1 ... and so on. + */ +static void fsl_chan_set_src_loop_size(struct fsldma_chan *chan, int size) +{ + u32 mode; + + mode = DMA_IN(chan, &chan->regs->mr, 32); + + switch (size) { + case 0: + mode &= ~FSL_DMA_MR_SAHE; + break; + case 1: + case 2: + case 4: + case 8: + mode |= FSL_DMA_MR_SAHE | (__ilog2(size) << 14); + break; + } + + DMA_OUT(chan, &chan->regs->mr, mode, 32); +} + +/** + * fsl_chan_set_dst_loop_size - Set destination address hold transfer size + * @chan : Freescale DMA channel + * @size : Address loop size, 0 for disable loop + * + * The set destination address hold transfer size. The destination + * address hold or loop transfer size is when the DMA transfer + * data to destination address (TA), if the loop size is 4, the DMA will + * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA, + * TA + 1 ... and so on. + */ +static void fsl_chan_set_dst_loop_size(struct fsldma_chan *chan, int size) +{ + u32 mode; + + mode = DMA_IN(chan, &chan->regs->mr, 32); + + switch (size) { + case 0: + mode &= ~FSL_DMA_MR_DAHE; + break; + case 1: + case 2: + case 4: + case 8: + mode |= FSL_DMA_MR_DAHE | (__ilog2(size) << 16); + break; + } + + DMA_OUT(chan, &chan->regs->mr, mode, 32); +} + +/** + * fsl_chan_set_request_count - Set DMA Request Count for external control + * @chan : Freescale DMA channel + * @size : Number of bytes to transfer in a single request + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA request count is how many bytes are allowed to transfer before + * pausing the channel, after which a new assertion of DREQ# resumes channel + * operation. + * + * A size of 0 disables external pause control. The maximum size is 1024. + */ +static void fsl_chan_set_request_count(struct fsldma_chan *chan, int size) +{ + u32 mode; + + BUG_ON(size > 1024); + + mode = DMA_IN(chan, &chan->regs->mr, 32); + mode |= (__ilog2(size) << 24) & 0x0f000000; + + DMA_OUT(chan, &chan->regs->mr, mode, 32); +} + +/** + * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * @chan : Freescale DMA channel + * @enable : 0 is disabled, 1 is enabled. + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA Request Count feature should be used in addition to this feature + * to set the number of bytes to transfer before pausing the channel. + */ +static void fsl_chan_toggle_ext_pause(struct fsldma_chan *chan, int enable) +{ + if (enable) + chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; + else + chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; +} + +/** + * fsl_chan_toggle_ext_start - Toggle channel external start status + * @chan : Freescale DMA channel + * @enable : 0 is disabled, 1 is enabled. + * + * If enable the external start, the channel can be started by an + * external DMA start pin. So the dma_start() does not start the + * transfer immediately. The DMA channel will wait for the + * control pin asserted. + */ +static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable) +{ + if (enable) + chan->feature |= FSL_DMA_CHAN_START_EXT; + else + chan->feature &= ~FSL_DMA_CHAN_START_EXT; +} + +static void append_ld_queue(struct fsldma_chan *chan, struct fsl_desc_sw *desc) +{ + struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev); + + if (list_empty(&chan->ld_pending)) + goto out_splice; + + /* + * Add the hardware descriptor to the chain of hardware descriptors + * that already exists in memory. + * + * This will un-set the EOL bit of the existing transaction, and the + * last link in this transaction will become the EOL descriptor. + */ + set_desc_next(chan, &tail->hw, desc->async_tx.phys); + + /* + * Add the software descriptor and all children to the list + * of pending transactions + */ +out_splice: + list_splice_tail_init(&desc->tx_list, &chan->ld_pending); +} + +static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct fsldma_chan *chan = to_fsl_chan(tx->chan); + struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); + struct fsl_desc_sw *child; + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&chan->desc_lock, flags); + + /* + * assign cookies to all of the software descriptors + * that make up this transaction + */ + list_for_each_entry(child, &desc->tx_list, node) { + cookie = dma_cookie_assign(&child->async_tx); + } + + /* put this transaction onto the tail of the pending queue */ + append_ld_queue(chan, desc); + + spin_unlock_irqrestore(&chan->desc_lock, flags); + + return cookie; +} + +/** + * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool. + * @chan : Freescale DMA channel + * + * Return - The descriptor allocated. NULL for failed. + */ +static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan) +{ + struct fsl_desc_sw *desc; + dma_addr_t pdesc; + + desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + if (!desc) { + chan_dbg(chan, "out of memory for link descriptor\n"); + return NULL; + } + + memset(desc, 0, sizeof(*desc)); + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = fsl_dma_tx_submit; + desc->async_tx.phys = pdesc; + +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p allocated\n", desc); +#endif + + return desc; +} + +/** + * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel. + * @chan : Freescale DMA channel + * + * This function will create a dma pool for descriptor allocation. + * + * Return - The number of descriptors allocated. + */ +static int fsl_dma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct fsldma_chan *chan = to_fsl_chan(dchan); + + /* Has this channel already been allocated? */ + if (chan->desc_pool) + return 1; + + /* + * We need the descriptor to be aligned to 32bytes + * for meeting FSL DMA specification requirement. + */ + chan->desc_pool = dma_pool_create(chan->name, chan->dev, + sizeof(struct fsl_desc_sw), + __alignof__(struct fsl_desc_sw), 0); + if (!chan->desc_pool) { + chan_err(chan, "unable to allocate descriptor pool\n"); + return -ENOMEM; + } + + /* there is at least one descriptor free to be allocated */ + return 1; +} + +/** + * fsldma_free_desc_list - Free all descriptors in a queue + * @chan: Freescae DMA channel + * @list: the list to free + * + * LOCKING: must hold chan->desc_lock + */ +static void fsldma_free_desc_list(struct fsldma_chan *chan, + struct list_head *list) +{ + struct fsl_desc_sw *desc, *_desc; + + list_for_each_entry_safe(desc, _desc, list, node) { + list_del(&desc->node); +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p free\n", desc); +#endif + dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); + } +} + +static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan, + struct list_head *list) +{ + struct fsl_desc_sw *desc, *_desc; + + list_for_each_entry_safe_reverse(desc, _desc, list, node) { + list_del(&desc->node); +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p free\n", desc); +#endif + dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); + } +} + +/** + * fsl_dma_free_chan_resources - Free all resources of the channel. + * @chan : Freescale DMA channel + */ +static void fsl_dma_free_chan_resources(struct dma_chan *dchan) +{ + struct fsldma_chan *chan = to_fsl_chan(dchan); + unsigned long flags; + + chan_dbg(chan, "free all channel resources\n"); + spin_lock_irqsave(&chan->desc_lock, flags); + fsldma_free_desc_list(chan, &chan->ld_pending); + fsldma_free_desc_list(chan, &chan->ld_running); + spin_unlock_irqrestore(&chan->desc_lock, flags); + + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; +} + +static struct dma_async_tx_descriptor * +fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags) +{ + struct fsldma_chan *chan; + struct fsl_desc_sw *new; + + if (!dchan) + return NULL; + + chan = to_fsl_chan(dchan); + + new = fsl_dma_alloc_descriptor(chan); + if (!new) { + chan_err(chan, "%s\n", msg_ld_oom); + return NULL; + } + + new->async_tx.cookie = -EBUSY; + new->async_tx.flags = flags; + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &new->tx_list); + + /* Set End-of-link to the last link descriptor of new list */ + set_ld_eol(chan, new); + + return &new->async_tx; +} + +static struct dma_async_tx_descriptor * +fsl_dma_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct fsldma_chan *chan; + struct fsl_desc_sw *first = NULL, *prev = NULL, *new; + size_t copy; + + if (!dchan) + return NULL; + + if (!len) + return NULL; + + chan = to_fsl_chan(dchan); + + do { + + /* Allocate the link descriptor from DMA pool */ + new = fsl_dma_alloc_descriptor(chan); + if (!new) { + chan_err(chan, "%s\n", msg_ld_oom); + goto fail; + } + + copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT); + + set_desc_cnt(chan, &new->hw, copy); + set_desc_src(chan, &new->hw, dma_src); + set_desc_dst(chan, &new->hw, dma_dst); + + if (!first) + first = new; + else + set_desc_next(chan, &prev->hw, new->async_tx.phys); + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + prev = new; + len -= copy; + dma_src += copy; + dma_dst += copy; + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } while (len); + + new->async_tx.flags = flags; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; + + /* Set End-of-link to the last link descriptor of new list */ + set_ld_eol(chan, new); + + return &first->async_tx; + +fail: + if (!first) + return NULL; + + fsldma_free_desc_list_reverse(chan, &first->tx_list); + return NULL; +} + +static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags) +{ + struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL; + struct fsldma_chan *chan = to_fsl_chan(dchan); + size_t dst_avail, src_avail; + dma_addr_t dst, src; + size_t len; + + /* basic sanity checks */ + if (dst_nents == 0 || src_nents == 0) + return NULL; + + if (dst_sg == NULL || src_sg == NULL) + return NULL; + + /* + * TODO: should we check that both scatterlists have the same + * TODO: number of bytes in total? Is that really an error? + */ + + /* get prepared for the loop */ + dst_avail = sg_dma_len(dst_sg); + src_avail = sg_dma_len(src_sg); + + /* run until we are out of scatterlist entries */ + while (true) { + + /* create the largest transaction possible */ + len = min_t(size_t, src_avail, dst_avail); + len = min_t(size_t, len, FSL_DMA_BCR_MAX_CNT); + if (len == 0) + goto fetch; + + dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail; + src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail; + + /* allocate and populate the descriptor */ + new = fsl_dma_alloc_descriptor(chan); + if (!new) { + chan_err(chan, "%s\n", msg_ld_oom); + goto fail; + } + + set_desc_cnt(chan, &new->hw, len); + set_desc_src(chan, &new->hw, src); + set_desc_dst(chan, &new->hw, dst); + + if (!first) + first = new; + else + set_desc_next(chan, &prev->hw, new->async_tx.phys); + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + prev = new; + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + + /* update metadata */ + dst_avail -= len; + src_avail -= len; + +fetch: + /* fetch the next dst scatterlist entry */ + if (dst_avail == 0) { + + /* no more entries: we're done */ + if (dst_nents == 0) + break; + + /* fetch the next entry: if there are no more: done */ + dst_sg = sg_next(dst_sg); + if (dst_sg == NULL) + break; + + dst_nents--; + dst_avail = sg_dma_len(dst_sg); + } + + /* fetch the next src scatterlist entry */ + if (src_avail == 0) { + + /* no more entries: we're done */ + if (src_nents == 0) + break; + + /* fetch the next entry: if there are no more: done */ + src_sg = sg_next(src_sg); + if (src_sg == NULL) + break; + + src_nents--; + src_avail = sg_dma_len(src_sg); + } + } + + new->async_tx.flags = flags; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; + + /* Set End-of-link to the last link descriptor of new list */ + set_ld_eol(chan, new); + + return &first->async_tx; + +fail: + if (!first) + return NULL; + + fsldma_free_desc_list_reverse(chan, &first->tx_list); + return NULL; +} + +/** + * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: DMAEngine flags + * @context: transaction context (ignored) + * + * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the + * DMA_SLAVE API, this gets the device-specific information from the + * chan->private variable. + */ +static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + /* + * This operation is not supported on the Freescale DMA controller + * + * However, we need to provide the function pointer to allow the + * device_control() method to work. + */ + return NULL; +} + +static int fsl_dma_device_control(struct dma_chan *dchan, + enum dma_ctrl_cmd cmd, unsigned long arg) +{ + struct dma_slave_config *config; + struct fsldma_chan *chan; + unsigned long flags; + int size; + + if (!dchan) + return -EINVAL; + + chan = to_fsl_chan(dchan); + + switch (cmd) { + case DMA_TERMINATE_ALL: + spin_lock_irqsave(&chan->desc_lock, flags); + + /* Halt the DMA engine */ + dma_halt(chan); + + /* Remove and free all of the descriptors in the LD queue */ + fsldma_free_desc_list(chan, &chan->ld_pending); + fsldma_free_desc_list(chan, &chan->ld_running); + chan->idle = true; + + spin_unlock_irqrestore(&chan->desc_lock, flags); + return 0; + + case DMA_SLAVE_CONFIG: + config = (struct dma_slave_config *)arg; + + /* make sure the channel supports setting burst size */ + if (!chan->set_request_count) + return -ENXIO; + + /* we set the controller burst size depending on direction */ + if (config->direction == DMA_MEM_TO_DEV) + size = config->dst_addr_width * config->dst_maxburst; + else + size = config->src_addr_width * config->src_maxburst; + + chan->set_request_count(chan, size); + return 0; + + case FSLDMA_EXTERNAL_START: + + /* make sure the channel supports external start */ + if (!chan->toggle_ext_start) + return -ENXIO; + + chan->toggle_ext_start(chan, arg); + return 0; + + default: + return -ENXIO; + } + + return 0; +} + +/** + * fsldma_cleanup_descriptor - cleanup and free a single link descriptor + * @chan: Freescale DMA channel + * @desc: descriptor to cleanup and free + * + * This function is used on a descriptor which has been executed by the DMA + * controller. It will run any callbacks, submit any dependencies, and then + * free the descriptor. + */ +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + struct dma_async_tx_descriptor *txd = &desc->async_tx; + struct device *dev = chan->common.device->dev; + dma_addr_t src = get_desc_src(chan, desc); + dma_addr_t dst = get_desc_dst(chan, desc); + u32 len = get_desc_cnt(chan, desc); + + /* Run the link descriptor callback function */ + if (txd->callback) { +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p callback\n", desc); +#endif + txd->callback(txd->callback_param); + } + + /* Run any dependencies */ + dma_run_dependencies(txd); + + /* Unmap the dst buffer, if requested */ + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE); + else + dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE); + } + + /* Unmap the src buffer, if requested */ + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(dev, src, len, DMA_TO_DEVICE); + else + dma_unmap_page(dev, src, len, DMA_TO_DEVICE); + } + +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p free\n", desc); +#endif + dma_pool_free(chan->desc_pool, desc, txd->phys); +} + +/** + * fsl_chan_xfer_ld_queue - transfer any pending transactions + * @chan : Freescale DMA channel + * + * HARDWARE STATE: idle + * LOCKING: must hold chan->desc_lock + */ +static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) +{ + struct fsl_desc_sw *desc; + + /* + * If the list of pending descriptors is empty, then we + * don't need to do any work at all + */ + if (list_empty(&chan->ld_pending)) { + chan_dbg(chan, "no pending LDs\n"); + return; + } + + /* + * The DMA controller is not idle, which means that the interrupt + * handler will start any queued transactions when it runs after + * this transaction finishes + */ + if (!chan->idle) { + chan_dbg(chan, "DMA controller still busy\n"); + return; + } + + /* + * If there are some link descriptors which have not been + * transferred, we need to start the controller + */ + + /* + * Move all elements from the queue of pending transactions + * onto the list of running transactions + */ + chan_dbg(chan, "idle, starting controller\n"); + desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node); + list_splice_tail_init(&chan->ld_pending, &chan->ld_running); + + /* + * The 85xx DMA controller doesn't clear the channel start bit + * automatically at the end of a transfer. Therefore we must clear + * it in software before starting the transfer. + */ + if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { + u32 mode; + + mode = DMA_IN(chan, &chan->regs->mr, 32); + mode &= ~FSL_DMA_MR_CS; + DMA_OUT(chan, &chan->regs->mr, mode, 32); + } + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + set_cdar(chan, desc->async_tx.phys); + get_cdar(chan); + + dma_start(chan); + chan->idle = false; +} + +/** + * fsl_dma_memcpy_issue_pending - Issue the DMA start command + * @chan : Freescale DMA channel + */ +static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan) +{ + struct fsldma_chan *chan = to_fsl_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->desc_lock, flags); + fsl_chan_xfer_ld_queue(chan); + spin_unlock_irqrestore(&chan->desc_lock, flags); +} + +/** + * fsl_tx_status - Determine the DMA status + * @chan : Freescale DMA channel + */ +static enum dma_status fsl_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct fsldma_chan *chan = to_fsl_chan(dchan); + enum dma_status ret; + unsigned long flags; + + spin_lock_irqsave(&chan->desc_lock, flags); + ret = dma_cookie_status(dchan, cookie, txstate); + spin_unlock_irqrestore(&chan->desc_lock, flags); + + return ret; +} + +/*----------------------------------------------------------------------------*/ +/* Interrupt Handling */ +/*----------------------------------------------------------------------------*/ + +static irqreturn_t fsldma_chan_irq(int irq, void *data) +{ + struct fsldma_chan *chan = data; + u32 stat; + + /* save and clear the status register */ + stat = get_sr(chan); + set_sr(chan, stat); + chan_dbg(chan, "irq: stat = 0x%x\n", stat); + + /* check that this was really our device */ + stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH); + if (!stat) + return IRQ_NONE; + + if (stat & FSL_DMA_SR_TE) + chan_err(chan, "Transfer Error!\n"); + + /* + * Programming Error + * The DMA_INTERRUPT async_tx is a NULL transfer, which will + * triger a PE interrupt. + */ + if (stat & FSL_DMA_SR_PE) { + chan_dbg(chan, "irq: Programming Error INT\n"); + stat &= ~FSL_DMA_SR_PE; + if (get_bcr(chan) != 0) + chan_err(chan, "Programming Error!\n"); + } + + /* + * For MPC8349, EOCDI event need to update cookie + * and start the next transfer if it exist. + */ + if (stat & FSL_DMA_SR_EOCDI) { + chan_dbg(chan, "irq: End-of-Chain link INT\n"); + stat &= ~FSL_DMA_SR_EOCDI; + } + + /* + * If it current transfer is the end-of-transfer, + * we should clear the Channel Start bit for + * prepare next transfer. + */ + if (stat & FSL_DMA_SR_EOLNI) { + chan_dbg(chan, "irq: End-of-link INT\n"); + stat &= ~FSL_DMA_SR_EOLNI; + } + + /* check that the DMA controller is really idle */ + if (!dma_is_idle(chan)) + chan_err(chan, "irq: controller not idle!\n"); + + /* check that we handled all of the bits */ + if (stat) + chan_err(chan, "irq: unhandled sr 0x%08x\n", stat); + + /* + * Schedule the tasklet to handle all cleanup of the current + * transaction. It will start a new transaction if there is + * one pending. + */ + tasklet_schedule(&chan->tasklet); + chan_dbg(chan, "irq: Exit\n"); + return IRQ_HANDLED; +} + +static void dma_do_tasklet(unsigned long data) +{ + struct fsldma_chan *chan = (struct fsldma_chan *)data; + struct fsl_desc_sw *desc, *_desc; + LIST_HEAD(ld_cleanup); + unsigned long flags; + + chan_dbg(chan, "tasklet entry\n"); + + spin_lock_irqsave(&chan->desc_lock, flags); + + /* update the cookie if we have some descriptors to cleanup */ + if (!list_empty(&chan->ld_running)) { + dma_cookie_t cookie; + + desc = to_fsl_desc(chan->ld_running.prev); + cookie = desc->async_tx.cookie; + dma_cookie_complete(&desc->async_tx); + + chan_dbg(chan, "completed_cookie=%d\n", cookie); + } + + /* + * move the descriptors to a temporary list so we can drop the lock + * during the entire cleanup operation + */ + list_splice_tail_init(&chan->ld_running, &ld_cleanup); + + /* the hardware is now idle and ready for more */ + chan->idle = true; + + /* + * Start any pending transactions automatically + * + * In the ideal case, we keep the DMA controller busy while we go + * ahead and free the descriptors below. + */ + fsl_chan_xfer_ld_queue(chan); + spin_unlock_irqrestore(&chan->desc_lock, flags); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) { + + /* Remove from the list of transactions */ + list_del(&desc->node); + + /* Run all cleanup for this descriptor */ + fsldma_cleanup_descriptor(chan, desc); + } + + chan_dbg(chan, "tasklet exit\n"); +} + +static irqreturn_t fsldma_ctrl_irq(int irq, void *data) +{ + struct fsldma_device *fdev = data; + struct fsldma_chan *chan; + unsigned int handled = 0; + u32 gsr, mask; + int i; + + gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->regs) + : in_le32(fdev->regs); + mask = 0xff000000; + dev_dbg(fdev->dev, "IRQ: gsr 0x%.8x\n", gsr); + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (!chan) + continue; + + if (gsr & mask) { + dev_dbg(fdev->dev, "IRQ: chan %d\n", chan->id); + fsldma_chan_irq(irq, chan); + handled++; + } + + gsr &= ~mask; + mask >>= 8; + } + + return IRQ_RETVAL(handled); +} + +static void fsldma_free_irqs(struct fsldma_device *fdev) +{ + struct fsldma_chan *chan; + int i; + + if (fdev->irq != NO_IRQ) { + dev_dbg(fdev->dev, "free per-controller IRQ\n"); + free_irq(fdev->irq, fdev); + return; + } + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (chan && chan->irq != NO_IRQ) { + chan_dbg(chan, "free per-channel IRQ\n"); + free_irq(chan->irq, chan); + } + } +} + +static int fsldma_request_irqs(struct fsldma_device *fdev) +{ + struct fsldma_chan *chan; + int ret; + int i; + + /* if we have a per-controller IRQ, use that */ + if (fdev->irq != NO_IRQ) { + dev_dbg(fdev->dev, "request per-controller IRQ\n"); + ret = request_irq(fdev->irq, fsldma_ctrl_irq, IRQF_SHARED, + "fsldma-controller", fdev); + return ret; + } + + /* no per-controller IRQ, use the per-channel IRQs */ + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (!chan) + continue; + + if (chan->irq == NO_IRQ) { + chan_err(chan, "interrupts property missing in device tree\n"); + ret = -ENODEV; + goto out_unwind; + } + + chan_dbg(chan, "request per-channel IRQ\n"); + ret = request_irq(chan->irq, fsldma_chan_irq, IRQF_SHARED, + "fsldma-chan", chan); + if (ret) { + chan_err(chan, "unable to request per-channel IRQ\n"); + goto out_unwind; + } + } + + return 0; + +out_unwind: + for (/* none */; i >= 0; i--) { + chan = fdev->chan[i]; + if (!chan) + continue; + + if (chan->irq == NO_IRQ) + continue; + + free_irq(chan->irq, chan); + } + + return ret; +} + +/*----------------------------------------------------------------------------*/ +/* OpenFirmware Subsystem */ +/*----------------------------------------------------------------------------*/ + +static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev, + struct device_node *node, u32 feature, const char *compatible) +{ + struct fsldma_chan *chan; + struct resource res; + int err; + + /* alloc channel */ + chan = kzalloc(sizeof(*chan), GFP_KERNEL); + if (!chan) { + dev_err(fdev->dev, "no free memory for DMA channels!\n"); + err = -ENOMEM; + goto out_return; + } + + /* ioremap registers for use */ + chan->regs = of_iomap(node, 0); + if (!chan->regs) { + dev_err(fdev->dev, "unable to ioremap registers\n"); + err = -ENOMEM; + goto out_free_chan; + } + + err = of_address_to_resource(node, 0, &res); + if (err) { + dev_err(fdev->dev, "unable to find 'reg' property\n"); + goto out_iounmap_regs; + } + + chan->feature = feature; + if (!fdev->feature) + fdev->feature = chan->feature; + + /* + * If the DMA device's feature is different than the feature + * of its channels, report the bug + */ + WARN_ON(fdev->feature != chan->feature); + + chan->dev = fdev->dev; + chan->id = ((res.start - 0x100) & 0xfff) >> 7; + if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) { + dev_err(fdev->dev, "too many channels for device\n"); + err = -EINVAL; + goto out_iounmap_regs; + } + + fdev->chan[chan->id] = chan; + tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan); + snprintf(chan->name, sizeof(chan->name), "chan%d", chan->id); + + /* Initialize the channel */ + dma_init(chan); + + /* Clear cdar registers */ + set_cdar(chan, 0); + + switch (chan->feature & FSL_DMA_IP_MASK) { + case FSL_DMA_IP_85XX: + chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; + case FSL_DMA_IP_83XX: + chan->toggle_ext_start = fsl_chan_toggle_ext_start; + chan->set_src_loop_size = fsl_chan_set_src_loop_size; + chan->set_dst_loop_size = fsl_chan_set_dst_loop_size; + chan->set_request_count = fsl_chan_set_request_count; + } + + spin_lock_init(&chan->desc_lock); + INIT_LIST_HEAD(&chan->ld_pending); + INIT_LIST_HEAD(&chan->ld_running); + chan->idle = true; + + chan->common.device = &fdev->common; + dma_cookie_init(&chan->common); + + /* find the IRQ line, if it exists in the device tree */ + chan->irq = irq_of_parse_and_map(node, 0); + + /* Add the channel to DMA device channel list */ + list_add_tail(&chan->common.device_node, &fdev->common.channels); + fdev->common.chancnt++; + + dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible, + chan->irq != NO_IRQ ? chan->irq : fdev->irq); + + return 0; + +out_iounmap_regs: + iounmap(chan->regs); +out_free_chan: + kfree(chan); +out_return: + return err; +} + +static void fsl_dma_chan_remove(struct fsldma_chan *chan) +{ + irq_dispose_mapping(chan->irq); + list_del(&chan->common.device_node); + iounmap(chan->regs); + kfree(chan); +} + +static int __devinit fsldma_of_probe(struct platform_device *op) +{ + struct fsldma_device *fdev; + struct device_node *child; + int err; + + fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); + if (!fdev) { + dev_err(&op->dev, "No enough memory for 'priv'\n"); + err = -ENOMEM; + goto out_return; + } + + fdev->dev = &op->dev; + INIT_LIST_HEAD(&fdev->common.channels); + + /* ioremap the registers for use */ + fdev->regs = of_iomap(op->dev.of_node, 0); + if (!fdev->regs) { + dev_err(&op->dev, "unable to ioremap registers\n"); + err = -ENOMEM; + goto out_free_fdev; + } + + /* map the channel IRQ if it exists, but don't hookup the handler yet */ + fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0); + + dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); + dma_cap_set(DMA_SG, fdev->common.cap_mask); + dma_cap_set(DMA_SLAVE, fdev->common.cap_mask); + fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; + fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; + fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; + fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; + fdev->common.device_prep_dma_sg = fsl_dma_prep_sg; + fdev->common.device_tx_status = fsl_tx_status; + fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; + fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg; + fdev->common.device_control = fsl_dma_device_control; + fdev->common.dev = &op->dev; + + dma_set_mask(&(op->dev), DMA_BIT_MASK(36)); + + dev_set_drvdata(&op->dev, fdev); + + /* + * We cannot use of_platform_bus_probe() because there is no + * of_platform_bus_remove(). Instead, we manually instantiate every DMA + * channel object. + */ + for_each_child_of_node(op->dev.of_node, child) { + if (of_device_is_compatible(child, "fsl,eloplus-dma-channel")) { + fsl_dma_chan_probe(fdev, child, + FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN, + "fsl,eloplus-dma-channel"); + } + + if (of_device_is_compatible(child, "fsl,elo-dma-channel")) { + fsl_dma_chan_probe(fdev, child, + FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN, + "fsl,elo-dma-channel"); + } + } + + /* + * Hookup the IRQ handler(s) + * + * If we have a per-controller interrupt, we prefer that to the + * per-channel interrupts to reduce the number of shared interrupt + * handlers on the same IRQ line + */ + err = fsldma_request_irqs(fdev); + if (err) { + dev_err(fdev->dev, "unable to request IRQs\n"); + goto out_free_fdev; + } + + dma_async_device_register(&fdev->common); + return 0; + +out_free_fdev: + irq_dispose_mapping(fdev->irq); + kfree(fdev); +out_return: + return err; +} + +static int fsldma_of_remove(struct platform_device *op) +{ + struct fsldma_device *fdev; + unsigned int i; + + fdev = dev_get_drvdata(&op->dev); + dma_async_device_unregister(&fdev->common); + + fsldma_free_irqs(fdev); + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + if (fdev->chan[i]) + fsl_dma_chan_remove(fdev->chan[i]); + } + + iounmap(fdev->regs); + dev_set_drvdata(&op->dev, NULL); + kfree(fdev); + + return 0; +} + +static const struct of_device_id fsldma_of_ids[] = { + { .compatible = "fsl,eloplus-dma", }, + { .compatible = "fsl,elo-dma", }, + {} +}; + +static struct platform_driver fsldma_of_driver = { + .driver = { + .name = "fsl-elo-dma", + .owner = THIS_MODULE, + .of_match_table = fsldma_of_ids, + }, + .probe = fsldma_of_probe, + .remove = fsldma_of_remove, +}; + +/*----------------------------------------------------------------------------*/ +/* Module Init / Exit */ +/*----------------------------------------------------------------------------*/ + +static __init int fsldma_init(void) +{ + pr_info("Freescale Elo / Elo Plus DMA driver\n"); + return platform_driver_register(&fsldma_of_driver); +} + +static void __exit fsldma_exit(void) +{ + platform_driver_unregister(&fsldma_of_driver); +} + +subsys_initcall(fsldma_init); +module_exit(fsldma_exit); + +MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h new file mode 100644 index 00000000..f5c38791 --- /dev/null +++ b/drivers/dma/fsldma.h @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: + * Zhang Wei <wei.zhang@freescale.com>, Jul 2007 + * Ebony Zhu <ebony.zhu@freescale.com>, May 2007 + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#ifndef __DMA_FSLDMA_H +#define __DMA_FSLDMA_H + +#include <linux/device.h> +#include <linux/dmapool.h> +#include <linux/dmaengine.h> + +/* Define data structures needed by Freescale + * MPC8540 and MPC8349 DMA controller. + */ +#define FSL_DMA_MR_CS 0x00000001 +#define FSL_DMA_MR_CC 0x00000002 +#define FSL_DMA_MR_CA 0x00000008 +#define FSL_DMA_MR_EIE 0x00000040 +#define FSL_DMA_MR_XFE 0x00000020 +#define FSL_DMA_MR_EOLNIE 0x00000100 +#define FSL_DMA_MR_EOLSIE 0x00000080 +#define FSL_DMA_MR_EOSIE 0x00000200 +#define FSL_DMA_MR_CDSM 0x00000010 +#define FSL_DMA_MR_CTM 0x00000004 +#define FSL_DMA_MR_EMP_EN 0x00200000 +#define FSL_DMA_MR_EMS_EN 0x00040000 +#define FSL_DMA_MR_DAHE 0x00002000 +#define FSL_DMA_MR_SAHE 0x00001000 + +/* + * Bandwidth/pause control determines how many bytes a given + * channel is allowed to transfer before the DMA engine pauses + * the current channel and switches to the next channel + */ +#define FSL_DMA_MR_BWC 0x08000000 + +/* Special MR definition for MPC8349 */ +#define FSL_DMA_MR_EOTIE 0x00000080 +#define FSL_DMA_MR_PRC_RM 0x00000800 + +#define FSL_DMA_SR_CH 0x00000020 +#define FSL_DMA_SR_PE 0x00000010 +#define FSL_DMA_SR_CB 0x00000004 +#define FSL_DMA_SR_TE 0x00000080 +#define FSL_DMA_SR_EOSI 0x00000002 +#define FSL_DMA_SR_EOLSI 0x00000001 +#define FSL_DMA_SR_EOCDI 0x00000001 +#define FSL_DMA_SR_EOLNI 0x00000008 + +#define FSL_DMA_SATR_SBPATMU 0x20000000 +#define FSL_DMA_SATR_STRANSINT_RIO 0x00c00000 +#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ 0x00050000 +#define FSL_DMA_SATR_SREADTYPE_BP_IORH 0x00020000 +#define FSL_DMA_SATR_SREADTYPE_BP_NREAD 0x00040000 +#define FSL_DMA_SATR_SREADTYPE_BP_MREAD 0x00070000 + +#define FSL_DMA_DATR_DBPATMU 0x20000000 +#define FSL_DMA_DATR_DTRANSINT_RIO 0x00c00000 +#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE 0x00050000 +#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH 0x00010000 + +#define FSL_DMA_EOL ((u64)0x1) +#define FSL_DMA_SNEN ((u64)0x10) +#define FSL_DMA_EOSIE 0x8 +#define FSL_DMA_NLDA_MASK (~(u64)0x1f) + +#define FSL_DMA_BCR_MAX_CNT 0x03ffffffu + +#define FSL_DMA_DGSR_TE 0x80 +#define FSL_DMA_DGSR_CH 0x20 +#define FSL_DMA_DGSR_PE 0x10 +#define FSL_DMA_DGSR_EOLNI 0x08 +#define FSL_DMA_DGSR_CB 0x04 +#define FSL_DMA_DGSR_EOSI 0x02 +#define FSL_DMA_DGSR_EOLSI 0x01 + +typedef u64 __bitwise v64; +typedef u32 __bitwise v32; + +struct fsl_dma_ld_hw { + v64 src_addr; + v64 dst_addr; + v64 next_ln_addr; + v32 count; + v32 reserve; +} __attribute__((aligned(32))); + +struct fsl_desc_sw { + struct fsl_dma_ld_hw hw; + struct list_head node; + struct list_head tx_list; + struct dma_async_tx_descriptor async_tx; +} __attribute__((aligned(32))); + +struct fsldma_chan_regs { + u32 mr; /* 0x00 - Mode Register */ + u32 sr; /* 0x04 - Status Register */ + u64 cdar; /* 0x08 - Current descriptor address register */ + u64 sar; /* 0x10 - Source Address Register */ + u64 dar; /* 0x18 - Destination Address Register */ + u32 bcr; /* 0x20 - Byte Count Register */ + u64 ndar; /* 0x24 - Next Descriptor Address Register */ +}; + +struct fsldma_chan; +#define FSL_DMA_MAX_CHANS_PER_DEVICE 4 + +struct fsldma_device { + void __iomem *regs; /* DGSR register base */ + struct device *dev; + struct dma_device common; + struct fsldma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE]; + u32 feature; /* The same as DMA channels */ + int irq; /* Channel IRQ */ +}; + +/* Define macros for fsldma_chan->feature property */ +#define FSL_DMA_LITTLE_ENDIAN 0x00000000 +#define FSL_DMA_BIG_ENDIAN 0x00000001 + +#define FSL_DMA_IP_MASK 0x00000ff0 +#define FSL_DMA_IP_85XX 0x00000010 +#define FSL_DMA_IP_83XX 0x00000020 + +#define FSL_DMA_CHAN_PAUSE_EXT 0x00001000 +#define FSL_DMA_CHAN_START_EXT 0x00002000 + +struct fsldma_chan { + char name[8]; /* Channel name */ + struct fsldma_chan_regs __iomem *regs; + spinlock_t desc_lock; /* Descriptor operation lock */ + struct list_head ld_pending; /* Link descriptors queue */ + struct list_head ld_running; /* Link descriptors queue */ + struct dma_chan common; /* DMA common channel */ + struct dma_pool *desc_pool; /* Descriptors pool */ + struct device *dev; /* Channel device */ + int irq; /* Channel IRQ */ + int id; /* Raw id of this channel */ + struct tasklet_struct tasklet; + u32 feature; + bool idle; /* DMA controller is idle */ + + void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable); + void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable); + void (*set_src_loop_size)(struct fsldma_chan *fsl_chan, int size); + void (*set_dst_loop_size)(struct fsldma_chan *fsl_chan, int size); + void (*set_request_count)(struct fsldma_chan *fsl_chan, int size); +}; + +#define to_fsl_chan(chan) container_of(chan, struct fsldma_chan, common) +#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node) +#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx) + +#ifndef __powerpc64__ +static u64 in_be64(const u64 __iomem *addr) +{ + return ((u64)in_be32((u32 __iomem *)addr) << 32) | + (in_be32((u32 __iomem *)addr + 1)); +} + +static void out_be64(u64 __iomem *addr, u64 val) +{ + out_be32((u32 __iomem *)addr, val >> 32); + out_be32((u32 __iomem *)addr + 1, (u32)val); +} + +/* There is no asm instructions for 64 bits reverse loads and stores */ +static u64 in_le64(const u64 __iomem *addr) +{ + return ((u64)in_le32((u32 __iomem *)addr + 1) << 32) | + (in_le32((u32 __iomem *)addr)); +} + +static void out_le64(u64 __iomem *addr, u64 val) +{ + out_le32((u32 __iomem *)addr + 1, val >> 32); + out_le32((u32 __iomem *)addr, (u32)val); +} +#endif + +#define DMA_IN(fsl_chan, addr, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + in_be##width(addr) : in_le##width(addr)) +#define DMA_OUT(fsl_chan, addr, val, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + out_be##width(addr, val) : out_le##width(addr, val)) + +#define DMA_TO_CPU(fsl_chan, d, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + be##width##_to_cpu((__force __be##width)(v##width)d) : \ + le##width##_to_cpu((__force __le##width)(v##width)d)) +#define CPU_TO_DMA(fsl_chan, c, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + (__force v##width)cpu_to_be##width(c) : \ + (__force v##width)cpu_to_le##width(c)) + +#endif /* __DMA_FSLDMA_H */ diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c new file mode 100644 index 00000000..bb787d8e --- /dev/null +++ b/drivers/dma/imx-dma.c @@ -0,0 +1,1137 @@ +/* + * drivers/dma/imx-dma.c + * + * This file contains a driver for the Freescale i.MX DMA engine + * found on i.MX1/21/27 + * + * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de> + * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com> + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include <linux/init.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/dmaengine.h> +#include <linux/module.h> + +#include <asm/irq.h> +#include <mach/dma.h> +#include <mach/hardware.h> + +#include "dmaengine.h" +#define IMXDMA_MAX_CHAN_DESCRIPTORS 16 +#define IMX_DMA_CHANNELS 16 + +#define IMX_DMA_2D_SLOTS 2 +#define IMX_DMA_2D_SLOT_A 0 +#define IMX_DMA_2D_SLOT_B 1 + +#define IMX_DMA_LENGTH_LOOP ((unsigned int)-1) +#define IMX_DMA_MEMSIZE_32 (0 << 4) +#define IMX_DMA_MEMSIZE_8 (1 << 4) +#define IMX_DMA_MEMSIZE_16 (2 << 4) +#define IMX_DMA_TYPE_LINEAR (0 << 10) +#define IMX_DMA_TYPE_2D (1 << 10) +#define IMX_DMA_TYPE_FIFO (2 << 10) + +#define IMX_DMA_ERR_BURST (1 << 0) +#define IMX_DMA_ERR_REQUEST (1 << 1) +#define IMX_DMA_ERR_TRANSFER (1 << 2) +#define IMX_DMA_ERR_BUFFER (1 << 3) +#define IMX_DMA_ERR_TIMEOUT (1 << 4) + +#define DMA_DCR 0x00 /* Control Register */ +#define DMA_DISR 0x04 /* Interrupt status Register */ +#define DMA_DIMR 0x08 /* Interrupt mask Register */ +#define DMA_DBTOSR 0x0c /* Burst timeout status Register */ +#define DMA_DRTOSR 0x10 /* Request timeout Register */ +#define DMA_DSESR 0x14 /* Transfer Error Status Register */ +#define DMA_DBOSR 0x18 /* Buffer overflow status Register */ +#define DMA_DBTOCR 0x1c /* Burst timeout control Register */ +#define DMA_WSRA 0x40 /* W-Size Register A */ +#define DMA_XSRA 0x44 /* X-Size Register A */ +#define DMA_YSRA 0x48 /* Y-Size Register A */ +#define DMA_WSRB 0x4c /* W-Size Register B */ +#define DMA_XSRB 0x50 /* X-Size Register B */ +#define DMA_YSRB 0x54 /* Y-Size Register B */ +#define DMA_SAR(x) (0x80 + ((x) << 6)) /* Source Address Registers */ +#define DMA_DAR(x) (0x84 + ((x) << 6)) /* Destination Address Registers */ +#define DMA_CNTR(x) (0x88 + ((x) << 6)) /* Count Registers */ +#define DMA_CCR(x) (0x8c + ((x) << 6)) /* Control Registers */ +#define DMA_RSSR(x) (0x90 + ((x) << 6)) /* Request source select Registers */ +#define DMA_BLR(x) (0x94 + ((x) << 6)) /* Burst length Registers */ +#define DMA_RTOR(x) (0x98 + ((x) << 6)) /* Request timeout Registers */ +#define DMA_BUCR(x) (0x98 + ((x) << 6)) /* Bus Utilization Registers */ +#define DMA_CCNR(x) (0x9C + ((x) << 6)) /* Channel counter Registers */ + +#define DCR_DRST (1<<1) +#define DCR_DEN (1<<0) +#define DBTOCR_EN (1<<15) +#define DBTOCR_CNT(x) ((x) & 0x7fff) +#define CNTR_CNT(x) ((x) & 0xffffff) +#define CCR_ACRPT (1<<14) +#define CCR_DMOD_LINEAR (0x0 << 12) +#define CCR_DMOD_2D (0x1 << 12) +#define CCR_DMOD_FIFO (0x2 << 12) +#define CCR_DMOD_EOBFIFO (0x3 << 12) +#define CCR_SMOD_LINEAR (0x0 << 10) +#define CCR_SMOD_2D (0x1 << 10) +#define CCR_SMOD_FIFO (0x2 << 10) +#define CCR_SMOD_EOBFIFO (0x3 << 10) +#define CCR_MDIR_DEC (1<<9) +#define CCR_MSEL_B (1<<8) +#define CCR_DSIZ_32 (0x0 << 6) +#define CCR_DSIZ_8 (0x1 << 6) +#define CCR_DSIZ_16 (0x2 << 6) +#define CCR_SSIZ_32 (0x0 << 4) +#define CCR_SSIZ_8 (0x1 << 4) +#define CCR_SSIZ_16 (0x2 << 4) +#define CCR_REN (1<<3) +#define CCR_RPT (1<<2) +#define CCR_FRC (1<<1) +#define CCR_CEN (1<<0) +#define RTOR_EN (1<<15) +#define RTOR_CLK (1<<14) +#define RTOR_PSC (1<<13) + +enum imxdma_prep_type { + IMXDMA_DESC_MEMCPY, + IMXDMA_DESC_INTERLEAVED, + IMXDMA_DESC_SLAVE_SG, + IMXDMA_DESC_CYCLIC, +}; + +struct imx_dma_2d_config { + u16 xsr; + u16 ysr; + u16 wsr; + int count; +}; + +struct imxdma_desc { + struct list_head node; + struct dma_async_tx_descriptor desc; + enum dma_status status; + dma_addr_t src; + dma_addr_t dest; + size_t len; + enum dma_transfer_direction direction; + enum imxdma_prep_type type; + /* For memcpy and interleaved */ + unsigned int config_port; + unsigned int config_mem; + /* For interleaved transfers */ + unsigned int x; + unsigned int y; + unsigned int w; + /* For slave sg and cyclic */ + struct scatterlist *sg; + unsigned int sgcount; +}; + +struct imxdma_channel { + int hw_chaining; + struct timer_list watchdog; + struct imxdma_engine *imxdma; + unsigned int channel; + + struct tasklet_struct dma_tasklet; + struct list_head ld_free; + struct list_head ld_queue; + struct list_head ld_active; + int descs_allocated; + enum dma_slave_buswidth word_size; + dma_addr_t per_address; + u32 watermark_level; + struct dma_chan chan; + struct dma_async_tx_descriptor desc; + enum dma_status status; + int dma_request; + struct scatterlist *sg_list; + u32 ccr_from_device; + u32 ccr_to_device; + bool enabled_2d; + int slot_2d; +}; + +struct imxdma_engine { + struct device *dev; + struct device_dma_parameters dma_parms; + struct dma_device dma_device; + void __iomem *base; + struct clk *dma_clk; + spinlock_t lock; + struct imx_dma_2d_config slots_2d[IMX_DMA_2D_SLOTS]; + struct imxdma_channel channel[IMX_DMA_CHANNELS]; +}; + +static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct imxdma_channel, chan); +} + +static inline bool imxdma_chan_is_doing_cyclic(struct imxdma_channel *imxdmac) +{ + struct imxdma_desc *desc; + + if (!list_empty(&imxdmac->ld_active)) { + desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, + node); + if (desc->type == IMXDMA_DESC_CYCLIC) + return true; + } + return false; +} + + + +static void imx_dmav1_writel(struct imxdma_engine *imxdma, unsigned val, + unsigned offset) +{ + __raw_writel(val, imxdma->base + offset); +} + +static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset) +{ + return __raw_readl(imxdma->base + offset); +} + +static int imxdma_hw_chain(struct imxdma_channel *imxdmac) +{ + if (cpu_is_mx27()) + return imxdmac->hw_chaining; + else + return 0; +} + +/* + * imxdma_sg_next - prepare next chunk for scatter-gather DMA emulation + */ +static inline int imxdma_sg_next(struct imxdma_desc *d) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct scatterlist *sg = d->sg; + unsigned long now; + + now = min(d->len, sg->length); + if (d->len != IMX_DMA_LENGTH_LOOP) + d->len -= now; + + if (d->direction == DMA_DEV_TO_MEM) + imx_dmav1_writel(imxdma, sg->dma_address, + DMA_DAR(imxdmac->channel)); + else + imx_dmav1_writel(imxdma, sg->dma_address, + DMA_SAR(imxdmac->channel)); + + imx_dmav1_writel(imxdma, now, DMA_CNTR(imxdmac->channel)); + + dev_dbg(imxdma->dev, " %s channel: %d dst 0x%08x, src 0x%08x, " + "size 0x%08x\n", __func__, imxdmac->channel, + imx_dmav1_readl(imxdma, DMA_DAR(imxdmac->channel)), + imx_dmav1_readl(imxdma, DMA_SAR(imxdmac->channel)), + imx_dmav1_readl(imxdma, DMA_CNTR(imxdmac->channel))); + + return now; +} + +static void imxdma_enable_hw(struct imxdma_desc *d) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + int channel = imxdmac->channel; + unsigned long flags; + + dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel); + + local_irq_save(flags); + + imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR); + imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) & + ~(1 << channel), DMA_DIMR); + imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) | + CCR_CEN | CCR_ACRPT, DMA_CCR(channel)); + + if ((cpu_is_mx21() || cpu_is_mx27()) && + d->sg && imxdma_hw_chain(imxdmac)) { + d->sg = sg_next(d->sg); + if (d->sg) { + u32 tmp; + imxdma_sg_next(d); + tmp = imx_dmav1_readl(imxdma, DMA_CCR(channel)); + imx_dmav1_writel(imxdma, tmp | CCR_RPT | CCR_ACRPT, + DMA_CCR(channel)); + } + } + + local_irq_restore(flags); +} + +static void imxdma_disable_hw(struct imxdma_channel *imxdmac) +{ + struct imxdma_engine *imxdma = imxdmac->imxdma; + int channel = imxdmac->channel; + unsigned long flags; + + dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel); + + if (imxdma_hw_chain(imxdmac)) + del_timer(&imxdmac->watchdog); + + local_irq_save(flags); + imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) | + (1 << channel), DMA_DIMR); + imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) & + ~CCR_CEN, DMA_CCR(channel)); + imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR); + local_irq_restore(flags); +} + +static void imxdma_watchdog(unsigned long data) +{ + struct imxdma_channel *imxdmac = (struct imxdma_channel *)data; + struct imxdma_engine *imxdma = imxdmac->imxdma; + int channel = imxdmac->channel; + + imx_dmav1_writel(imxdma, 0, DMA_CCR(channel)); + + /* Tasklet watchdog error handler */ + tasklet_schedule(&imxdmac->dma_tasklet); + dev_dbg(imxdma->dev, "channel %d: watchdog timeout!\n", + imxdmac->channel); +} + +static irqreturn_t imxdma_err_handler(int irq, void *dev_id) +{ + struct imxdma_engine *imxdma = dev_id; + unsigned int err_mask; + int i, disr; + int errcode; + + disr = imx_dmav1_readl(imxdma, DMA_DISR); + + err_mask = imx_dmav1_readl(imxdma, DMA_DBTOSR) | + imx_dmav1_readl(imxdma, DMA_DRTOSR) | + imx_dmav1_readl(imxdma, DMA_DSESR) | + imx_dmav1_readl(imxdma, DMA_DBOSR); + + if (!err_mask) + return IRQ_HANDLED; + + imx_dmav1_writel(imxdma, disr & err_mask, DMA_DISR); + + for (i = 0; i < IMX_DMA_CHANNELS; i++) { + if (!(err_mask & (1 << i))) + continue; + errcode = 0; + + if (imx_dmav1_readl(imxdma, DMA_DBTOSR) & (1 << i)) { + imx_dmav1_writel(imxdma, 1 << i, DMA_DBTOSR); + errcode |= IMX_DMA_ERR_BURST; + } + if (imx_dmav1_readl(imxdma, DMA_DRTOSR) & (1 << i)) { + imx_dmav1_writel(imxdma, 1 << i, DMA_DRTOSR); + errcode |= IMX_DMA_ERR_REQUEST; + } + if (imx_dmav1_readl(imxdma, DMA_DSESR) & (1 << i)) { + imx_dmav1_writel(imxdma, 1 << i, DMA_DSESR); + errcode |= IMX_DMA_ERR_TRANSFER; + } + if (imx_dmav1_readl(imxdma, DMA_DBOSR) & (1 << i)) { + imx_dmav1_writel(imxdma, 1 << i, DMA_DBOSR); + errcode |= IMX_DMA_ERR_BUFFER; + } + /* Tasklet error handler */ + tasklet_schedule(&imxdma->channel[i].dma_tasklet); + + printk(KERN_WARNING + "DMA timeout on channel %d -%s%s%s%s\n", i, + errcode & IMX_DMA_ERR_BURST ? " burst" : "", + errcode & IMX_DMA_ERR_REQUEST ? " request" : "", + errcode & IMX_DMA_ERR_TRANSFER ? " transfer" : "", + errcode & IMX_DMA_ERR_BUFFER ? " buffer" : ""); + } + return IRQ_HANDLED; +} + +static void dma_irq_handle_channel(struct imxdma_channel *imxdmac) +{ + struct imxdma_engine *imxdma = imxdmac->imxdma; + int chno = imxdmac->channel; + struct imxdma_desc *desc; + + spin_lock(&imxdma->lock); + if (list_empty(&imxdmac->ld_active)) { + spin_unlock(&imxdma->lock); + goto out; + } + + desc = list_first_entry(&imxdmac->ld_active, + struct imxdma_desc, + node); + spin_unlock(&imxdma->lock); + + if (desc->sg) { + u32 tmp; + desc->sg = sg_next(desc->sg); + + if (desc->sg) { + imxdma_sg_next(desc); + + tmp = imx_dmav1_readl(imxdma, DMA_CCR(chno)); + + if (imxdma_hw_chain(imxdmac)) { + /* FIXME: The timeout should probably be + * configurable + */ + mod_timer(&imxdmac->watchdog, + jiffies + msecs_to_jiffies(500)); + + tmp |= CCR_CEN | CCR_RPT | CCR_ACRPT; + imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno)); + } else { + imx_dmav1_writel(imxdma, tmp & ~CCR_CEN, + DMA_CCR(chno)); + tmp |= CCR_CEN; + } + + imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno)); + + if (imxdma_chan_is_doing_cyclic(imxdmac)) + /* Tasklet progression */ + tasklet_schedule(&imxdmac->dma_tasklet); + + return; + } + + if (imxdma_hw_chain(imxdmac)) { + del_timer(&imxdmac->watchdog); + return; + } + } + +out: + imx_dmav1_writel(imxdma, 0, DMA_CCR(chno)); + /* Tasklet irq */ + tasklet_schedule(&imxdmac->dma_tasklet); +} + +static irqreturn_t dma_irq_handler(int irq, void *dev_id) +{ + struct imxdma_engine *imxdma = dev_id; + int i, disr; + + if (cpu_is_mx21() || cpu_is_mx27()) + imxdma_err_handler(irq, dev_id); + + disr = imx_dmav1_readl(imxdma, DMA_DISR); + + dev_dbg(imxdma->dev, "%s called, disr=0x%08x\n", __func__, disr); + + imx_dmav1_writel(imxdma, disr, DMA_DISR); + for (i = 0; i < IMX_DMA_CHANNELS; i++) { + if (disr & (1 << i)) + dma_irq_handle_channel(&imxdma->channel[i]); + } + + return IRQ_HANDLED; +} + +static int imxdma_xfer_desc(struct imxdma_desc *d) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + unsigned long flags; + int slot = -1; + int i; + + /* Configure and enable */ + switch (d->type) { + case IMXDMA_DESC_INTERLEAVED: + /* Try to get a free 2D slot */ + spin_lock_irqsave(&imxdma->lock, flags); + for (i = 0; i < IMX_DMA_2D_SLOTS; i++) { + if ((imxdma->slots_2d[i].count > 0) && + ((imxdma->slots_2d[i].xsr != d->x) || + (imxdma->slots_2d[i].ysr != d->y) || + (imxdma->slots_2d[i].wsr != d->w))) + continue; + slot = i; + break; + } + if (slot < 0) + return -EBUSY; + + imxdma->slots_2d[slot].xsr = d->x; + imxdma->slots_2d[slot].ysr = d->y; + imxdma->slots_2d[slot].wsr = d->w; + imxdma->slots_2d[slot].count++; + + imxdmac->slot_2d = slot; + imxdmac->enabled_2d = true; + spin_unlock_irqrestore(&imxdma->lock, flags); + + if (slot == IMX_DMA_2D_SLOT_A) { + d->config_mem &= ~CCR_MSEL_B; + d->config_port &= ~CCR_MSEL_B; + imx_dmav1_writel(imxdma, d->x, DMA_XSRA); + imx_dmav1_writel(imxdma, d->y, DMA_YSRA); + imx_dmav1_writel(imxdma, d->w, DMA_WSRA); + } else { + d->config_mem |= CCR_MSEL_B; + d->config_port |= CCR_MSEL_B; + imx_dmav1_writel(imxdma, d->x, DMA_XSRB); + imx_dmav1_writel(imxdma, d->y, DMA_YSRB); + imx_dmav1_writel(imxdma, d->w, DMA_WSRB); + } + /* + * We fall-through here intentionally, since a 2D transfer is + * similar to MEMCPY just adding the 2D slot configuration. + */ + case IMXDMA_DESC_MEMCPY: + imx_dmav1_writel(imxdma, d->src, DMA_SAR(imxdmac->channel)); + imx_dmav1_writel(imxdma, d->dest, DMA_DAR(imxdmac->channel)); + imx_dmav1_writel(imxdma, d->config_mem | (d->config_port << 2), + DMA_CCR(imxdmac->channel)); + + imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel)); + + dev_dbg(imxdma->dev, "%s channel: %d dest=0x%08x src=0x%08x " + "dma_length=%d\n", __func__, imxdmac->channel, + d->dest, d->src, d->len); + + break; + /* Cyclic transfer is the same as slave_sg with special sg configuration. */ + case IMXDMA_DESC_CYCLIC: + case IMXDMA_DESC_SLAVE_SG: + if (d->direction == DMA_DEV_TO_MEM) { + imx_dmav1_writel(imxdma, imxdmac->per_address, + DMA_SAR(imxdmac->channel)); + imx_dmav1_writel(imxdma, imxdmac->ccr_from_device, + DMA_CCR(imxdmac->channel)); + + dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d " + "total length=%d dev_addr=0x%08x (dev2mem)\n", + __func__, imxdmac->channel, d->sg, d->sgcount, + d->len, imxdmac->per_address); + } else if (d->direction == DMA_MEM_TO_DEV) { + imx_dmav1_writel(imxdma, imxdmac->per_address, + DMA_DAR(imxdmac->channel)); + imx_dmav1_writel(imxdma, imxdmac->ccr_to_device, + DMA_CCR(imxdmac->channel)); + + dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d " + "total length=%d dev_addr=0x%08x (mem2dev)\n", + __func__, imxdmac->channel, d->sg, d->sgcount, + d->len, imxdmac->per_address); + } else { + dev_err(imxdma->dev, "%s channel: %d bad dma mode\n", + __func__, imxdmac->channel); + return -EINVAL; + } + + imxdma_sg_next(d); + + break; + default: + return -EINVAL; + } + imxdma_enable_hw(d); + return 0; +} + +static void imxdma_tasklet(unsigned long data) +{ + struct imxdma_channel *imxdmac = (void *)data; + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + + spin_lock(&imxdma->lock); + + if (list_empty(&imxdmac->ld_active)) { + /* Someone might have called terminate all */ + goto out; + } + desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node); + + if (desc->desc.callback) + desc->desc.callback(desc->desc.callback_param); + + /* If we are dealing with a cyclic descriptor keep it on ld_active + * and dont mark the descripor as complete. + * Only in non-cyclic cases it would be marked as complete + */ + if (imxdma_chan_is_doing_cyclic(imxdmac)) + goto out; + else + dma_cookie_complete(&desc->desc); + + /* Free 2D slot if it was an interleaved transfer */ + if (imxdmac->enabled_2d) { + imxdma->slots_2d[imxdmac->slot_2d].count--; + imxdmac->enabled_2d = false; + } + + list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free); + + if (!list_empty(&imxdmac->ld_queue)) { + desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc, + node); + list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active); + if (imxdma_xfer_desc(desc) < 0) + dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n", + __func__, imxdmac->channel); + } +out: + spin_unlock(&imxdma->lock); +} + +static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct dma_slave_config *dmaengine_cfg = (void *)arg; + struct imxdma_engine *imxdma = imxdmac->imxdma; + unsigned long flags; + unsigned int mode = 0; + + switch (cmd) { + case DMA_TERMINATE_ALL: + imxdma_disable_hw(imxdmac); + + spin_lock_irqsave(&imxdma->lock, flags); + list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free); + list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free); + spin_unlock_irqrestore(&imxdma->lock, flags); + return 0; + case DMA_SLAVE_CONFIG: + if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) { + imxdmac->per_address = dmaengine_cfg->src_addr; + imxdmac->watermark_level = dmaengine_cfg->src_maxburst; + imxdmac->word_size = dmaengine_cfg->src_addr_width; + } else { + imxdmac->per_address = dmaengine_cfg->dst_addr; + imxdmac->watermark_level = dmaengine_cfg->dst_maxburst; + imxdmac->word_size = dmaengine_cfg->dst_addr_width; + } + + switch (imxdmac->word_size) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + mode = IMX_DMA_MEMSIZE_8; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + mode = IMX_DMA_MEMSIZE_16; + break; + default: + case DMA_SLAVE_BUSWIDTH_4_BYTES: + mode = IMX_DMA_MEMSIZE_32; + break; + } + + imxdmac->hw_chaining = 1; + if (!imxdma_hw_chain(imxdmac)) + return -EINVAL; + imxdmac->ccr_from_device = (mode | IMX_DMA_TYPE_FIFO) | + ((IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) << 2) | + CCR_REN; + imxdmac->ccr_to_device = + (IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) | + ((mode | IMX_DMA_TYPE_FIFO) << 2) | CCR_REN; + imx_dmav1_writel(imxdma, imxdmac->dma_request, + DMA_RSSR(imxdmac->channel)); + + /* Set burst length */ + imx_dmav1_writel(imxdma, imxdmac->watermark_level * + imxdmac->word_size, DMA_BLR(imxdmac->channel)); + + return 0; + default: + return -ENOSYS; + } + + return -EINVAL; +} + +static enum dma_status imxdma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(chan, cookie, txstate); +} + +static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&imxdma->lock, flags); + list_move_tail(imxdmac->ld_free.next, &imxdmac->ld_queue); + cookie = dma_cookie_assign(tx); + spin_unlock_irqrestore(&imxdma->lock, flags); + + return cookie; +} + +static int imxdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imx_dma_data *data = chan->private; + + if (data != NULL) + imxdmac->dma_request = data->dma_request; + + while (imxdmac->descs_allocated < IMXDMA_MAX_CHAN_DESCRIPTORS) { + struct imxdma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + break; + __memzero(&desc->desc, sizeof(struct dma_async_tx_descriptor)); + dma_async_tx_descriptor_init(&desc->desc, chan); + desc->desc.tx_submit = imxdma_tx_submit; + /* txd.flags will be overwritten in prep funcs */ + desc->desc.flags = DMA_CTRL_ACK; + desc->status = DMA_SUCCESS; + + list_add_tail(&desc->node, &imxdmac->ld_free); + imxdmac->descs_allocated++; + } + + if (!imxdmac->descs_allocated) + return -ENOMEM; + + return imxdmac->descs_allocated; +} + +static void imxdma_free_chan_resources(struct dma_chan *chan) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc, *_desc; + unsigned long flags; + + spin_lock_irqsave(&imxdma->lock, flags); + + imxdma_disable_hw(imxdmac); + list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free); + list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free); + + spin_unlock_irqrestore(&imxdma->lock, flags); + + list_for_each_entry_safe(desc, _desc, &imxdmac->ld_free, node) { + kfree(desc); + imxdmac->descs_allocated--; + } + INIT_LIST_HEAD(&imxdmac->ld_free); + + if (imxdmac->sg_list) { + kfree(imxdmac->sg_list); + imxdmac->sg_list = NULL; + } +} + +static struct dma_async_tx_descriptor *imxdma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct scatterlist *sg; + int i, dma_length = 0; + struct imxdma_desc *desc; + + if (list_empty(&imxdmac->ld_free) || + imxdma_chan_is_doing_cyclic(imxdmac)) + return NULL; + + desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node); + + for_each_sg(sgl, sg, sg_len, i) { + dma_length += sg->length; + } + + switch (imxdmac->word_size) { + case DMA_SLAVE_BUSWIDTH_4_BYTES: + if (sgl->length & 3 || sgl->dma_address & 3) + return NULL; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + if (sgl->length & 1 || sgl->dma_address & 1) + return NULL; + break; + case DMA_SLAVE_BUSWIDTH_1_BYTE: + break; + default: + return NULL; + } + + desc->type = IMXDMA_DESC_SLAVE_SG; + desc->sg = sgl; + desc->sgcount = sg_len; + desc->len = dma_length; + desc->direction = direction; + if (direction == DMA_DEV_TO_MEM) { + desc->src = imxdmac->per_address; + } else { + desc->dest = imxdmac->per_address; + } + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + + return &desc->desc; +} + +static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + void *context) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + int i; + unsigned int periods = buf_len / period_len; + + dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n", + __func__, imxdmac->channel, buf_len, period_len); + + if (list_empty(&imxdmac->ld_free) || + imxdma_chan_is_doing_cyclic(imxdmac)) + return NULL; + + desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node); + + if (imxdmac->sg_list) + kfree(imxdmac->sg_list); + + imxdmac->sg_list = kcalloc(periods + 1, + sizeof(struct scatterlist), GFP_KERNEL); + if (!imxdmac->sg_list) + return NULL; + + sg_init_table(imxdmac->sg_list, periods); + + for (i = 0; i < periods; i++) { + imxdmac->sg_list[i].page_link = 0; + imxdmac->sg_list[i].offset = 0; + imxdmac->sg_list[i].dma_address = dma_addr; + imxdmac->sg_list[i].length = period_len; + dma_addr += period_len; + } + + /* close the loop */ + imxdmac->sg_list[periods].offset = 0; + imxdmac->sg_list[periods].length = 0; + imxdmac->sg_list[periods].page_link = + ((unsigned long)imxdmac->sg_list | 0x01) & ~0x02; + + desc->type = IMXDMA_DESC_CYCLIC; + desc->sg = imxdmac->sg_list; + desc->sgcount = periods; + desc->len = IMX_DMA_LENGTH_LOOP; + desc->direction = direction; + if (direction == DMA_DEV_TO_MEM) { + desc->src = imxdmac->per_address; + } else { + desc->dest = imxdmac->per_address; + } + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + + return &desc->desc; +} + +static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + + dev_dbg(imxdma->dev, "%s channel: %d src=0x%x dst=0x%x len=%d\n", + __func__, imxdmac->channel, src, dest, len); + + if (list_empty(&imxdmac->ld_free) || + imxdma_chan_is_doing_cyclic(imxdmac)) + return NULL; + + desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node); + + desc->type = IMXDMA_DESC_MEMCPY; + desc->src = src; + desc->dest = dest; + desc->len = len; + desc->direction = DMA_MEM_TO_MEM; + desc->config_port = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR; + desc->config_mem = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR; + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + + return &desc->desc; +} + +static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved( + struct dma_chan *chan, struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + + dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n" + " src_sgl=%s dst_sgl=%s numf=%d frame_size=%d\n", __func__, + imxdmac->channel, xt->src_start, xt->dst_start, + xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false", + xt->numf, xt->frame_size); + + if (list_empty(&imxdmac->ld_free) || + imxdma_chan_is_doing_cyclic(imxdmac)) + return NULL; + + if (xt->frame_size != 1 || xt->numf <= 0 || xt->dir != DMA_MEM_TO_MEM) + return NULL; + + desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node); + + desc->type = IMXDMA_DESC_INTERLEAVED; + desc->src = xt->src_start; + desc->dest = xt->dst_start; + desc->x = xt->sgl[0].size; + desc->y = xt->numf; + desc->w = xt->sgl[0].icg + desc->x; + desc->len = desc->x * desc->y; + desc->direction = DMA_MEM_TO_MEM; + desc->config_port = IMX_DMA_MEMSIZE_32; + desc->config_mem = IMX_DMA_MEMSIZE_32; + if (xt->src_sgl) + desc->config_mem |= IMX_DMA_TYPE_2D; + if (xt->dst_sgl) + desc->config_port |= IMX_DMA_TYPE_2D; + desc->desc.callback = NULL; + desc->desc.callback_param = NULL; + + return &desc->desc; +} + +static void imxdma_issue_pending(struct dma_chan *chan) +{ + struct imxdma_channel *imxdmac = to_imxdma_chan(chan); + struct imxdma_engine *imxdma = imxdmac->imxdma; + struct imxdma_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&imxdma->lock, flags); + if (list_empty(&imxdmac->ld_active) && + !list_empty(&imxdmac->ld_queue)) { + desc = list_first_entry(&imxdmac->ld_queue, + struct imxdma_desc, node); + + if (imxdma_xfer_desc(desc) < 0) { + dev_warn(imxdma->dev, + "%s: channel: %d couldn't issue DMA xfer\n", + __func__, imxdmac->channel); + } else { + list_move_tail(imxdmac->ld_queue.next, + &imxdmac->ld_active); + } + } + spin_unlock_irqrestore(&imxdma->lock, flags); +} + +static int __init imxdma_probe(struct platform_device *pdev) + { + struct imxdma_engine *imxdma; + int ret, i; + + + imxdma = kzalloc(sizeof(*imxdma), GFP_KERNEL); + if (!imxdma) + return -ENOMEM; + + if (cpu_is_mx1()) { + imxdma->base = MX1_IO_ADDRESS(MX1_DMA_BASE_ADDR); + } else if (cpu_is_mx21()) { + imxdma->base = MX21_IO_ADDRESS(MX21_DMA_BASE_ADDR); + } else if (cpu_is_mx27()) { + imxdma->base = MX27_IO_ADDRESS(MX27_DMA_BASE_ADDR); + } else { + kfree(imxdma); + return 0; + } + + imxdma->dma_clk = clk_get(NULL, "dma"); + if (IS_ERR(imxdma->dma_clk)) + return PTR_ERR(imxdma->dma_clk); + clk_enable(imxdma->dma_clk); + + /* reset DMA module */ + imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR); + + if (cpu_is_mx1()) { + ret = request_irq(MX1_DMA_INT, dma_irq_handler, 0, "DMA", imxdma); + if (ret) { + dev_warn(imxdma->dev, "Can't register IRQ for DMA\n"); + kfree(imxdma); + return ret; + } + + ret = request_irq(MX1_DMA_ERR, imxdma_err_handler, 0, "DMA", imxdma); + if (ret) { + dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n"); + free_irq(MX1_DMA_INT, NULL); + kfree(imxdma); + return ret; + } + } + + /* enable DMA module */ + imx_dmav1_writel(imxdma, DCR_DEN, DMA_DCR); + + /* clear all interrupts */ + imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DISR); + + /* disable interrupts */ + imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DIMR); + + INIT_LIST_HEAD(&imxdma->dma_device.channels); + + dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask); + dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask); + dma_cap_set(DMA_MEMCPY, imxdma->dma_device.cap_mask); + dma_cap_set(DMA_INTERLEAVE, imxdma->dma_device.cap_mask); + + /* Initialize 2D global parameters */ + for (i = 0; i < IMX_DMA_2D_SLOTS; i++) + imxdma->slots_2d[i].count = 0; + + spin_lock_init(&imxdma->lock); + + /* Initialize channel parameters */ + for (i = 0; i < IMX_DMA_CHANNELS; i++) { + struct imxdma_channel *imxdmac = &imxdma->channel[i]; + + if (cpu_is_mx21() || cpu_is_mx27()) { + ret = request_irq(MX2x_INT_DMACH0 + i, + dma_irq_handler, 0, "DMA", imxdma); + if (ret) { + dev_warn(imxdma->dev, "Can't register IRQ %d " + "for DMA channel %d\n", + MX2x_INT_DMACH0 + i, i); + goto err_init; + } + init_timer(&imxdmac->watchdog); + imxdmac->watchdog.function = &imxdma_watchdog; + imxdmac->watchdog.data = (unsigned long)imxdmac; + } + + imxdmac->imxdma = imxdma; + + INIT_LIST_HEAD(&imxdmac->ld_queue); + INIT_LIST_HEAD(&imxdmac->ld_free); + INIT_LIST_HEAD(&imxdmac->ld_active); + + tasklet_init(&imxdmac->dma_tasklet, imxdma_tasklet, + (unsigned long)imxdmac); + imxdmac->chan.device = &imxdma->dma_device; + dma_cookie_init(&imxdmac->chan); + imxdmac->channel = i; + + /* Add the channel to the DMAC list */ + list_add_tail(&imxdmac->chan.device_node, + &imxdma->dma_device.channels); + } + + imxdma->dev = &pdev->dev; + imxdma->dma_device.dev = &pdev->dev; + + imxdma->dma_device.device_alloc_chan_resources = imxdma_alloc_chan_resources; + imxdma->dma_device.device_free_chan_resources = imxdma_free_chan_resources; + imxdma->dma_device.device_tx_status = imxdma_tx_status; + imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg; + imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic; + imxdma->dma_device.device_prep_dma_memcpy = imxdma_prep_dma_memcpy; + imxdma->dma_device.device_prep_interleaved_dma = imxdma_prep_dma_interleaved; + imxdma->dma_device.device_control = imxdma_control; + imxdma->dma_device.device_issue_pending = imxdma_issue_pending; + + platform_set_drvdata(pdev, imxdma); + + imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */ + imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms; + dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff); + + ret = dma_async_device_register(&imxdma->dma_device); + if (ret) { + dev_err(&pdev->dev, "unable to register\n"); + goto err_init; + } + + return 0; + +err_init: + + if (cpu_is_mx21() || cpu_is_mx27()) { + while (--i >= 0) + free_irq(MX2x_INT_DMACH0 + i, NULL); + } else if cpu_is_mx1() { + free_irq(MX1_DMA_INT, NULL); + free_irq(MX1_DMA_ERR, NULL); + } + + kfree(imxdma); + return ret; +} + +static int __exit imxdma_remove(struct platform_device *pdev) +{ + struct imxdma_engine *imxdma = platform_get_drvdata(pdev); + int i; + + dma_async_device_unregister(&imxdma->dma_device); + + if (cpu_is_mx21() || cpu_is_mx27()) { + for (i = 0; i < IMX_DMA_CHANNELS; i++) + free_irq(MX2x_INT_DMACH0 + i, NULL); + } else if cpu_is_mx1() { + free_irq(MX1_DMA_INT, NULL); + free_irq(MX1_DMA_ERR, NULL); + } + + kfree(imxdma); + + return 0; +} + +static struct platform_driver imxdma_driver = { + .driver = { + .name = "imx-dma", + }, + .remove = __exit_p(imxdma_remove), +}; + +static int __init imxdma_module_init(void) +{ + return platform_driver_probe(&imxdma_driver, imxdma_probe); +} +subsys_initcall(imxdma_module_init); + +MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>"); +MODULE_DESCRIPTION("i.MX dma driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c new file mode 100644 index 00000000..d3e38e28 --- /dev/null +++ b/drivers/dma/imx-sdma.c @@ -0,0 +1,1460 @@ +/* + * drivers/dma/imx-sdma.c + * + * This file contains a driver for the Freescale Smart DMA engine + * + * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de> + * + * Based on code from Freescale: + * + * Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/clk.h> +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/semaphore.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/firmware.h> +#include <linux/slab.h> +#include <linux/platform_device.h> +#include <linux/dmaengine.h> +#include <linux/of.h> +#include <linux/of_device.h> + +#include <asm/irq.h> +#include <mach/sdma.h> +#include <mach/dma.h> +#include <mach/hardware.h> + +#include "dmaengine.h" + +/* SDMA registers */ +#define SDMA_H_C0PTR 0x000 +#define SDMA_H_INTR 0x004 +#define SDMA_H_STATSTOP 0x008 +#define SDMA_H_START 0x00c +#define SDMA_H_EVTOVR 0x010 +#define SDMA_H_DSPOVR 0x014 +#define SDMA_H_HOSTOVR 0x018 +#define SDMA_H_EVTPEND 0x01c +#define SDMA_H_DSPENBL 0x020 +#define SDMA_H_RESET 0x024 +#define SDMA_H_EVTERR 0x028 +#define SDMA_H_INTRMSK 0x02c +#define SDMA_H_PSW 0x030 +#define SDMA_H_EVTERRDBG 0x034 +#define SDMA_H_CONFIG 0x038 +#define SDMA_ONCE_ENB 0x040 +#define SDMA_ONCE_DATA 0x044 +#define SDMA_ONCE_INSTR 0x048 +#define SDMA_ONCE_STAT 0x04c +#define SDMA_ONCE_CMD 0x050 +#define SDMA_EVT_MIRROR 0x054 +#define SDMA_ILLINSTADDR 0x058 +#define SDMA_CHN0ADDR 0x05c +#define SDMA_ONCE_RTB 0x060 +#define SDMA_XTRIG_CONF1 0x070 +#define SDMA_XTRIG_CONF2 0x074 +#define SDMA_CHNENBL0_IMX35 0x200 +#define SDMA_CHNENBL0_IMX31 0x080 +#define SDMA_CHNPRI_0 0x100 + +/* + * Buffer descriptor status values. + */ +#define BD_DONE 0x01 +#define BD_WRAP 0x02 +#define BD_CONT 0x04 +#define BD_INTR 0x08 +#define BD_RROR 0x10 +#define BD_LAST 0x20 +#define BD_EXTD 0x80 + +/* + * Data Node descriptor status values. + */ +#define DND_END_OF_FRAME 0x80 +#define DND_END_OF_XFER 0x40 +#define DND_DONE 0x20 +#define DND_UNUSED 0x01 + +/* + * IPCV2 descriptor status values. + */ +#define BD_IPCV2_END_OF_FRAME 0x40 + +#define IPCV2_MAX_NODES 50 +/* + * Error bit set in the CCB status field by the SDMA, + * in setbd routine, in case of a transfer error + */ +#define DATA_ERROR 0x10000000 + +/* + * Buffer descriptor commands. + */ +#define C0_ADDR 0x01 +#define C0_LOAD 0x02 +#define C0_DUMP 0x03 +#define C0_SETCTX 0x07 +#define C0_GETCTX 0x03 +#define C0_SETDM 0x01 +#define C0_SETPM 0x04 +#define C0_GETDM 0x02 +#define C0_GETPM 0x08 +/* + * Change endianness indicator in the BD command field + */ +#define CHANGE_ENDIANNESS 0x80 + +/* + * Mode/Count of data node descriptors - IPCv2 + */ +struct sdma_mode_count { + u32 count : 16; /* size of the buffer pointed by this BD */ + u32 status : 8; /* E,R,I,C,W,D status bits stored here */ + u32 command : 8; /* command mostlky used for channel 0 */ +}; + +/* + * Buffer descriptor + */ +struct sdma_buffer_descriptor { + struct sdma_mode_count mode; + u32 buffer_addr; /* address of the buffer described */ + u32 ext_buffer_addr; /* extended buffer address */ +} __attribute__ ((packed)); + +/** + * struct sdma_channel_control - Channel control Block + * + * @current_bd_ptr current buffer descriptor processed + * @base_bd_ptr first element of buffer descriptor array + * @unused padding. The SDMA engine expects an array of 128 byte + * control blocks + */ +struct sdma_channel_control { + u32 current_bd_ptr; + u32 base_bd_ptr; + u32 unused[2]; +} __attribute__ ((packed)); + +/** + * struct sdma_state_registers - SDMA context for a channel + * + * @pc: program counter + * @t: test bit: status of arithmetic & test instruction + * @rpc: return program counter + * @sf: source fault while loading data + * @spc: loop start program counter + * @df: destination fault while storing data + * @epc: loop end program counter + * @lm: loop mode + */ +struct sdma_state_registers { + u32 pc :14; + u32 unused1: 1; + u32 t : 1; + u32 rpc :14; + u32 unused0: 1; + u32 sf : 1; + u32 spc :14; + u32 unused2: 1; + u32 df : 1; + u32 epc :14; + u32 lm : 2; +} __attribute__ ((packed)); + +/** + * struct sdma_context_data - sdma context specific to a channel + * + * @channel_state: channel state bits + * @gReg: general registers + * @mda: burst dma destination address register + * @msa: burst dma source address register + * @ms: burst dma status register + * @md: burst dma data register + * @pda: peripheral dma destination address register + * @psa: peripheral dma source address register + * @ps: peripheral dma status register + * @pd: peripheral dma data register + * @ca: CRC polynomial register + * @cs: CRC accumulator register + * @dda: dedicated core destination address register + * @dsa: dedicated core source address register + * @ds: dedicated core status register + * @dd: dedicated core data register + */ +struct sdma_context_data { + struct sdma_state_registers channel_state; + u32 gReg[8]; + u32 mda; + u32 msa; + u32 ms; + u32 md; + u32 pda; + u32 psa; + u32 ps; + u32 pd; + u32 ca; + u32 cs; + u32 dda; + u32 dsa; + u32 ds; + u32 dd; + u32 scratch0; + u32 scratch1; + u32 scratch2; + u32 scratch3; + u32 scratch4; + u32 scratch5; + u32 scratch6; + u32 scratch7; +} __attribute__ ((packed)); + +#define NUM_BD (int)(PAGE_SIZE / sizeof(struct sdma_buffer_descriptor)) + +struct sdma_engine; + +/** + * struct sdma_channel - housekeeping for a SDMA channel + * + * @sdma pointer to the SDMA engine for this channel + * @channel the channel number, matches dmaengine chan_id + 1 + * @direction transfer type. Needed for setting SDMA script + * @peripheral_type Peripheral type. Needed for setting SDMA script + * @event_id0 aka dma request line + * @event_id1 for channels that use 2 events + * @word_size peripheral access size + * @buf_tail ID of the buffer that was processed + * @done channel completion + * @num_bd max NUM_BD. number of descriptors currently handling + */ +struct sdma_channel { + struct sdma_engine *sdma; + unsigned int channel; + enum dma_transfer_direction direction; + enum sdma_peripheral_type peripheral_type; + unsigned int event_id0; + unsigned int event_id1; + enum dma_slave_buswidth word_size; + unsigned int buf_tail; + struct completion done; + unsigned int num_bd; + struct sdma_buffer_descriptor *bd; + dma_addr_t bd_phys; + unsigned int pc_from_device, pc_to_device; + unsigned long flags; + dma_addr_t per_address; + unsigned long event_mask[2]; + unsigned long watermark_level; + u32 shp_addr, per_addr; + struct dma_chan chan; + spinlock_t lock; + struct dma_async_tx_descriptor desc; + enum dma_status status; + unsigned int chn_count; + unsigned int chn_real_count; +}; + +#define IMX_DMA_SG_LOOP BIT(0) + +#define MAX_DMA_CHANNELS 32 +#define MXC_SDMA_DEFAULT_PRIORITY 1 +#define MXC_SDMA_MIN_PRIORITY 1 +#define MXC_SDMA_MAX_PRIORITY 7 + +#define SDMA_FIRMWARE_MAGIC 0x414d4453 + +/** + * struct sdma_firmware_header - Layout of the firmware image + * + * @magic "SDMA" + * @version_major increased whenever layout of struct sdma_script_start_addrs + * changes. + * @version_minor firmware minor version (for binary compatible changes) + * @script_addrs_start offset of struct sdma_script_start_addrs in this image + * @num_script_addrs Number of script addresses in this image + * @ram_code_start offset of SDMA ram image in this firmware image + * @ram_code_size size of SDMA ram image + * @script_addrs Stores the start address of the SDMA scripts + * (in SDMA memory space) + */ +struct sdma_firmware_header { + u32 magic; + u32 version_major; + u32 version_minor; + u32 script_addrs_start; + u32 num_script_addrs; + u32 ram_code_start; + u32 ram_code_size; +}; + +enum sdma_devtype { + IMX31_SDMA, /* runs on i.mx31 */ + IMX35_SDMA, /* runs on i.mx35 and later */ +}; + +struct sdma_engine { + struct device *dev; + struct device_dma_parameters dma_parms; + struct sdma_channel channel[MAX_DMA_CHANNELS]; + struct sdma_channel_control *channel_control; + void __iomem *regs; + enum sdma_devtype devtype; + unsigned int num_events; + struct sdma_context_data *context; + dma_addr_t context_phys; + struct dma_device dma_device; + struct clk *clk; + struct mutex channel_0_lock; + struct sdma_script_start_addrs *script_addrs; +}; + +static struct platform_device_id sdma_devtypes[] = { + { + .name = "imx31-sdma", + .driver_data = IMX31_SDMA, + }, { + .name = "imx35-sdma", + .driver_data = IMX35_SDMA, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(platform, sdma_devtypes); + +static const struct of_device_id sdma_dt_ids[] = { + { .compatible = "fsl,imx31-sdma", .data = &sdma_devtypes[IMX31_SDMA], }, + { .compatible = "fsl,imx35-sdma", .data = &sdma_devtypes[IMX35_SDMA], }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sdma_dt_ids); + +#define SDMA_H_CONFIG_DSPDMA BIT(12) /* indicates if the DSPDMA is used */ +#define SDMA_H_CONFIG_RTD_PINS BIT(11) /* indicates if Real-Time Debug pins are enabled */ +#define SDMA_H_CONFIG_ACR BIT(4) /* indicates if AHB freq /core freq = 2 or 1 */ +#define SDMA_H_CONFIG_CSM (3) /* indicates which context switch mode is selected*/ + +static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event) +{ + u32 chnenbl0 = (sdma->devtype == IMX31_SDMA ? SDMA_CHNENBL0_IMX31 : + SDMA_CHNENBL0_IMX35); + return chnenbl0 + event * 4; +} + +static int sdma_config_ownership(struct sdma_channel *sdmac, + bool event_override, bool mcu_override, bool dsp_override) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + unsigned long evt, mcu, dsp; + + if (event_override && mcu_override && dsp_override) + return -EINVAL; + + evt = readl_relaxed(sdma->regs + SDMA_H_EVTOVR); + mcu = readl_relaxed(sdma->regs + SDMA_H_HOSTOVR); + dsp = readl_relaxed(sdma->regs + SDMA_H_DSPOVR); + + if (dsp_override) + __clear_bit(channel, &dsp); + else + __set_bit(channel, &dsp); + + if (event_override) + __clear_bit(channel, &evt); + else + __set_bit(channel, &evt); + + if (mcu_override) + __clear_bit(channel, &mcu); + else + __set_bit(channel, &mcu); + + writel_relaxed(evt, sdma->regs + SDMA_H_EVTOVR); + writel_relaxed(mcu, sdma->regs + SDMA_H_HOSTOVR); + writel_relaxed(dsp, sdma->regs + SDMA_H_DSPOVR); + + return 0; +} + +static void sdma_enable_channel(struct sdma_engine *sdma, int channel) +{ + writel(BIT(channel), sdma->regs + SDMA_H_START); +} + +/* + * sdma_run_channel - run a channel and wait till it's done + */ +static int sdma_run_channel(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + int ret; + + init_completion(&sdmac->done); + + sdma_enable_channel(sdma, channel); + + ret = wait_for_completion_timeout(&sdmac->done, HZ); + + return ret ? 0 : -ETIMEDOUT; +} + +static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size, + u32 address) +{ + struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd; + void *buf_virt; + dma_addr_t buf_phys; + int ret; + + mutex_lock(&sdma->channel_0_lock); + + buf_virt = dma_alloc_coherent(NULL, + size, + &buf_phys, GFP_KERNEL); + if (!buf_virt) { + ret = -ENOMEM; + goto err_out; + } + + bd0->mode.command = C0_SETPM; + bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; + bd0->mode.count = size / 2; + bd0->buffer_addr = buf_phys; + bd0->ext_buffer_addr = address; + + memcpy(buf_virt, buf, size); + + ret = sdma_run_channel(&sdma->channel[0]); + + dma_free_coherent(NULL, size, buf_virt, buf_phys); + +err_out: + mutex_unlock(&sdma->channel_0_lock); + + return ret; +} + +static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + unsigned long val; + u32 chnenbl = chnenbl_ofs(sdma, event); + + val = readl_relaxed(sdma->regs + chnenbl); + __set_bit(channel, &val); + writel_relaxed(val, sdma->regs + chnenbl); +} + +static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + u32 chnenbl = chnenbl_ofs(sdma, event); + unsigned long val; + + val = readl_relaxed(sdma->regs + chnenbl); + __clear_bit(channel, &val); + writel_relaxed(val, sdma->regs + chnenbl); +} + +static void sdma_handle_channel_loop(struct sdma_channel *sdmac) +{ + struct sdma_buffer_descriptor *bd; + + /* + * loop mode. Iterate over descriptors, re-setup them and + * call callback function. + */ + while (1) { + bd = &sdmac->bd[sdmac->buf_tail]; + + if (bd->mode.status & BD_DONE) + break; + + if (bd->mode.status & BD_RROR) + sdmac->status = DMA_ERROR; + else + sdmac->status = DMA_IN_PROGRESS; + + bd->mode.status |= BD_DONE; + sdmac->buf_tail++; + sdmac->buf_tail %= sdmac->num_bd; + + if (sdmac->desc.callback) + sdmac->desc.callback(sdmac->desc.callback_param); + } +} + +static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac) +{ + struct sdma_buffer_descriptor *bd; + int i, error = 0; + + sdmac->chn_real_count = 0; + /* + * non loop mode. Iterate over all descriptors, collect + * errors and call callback function + */ + for (i = 0; i < sdmac->num_bd; i++) { + bd = &sdmac->bd[i]; + + if (bd->mode.status & (BD_DONE | BD_RROR)) + error = -EIO; + sdmac->chn_real_count += bd->mode.count; + } + + if (error) + sdmac->status = DMA_ERROR; + else + sdmac->status = DMA_SUCCESS; + + dma_cookie_complete(&sdmac->desc); + if (sdmac->desc.callback) + sdmac->desc.callback(sdmac->desc.callback_param); +} + +static void mxc_sdma_handle_channel(struct sdma_channel *sdmac) +{ + complete(&sdmac->done); + + /* not interested in channel 0 interrupts */ + if (sdmac->channel == 0) + return; + + if (sdmac->flags & IMX_DMA_SG_LOOP) + sdma_handle_channel_loop(sdmac); + else + mxc_sdma_handle_channel_normal(sdmac); +} + +static irqreturn_t sdma_int_handler(int irq, void *dev_id) +{ + struct sdma_engine *sdma = dev_id; + unsigned long stat; + + stat = readl_relaxed(sdma->regs + SDMA_H_INTR); + writel_relaxed(stat, sdma->regs + SDMA_H_INTR); + + while (stat) { + int channel = fls(stat) - 1; + struct sdma_channel *sdmac = &sdma->channel[channel]; + + mxc_sdma_handle_channel(sdmac); + + __clear_bit(channel, &stat); + } + + return IRQ_HANDLED; +} + +/* + * sets the pc of SDMA script according to the peripheral type + */ +static void sdma_get_pc(struct sdma_channel *sdmac, + enum sdma_peripheral_type peripheral_type) +{ + struct sdma_engine *sdma = sdmac->sdma; + int per_2_emi = 0, emi_2_per = 0; + /* + * These are needed once we start to support transfers between + * two peripherals or memory-to-memory transfers + */ + int per_2_per = 0, emi_2_emi = 0; + + sdmac->pc_from_device = 0; + sdmac->pc_to_device = 0; + + switch (peripheral_type) { + case IMX_DMATYPE_MEMORY: + emi_2_emi = sdma->script_addrs->ap_2_ap_addr; + break; + case IMX_DMATYPE_DSP: + emi_2_per = sdma->script_addrs->bp_2_ap_addr; + per_2_emi = sdma->script_addrs->ap_2_bp_addr; + break; + case IMX_DMATYPE_FIRI: + per_2_emi = sdma->script_addrs->firi_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_firi_addr; + break; + case IMX_DMATYPE_UART: + per_2_emi = sdma->script_addrs->uart_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_app_addr; + break; + case IMX_DMATYPE_UART_SP: + per_2_emi = sdma->script_addrs->uartsh_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + break; + case IMX_DMATYPE_ATA: + per_2_emi = sdma->script_addrs->ata_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_ata_addr; + break; + case IMX_DMATYPE_CSPI: + case IMX_DMATYPE_EXT: + case IMX_DMATYPE_SSI: + per_2_emi = sdma->script_addrs->app_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_app_addr; + break; + case IMX_DMATYPE_SSI_SP: + case IMX_DMATYPE_MMC: + case IMX_DMATYPE_SDHC: + case IMX_DMATYPE_CSPI_SP: + case IMX_DMATYPE_ESAI: + case IMX_DMATYPE_MSHC_SP: + per_2_emi = sdma->script_addrs->shp_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + break; + case IMX_DMATYPE_ASRC: + per_2_emi = sdma->script_addrs->asrc_2_mcu_addr; + emi_2_per = sdma->script_addrs->asrc_2_mcu_addr; + per_2_per = sdma->script_addrs->per_2_per_addr; + break; + case IMX_DMATYPE_MSHC: + per_2_emi = sdma->script_addrs->mshc_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_mshc_addr; + break; + case IMX_DMATYPE_CCM: + per_2_emi = sdma->script_addrs->dptc_dvfs_addr; + break; + case IMX_DMATYPE_SPDIF: + per_2_emi = sdma->script_addrs->spdif_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_spdif_addr; + break; + case IMX_DMATYPE_IPU_MEMORY: + emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr; + break; + default: + break; + } + + sdmac->pc_from_device = per_2_emi; + sdmac->pc_to_device = emi_2_per; +} + +static int sdma_load_context(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + int load_address; + struct sdma_context_data *context = sdma->context; + struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd; + int ret; + + if (sdmac->direction == DMA_DEV_TO_MEM) { + load_address = sdmac->pc_from_device; + } else { + load_address = sdmac->pc_to_device; + } + + if (load_address < 0) + return load_address; + + dev_dbg(sdma->dev, "load_address = %d\n", load_address); + dev_dbg(sdma->dev, "wml = 0x%08x\n", (u32)sdmac->watermark_level); + dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr); + dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr); + dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", (u32)sdmac->event_mask[0]); + dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", (u32)sdmac->event_mask[1]); + + mutex_lock(&sdma->channel_0_lock); + + memset(context, 0, sizeof(*context)); + context->channel_state.pc = load_address; + + /* Send by context the event mask,base address for peripheral + * and watermark level + */ + context->gReg[0] = sdmac->event_mask[1]; + context->gReg[1] = sdmac->event_mask[0]; + context->gReg[2] = sdmac->per_addr; + context->gReg[6] = sdmac->shp_addr; + context->gReg[7] = sdmac->watermark_level; + + bd0->mode.command = C0_SETDM; + bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; + bd0->mode.count = sizeof(*context) / 4; + bd0->buffer_addr = sdma->context_phys; + bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel; + + ret = sdma_run_channel(&sdma->channel[0]); + + mutex_unlock(&sdma->channel_0_lock); + + return ret; +} + +static void sdma_disable_channel(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + + writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP); + sdmac->status = DMA_ERROR; +} + +static int sdma_config_channel(struct sdma_channel *sdmac) +{ + int ret; + + sdma_disable_channel(sdmac); + + sdmac->event_mask[0] = 0; + sdmac->event_mask[1] = 0; + sdmac->shp_addr = 0; + sdmac->per_addr = 0; + + if (sdmac->event_id0) { + if (sdmac->event_id0 >= sdmac->sdma->num_events) + return -EINVAL; + sdma_event_enable(sdmac, sdmac->event_id0); + } + + switch (sdmac->peripheral_type) { + case IMX_DMATYPE_DSP: + sdma_config_ownership(sdmac, false, true, true); + break; + case IMX_DMATYPE_MEMORY: + sdma_config_ownership(sdmac, false, true, false); + break; + default: + sdma_config_ownership(sdmac, true, true, false); + break; + } + + sdma_get_pc(sdmac, sdmac->peripheral_type); + + if ((sdmac->peripheral_type != IMX_DMATYPE_MEMORY) && + (sdmac->peripheral_type != IMX_DMATYPE_DSP)) { + /* Handle multiple event channels differently */ + if (sdmac->event_id1) { + sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32); + if (sdmac->event_id1 > 31) + __set_bit(31, &sdmac->watermark_level); + sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32); + if (sdmac->event_id0 > 31) + __set_bit(30, &sdmac->watermark_level); + } else { + __set_bit(sdmac->event_id0, sdmac->event_mask); + } + /* Watermark Level */ + sdmac->watermark_level |= sdmac->watermark_level; + /* Address */ + sdmac->shp_addr = sdmac->per_address; + } else { + sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */ + } + + ret = sdma_load_context(sdmac); + + return ret; +} + +static int sdma_set_channel_priority(struct sdma_channel *sdmac, + unsigned int priority) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + + if (priority < MXC_SDMA_MIN_PRIORITY + || priority > MXC_SDMA_MAX_PRIORITY) { + return -EINVAL; + } + + writel_relaxed(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel); + + return 0; +} + +static int sdma_request_channel(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + int channel = sdmac->channel; + int ret = -EBUSY; + + sdmac->bd = dma_alloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys, GFP_KERNEL); + if (!sdmac->bd) { + ret = -ENOMEM; + goto out; + } + + memset(sdmac->bd, 0, PAGE_SIZE); + + sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys; + sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys; + + sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY); + + init_completion(&sdmac->done); + + sdmac->buf_tail = 0; + + return 0; +out: + + return ret; +} + +static struct sdma_channel *to_sdma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct sdma_channel, chan); +} + +static dma_cookie_t sdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + unsigned long flags; + struct sdma_channel *sdmac = to_sdma_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_irqsave(&sdmac->lock, flags); + + cookie = dma_cookie_assign(tx); + + spin_unlock_irqrestore(&sdmac->lock, flags); + + return cookie; +} + +static int sdma_alloc_chan_resources(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct imx_dma_data *data = chan->private; + int prio, ret; + + if (!data) + return -EINVAL; + + switch (data->priority) { + case DMA_PRIO_HIGH: + prio = 3; + break; + case DMA_PRIO_MEDIUM: + prio = 2; + break; + case DMA_PRIO_LOW: + default: + prio = 1; + break; + } + + sdmac->peripheral_type = data->peripheral_type; + sdmac->event_id0 = data->dma_request; + + clk_enable(sdmac->sdma->clk); + + ret = sdma_request_channel(sdmac); + if (ret) + return ret; + + ret = sdma_set_channel_priority(sdmac, prio); + if (ret) + return ret; + + dma_async_tx_descriptor_init(&sdmac->desc, chan); + sdmac->desc.tx_submit = sdma_tx_submit; + /* txd.flags will be overwritten in prep funcs */ + sdmac->desc.flags = DMA_CTRL_ACK; + + return 0; +} + +static void sdma_free_chan_resources(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + + sdma_disable_channel(sdmac); + + if (sdmac->event_id0) + sdma_event_disable(sdmac, sdmac->event_id0); + if (sdmac->event_id1) + sdma_event_disable(sdmac, sdmac->event_id1); + + sdmac->event_id0 = 0; + sdmac->event_id1 = 0; + + sdma_set_channel_priority(sdmac, 0); + + dma_free_coherent(NULL, PAGE_SIZE, sdmac->bd, sdmac->bd_phys); + + clk_disable(sdma->clk); +} + +static struct dma_async_tx_descriptor *sdma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + int ret, i, count; + int channel = sdmac->channel; + struct scatterlist *sg; + + if (sdmac->status == DMA_IN_PROGRESS) + return NULL; + sdmac->status = DMA_IN_PROGRESS; + + sdmac->flags = 0; + + dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n", + sg_len, channel); + + sdmac->direction = direction; + ret = sdma_load_context(sdmac); + if (ret) + goto err_out; + + if (sg_len > NUM_BD) { + dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n", + channel, sg_len, NUM_BD); + ret = -EINVAL; + goto err_out; + } + + sdmac->chn_count = 0; + for_each_sg(sgl, sg, sg_len, i) { + struct sdma_buffer_descriptor *bd = &sdmac->bd[i]; + int param; + + bd->buffer_addr = sg->dma_address; + + count = sg->length; + + if (count > 0xffff) { + dev_err(sdma->dev, "SDMA channel %d: maximum bytes for sg entry exceeded: %d > %d\n", + channel, count, 0xffff); + ret = -EINVAL; + goto err_out; + } + + bd->mode.count = count; + sdmac->chn_count += count; + + if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) { + ret = -EINVAL; + goto err_out; + } + + switch (sdmac->word_size) { + case DMA_SLAVE_BUSWIDTH_4_BYTES: + bd->mode.command = 0; + if (count & 3 || sg->dma_address & 3) + return NULL; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + bd->mode.command = 2; + if (count & 1 || sg->dma_address & 1) + return NULL; + break; + case DMA_SLAVE_BUSWIDTH_1_BYTE: + bd->mode.command = 1; + break; + default: + return NULL; + } + + param = BD_DONE | BD_EXTD | BD_CONT; + + if (i + 1 == sg_len) { + param |= BD_INTR; + param |= BD_LAST; + param &= ~BD_CONT; + } + + dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n", + i, count, sg->dma_address, + param & BD_WRAP ? "wrap" : "", + param & BD_INTR ? " intr" : ""); + + bd->mode.status = param; + } + + sdmac->num_bd = sg_len; + sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys; + + return &sdmac->desc; +err_out: + sdmac->status = DMA_ERROR; + return NULL; +} + +static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + void *context) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + int num_periods = buf_len / period_len; + int channel = sdmac->channel; + int ret, i = 0, buf = 0; + + dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel); + + if (sdmac->status == DMA_IN_PROGRESS) + return NULL; + + sdmac->status = DMA_IN_PROGRESS; + + sdmac->flags |= IMX_DMA_SG_LOOP; + sdmac->direction = direction; + ret = sdma_load_context(sdmac); + if (ret) + goto err_out; + + if (num_periods > NUM_BD) { + dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n", + channel, num_periods, NUM_BD); + goto err_out; + } + + if (period_len > 0xffff) { + dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %d > %d\n", + channel, period_len, 0xffff); + goto err_out; + } + + while (buf < buf_len) { + struct sdma_buffer_descriptor *bd = &sdmac->bd[i]; + int param; + + bd->buffer_addr = dma_addr; + + bd->mode.count = period_len; + + if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) + goto err_out; + if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES) + bd->mode.command = 0; + else + bd->mode.command = sdmac->word_size; + + param = BD_DONE | BD_EXTD | BD_CONT | BD_INTR; + if (i + 1 == num_periods) + param |= BD_WRAP; + + dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n", + i, period_len, dma_addr, + param & BD_WRAP ? "wrap" : "", + param & BD_INTR ? " intr" : ""); + + bd->mode.status = param; + + dma_addr += period_len; + buf += period_len; + + i++; + } + + sdmac->num_bd = num_periods; + sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys; + + return &sdmac->desc; +err_out: + sdmac->status = DMA_ERROR; + return NULL; +} + +static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct dma_slave_config *dmaengine_cfg = (void *)arg; + + switch (cmd) { + case DMA_TERMINATE_ALL: + sdma_disable_channel(sdmac); + return 0; + case DMA_SLAVE_CONFIG: + if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) { + sdmac->per_address = dmaengine_cfg->src_addr; + sdmac->watermark_level = dmaengine_cfg->src_maxburst * + dmaengine_cfg->src_addr_width; + sdmac->word_size = dmaengine_cfg->src_addr_width; + } else { + sdmac->per_address = dmaengine_cfg->dst_addr; + sdmac->watermark_level = dmaengine_cfg->dst_maxburst * + dmaengine_cfg->dst_addr_width; + sdmac->word_size = dmaengine_cfg->dst_addr_width; + } + sdmac->direction = dmaengine_cfg->direction; + return sdma_config_channel(sdmac); + default: + return -ENOSYS; + } + + return -EINVAL; +} + +static enum dma_status sdma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + dma_cookie_t last_used; + + last_used = chan->cookie; + + dma_set_tx_state(txstate, chan->completed_cookie, last_used, + sdmac->chn_count - sdmac->chn_real_count); + + return sdmac->status; +} + +static void sdma_issue_pending(struct dma_chan *chan) +{ + struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; + + if (sdmac->status == DMA_IN_PROGRESS) + sdma_enable_channel(sdma, sdmac->channel); +} + +#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1 34 + +static void sdma_add_scripts(struct sdma_engine *sdma, + const struct sdma_script_start_addrs *addr) +{ + s32 *addr_arr = (u32 *)addr; + s32 *saddr_arr = (u32 *)sdma->script_addrs; + int i; + + for (i = 0; i < SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; i++) + if (addr_arr[i] > 0) + saddr_arr[i] = addr_arr[i]; +} + +static void sdma_load_firmware(const struct firmware *fw, void *context) +{ + struct sdma_engine *sdma = context; + const struct sdma_firmware_header *header; + const struct sdma_script_start_addrs *addr; + unsigned short *ram_code; + + if (!fw) { + dev_err(sdma->dev, "firmware not found\n"); + return; + } + + if (fw->size < sizeof(*header)) + goto err_firmware; + + header = (struct sdma_firmware_header *)fw->data; + + if (header->magic != SDMA_FIRMWARE_MAGIC) + goto err_firmware; + if (header->ram_code_start + header->ram_code_size > fw->size) + goto err_firmware; + + addr = (void *)header + header->script_addrs_start; + ram_code = (void *)header + header->ram_code_start; + + clk_enable(sdma->clk); + /* download the RAM image for SDMA */ + sdma_load_script(sdma, ram_code, + header->ram_code_size, + addr->ram_code_start_addr); + clk_disable(sdma->clk); + + sdma_add_scripts(sdma, addr); + + dev_info(sdma->dev, "loaded firmware %d.%d\n", + header->version_major, + header->version_minor); + +err_firmware: + release_firmware(fw); +} + +static int __init sdma_get_firmware(struct sdma_engine *sdma, + const char *fw_name) +{ + int ret; + + ret = request_firmware_nowait(THIS_MODULE, + FW_ACTION_HOTPLUG, fw_name, sdma->dev, + GFP_KERNEL, sdma, sdma_load_firmware); + + return ret; +} + +static int __init sdma_init(struct sdma_engine *sdma) +{ + int i, ret; + dma_addr_t ccb_phys; + + switch (sdma->devtype) { + case IMX31_SDMA: + sdma->num_events = 32; + break; + case IMX35_SDMA: + sdma->num_events = 48; + break; + default: + dev_err(sdma->dev, "Unknown sdma type %d. aborting\n", + sdma->devtype); + return -ENODEV; + } + + clk_enable(sdma->clk); + + /* Be sure SDMA has not started yet */ + writel_relaxed(0, sdma->regs + SDMA_H_C0PTR); + + sdma->channel_control = dma_alloc_coherent(NULL, + MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) + + sizeof(struct sdma_context_data), + &ccb_phys, GFP_KERNEL); + + if (!sdma->channel_control) { + ret = -ENOMEM; + goto err_dma_alloc; + } + + sdma->context = (void *)sdma->channel_control + + MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control); + sdma->context_phys = ccb_phys + + MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control); + + /* Zero-out the CCB structures array just allocated */ + memset(sdma->channel_control, 0, + MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control)); + + /* disable all channels */ + for (i = 0; i < sdma->num_events; i++) + writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i)); + + /* All channels have priority 0 */ + for (i = 0; i < MAX_DMA_CHANNELS; i++) + writel_relaxed(0, sdma->regs + SDMA_CHNPRI_0 + i * 4); + + ret = sdma_request_channel(&sdma->channel[0]); + if (ret) + goto err_dma_alloc; + + sdma_config_ownership(&sdma->channel[0], false, true, false); + + /* Set Command Channel (Channel Zero) */ + writel_relaxed(0x4050, sdma->regs + SDMA_CHN0ADDR); + + /* Set bits of CONFIG register but with static context switching */ + /* FIXME: Check whether to set ACR bit depending on clock ratios */ + writel_relaxed(0, sdma->regs + SDMA_H_CONFIG); + + writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR); + + /* Set bits of CONFIG register with given context switching mode */ + writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG); + + /* Initializes channel's priorities */ + sdma_set_channel_priority(&sdma->channel[0], 7); + + clk_disable(sdma->clk); + + return 0; + +err_dma_alloc: + clk_disable(sdma->clk); + dev_err(sdma->dev, "initialisation failed with %d\n", ret); + return ret; +} + +static int __init sdma_probe(struct platform_device *pdev) +{ + const struct of_device_id *of_id = + of_match_device(sdma_dt_ids, &pdev->dev); + struct device_node *np = pdev->dev.of_node; + const char *fw_name; + int ret; + int irq; + struct resource *iores; + struct sdma_platform_data *pdata = pdev->dev.platform_data; + int i; + struct sdma_engine *sdma; + s32 *saddr_arr; + + sdma = kzalloc(sizeof(*sdma), GFP_KERNEL); + if (!sdma) + return -ENOMEM; + + mutex_init(&sdma->channel_0_lock); + + sdma->dev = &pdev->dev; + + iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq = platform_get_irq(pdev, 0); + if (!iores || irq < 0) { + ret = -EINVAL; + goto err_irq; + } + + if (!request_mem_region(iores->start, resource_size(iores), pdev->name)) { + ret = -EBUSY; + goto err_request_region; + } + + sdma->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(sdma->clk)) { + ret = PTR_ERR(sdma->clk); + goto err_clk; + } + + sdma->regs = ioremap(iores->start, resource_size(iores)); + if (!sdma->regs) { + ret = -ENOMEM; + goto err_ioremap; + } + + ret = request_irq(irq, sdma_int_handler, 0, "sdma", sdma); + if (ret) + goto err_request_irq; + + sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); + if (!sdma->script_addrs) { + ret = -ENOMEM; + goto err_alloc; + } + + /* initially no scripts available */ + saddr_arr = (s32 *)sdma->script_addrs; + for (i = 0; i < SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; i++) + saddr_arr[i] = -EINVAL; + + if (of_id) + pdev->id_entry = of_id->data; + sdma->devtype = pdev->id_entry->driver_data; + + dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask); + dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask); + + INIT_LIST_HEAD(&sdma->dma_device.channels); + /* Initialize channel parameters */ + for (i = 0; i < MAX_DMA_CHANNELS; i++) { + struct sdma_channel *sdmac = &sdma->channel[i]; + + sdmac->sdma = sdma; + spin_lock_init(&sdmac->lock); + + sdmac->chan.device = &sdma->dma_device; + dma_cookie_init(&sdmac->chan); + sdmac->channel = i; + + /* + * Add the channel to the DMAC list. Do not add channel 0 though + * because we need it internally in the SDMA driver. This also means + * that channel 0 in dmaengine counting matches sdma channel 1. + */ + if (i) + list_add_tail(&sdmac->chan.device_node, + &sdma->dma_device.channels); + } + + ret = sdma_init(sdma); + if (ret) + goto err_init; + + if (pdata && pdata->script_addrs) + sdma_add_scripts(sdma, pdata->script_addrs); + + if (pdata) { + ret = sdma_get_firmware(sdma, pdata->fw_name); + if (ret) + dev_warn(&pdev->dev, "failed to get firmware from platform data\n"); + } else { + /* + * Because that device tree does not encode ROM script address, + * the RAM script in firmware is mandatory for device tree + * probe, otherwise it fails. + */ + ret = of_property_read_string(np, "fsl,sdma-ram-script-name", + &fw_name); + if (ret) + dev_warn(&pdev->dev, "failed to get firmware name\n"); + else { + ret = sdma_get_firmware(sdma, fw_name); + if (ret) + dev_warn(&pdev->dev, "failed to get firmware from device tree\n"); + } + } + + sdma->dma_device.dev = &pdev->dev; + + sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources; + sdma->dma_device.device_free_chan_resources = sdma_free_chan_resources; + sdma->dma_device.device_tx_status = sdma_tx_status; + sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg; + sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic; + sdma->dma_device.device_control = sdma_control; + sdma->dma_device.device_issue_pending = sdma_issue_pending; + sdma->dma_device.dev->dma_parms = &sdma->dma_parms; + dma_set_max_seg_size(sdma->dma_device.dev, 65535); + + ret = dma_async_device_register(&sdma->dma_device); + if (ret) { + dev_err(&pdev->dev, "unable to register\n"); + goto err_init; + } + + dev_info(sdma->dev, "initialized\n"); + + return 0; + +err_init: + kfree(sdma->script_addrs); +err_alloc: + free_irq(irq, sdma); +err_request_irq: + iounmap(sdma->regs); +err_ioremap: + clk_put(sdma->clk); +err_clk: + release_mem_region(iores->start, resource_size(iores)); +err_request_region: +err_irq: + kfree(sdma); + return ret; +} + +static int __exit sdma_remove(struct platform_device *pdev) +{ + return -EBUSY; +} + +static struct platform_driver sdma_driver = { + .driver = { + .name = "imx-sdma", + .of_match_table = sdma_dt_ids, + }, + .id_table = sdma_devtypes, + .remove = __exit_p(sdma_remove), +}; + +static int __init sdma_module_init(void) +{ + return platform_driver_probe(&sdma_driver, sdma_probe); +} +module_init(sdma_module_init); + +MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>"); +MODULE_DESCRIPTION("i.MX SDMA driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c new file mode 100644 index 00000000..c900ca7a --- /dev/null +++ b/drivers/dma/intel_mid_dma.c @@ -0,0 +1,1460 @@ +/* + * intel_mid_dma.c - Intel Langwell DMA Drivers + * + * Copyright (C) 2008-10 Intel Corp + * Author: Vinod Koul <vinod.koul@intel.com> + * The driver design is based on dw_dmac driver + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * + */ +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/pm_runtime.h> +#include <linux/intel_mid_dma.h> +#include <linux/module.h> + +#include "dmaengine.h" + +#define MAX_CHAN 4 /*max ch across controllers*/ +#include "intel_mid_dma_regs.h" + +#define INTEL_MID_DMAC1_ID 0x0814 +#define INTEL_MID_DMAC2_ID 0x0813 +#define INTEL_MID_GP_DMAC2_ID 0x0827 +#define INTEL_MFLD_DMAC1_ID 0x0830 +#define LNW_PERIPHRAL_MASK_BASE 0xFFAE8008 +#define LNW_PERIPHRAL_MASK_SIZE 0x10 +#define LNW_PERIPHRAL_STATUS 0x0 +#define LNW_PERIPHRAL_MASK 0x8 + +struct intel_mid_dma_probe_info { + u8 max_chan; + u8 ch_base; + u16 block_size; + u32 pimr_mask; +}; + +#define INFO(_max_chan, _ch_base, _block_size, _pimr_mask) \ + ((kernel_ulong_t)&(struct intel_mid_dma_probe_info) { \ + .max_chan = (_max_chan), \ + .ch_base = (_ch_base), \ + .block_size = (_block_size), \ + .pimr_mask = (_pimr_mask), \ + }) + +/***************************************************************************** +Utility Functions*/ +/** + * get_ch_index - convert status to channel + * @status: status mask + * @base: dma ch base value + * + * Modify the status mask and return the channel index needing + * attention (or -1 if neither) + */ +static int get_ch_index(int *status, unsigned int base) +{ + int i; + for (i = 0; i < MAX_CHAN; i++) { + if (*status & (1 << (i + base))) { + *status = *status & ~(1 << (i + base)); + pr_debug("MDMA: index %d New status %x\n", i, *status); + return i; + } + } + return -1; +} + +/** + * get_block_ts - calculates dma transaction length + * @len: dma transfer length + * @tx_width: dma transfer src width + * @block_size: dma controller max block size + * + * Based on src width calculate the DMA trsaction length in data items + * return data items or FFFF if exceeds max length for block + */ +static int get_block_ts(int len, int tx_width, int block_size) +{ + int byte_width = 0, block_ts = 0; + + switch (tx_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + byte_width = 1; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + byte_width = 2; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + default: + byte_width = 4; + break; + } + + block_ts = len/byte_width; + if (block_ts > block_size) + block_ts = 0xFFFF; + return block_ts; +} + +/***************************************************************************** +DMAC1 interrupt Functions*/ + +/** + * dmac1_mask_periphral_intr - mask the periphral interrupt + * @mid: dma device for which masking is required + * + * Masks the DMA periphral interrupt + * this is valid for DMAC1 family controllers only + * This controller should have periphral mask registers already mapped + */ +static void dmac1_mask_periphral_intr(struct middma_device *mid) +{ + u32 pimr; + + if (mid->pimr_mask) { + pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK); + pimr |= mid->pimr_mask; + writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK); + } + return; +} + +/** + * dmac1_unmask_periphral_intr - unmask the periphral interrupt + * @midc: dma channel for which masking is required + * + * UnMasks the DMA periphral interrupt, + * this is valid for DMAC1 family controllers only + * This controller should have periphral mask registers already mapped + */ +static void dmac1_unmask_periphral_intr(struct intel_mid_dma_chan *midc) +{ + u32 pimr; + struct middma_device *mid = to_middma_device(midc->chan.device); + + if (mid->pimr_mask) { + pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK); + pimr &= ~mid->pimr_mask; + writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK); + } + return; +} + +/** + * enable_dma_interrupt - enable the periphral interrupt + * @midc: dma channel for which enable interrupt is required + * + * Enable the DMA periphral interrupt, + * this is valid for DMAC1 family controllers only + * This controller should have periphral mask registers already mapped + */ +static void enable_dma_interrupt(struct intel_mid_dma_chan *midc) +{ + dmac1_unmask_periphral_intr(midc); + + /*en ch interrupts*/ + iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR); + iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR); + return; +} + +/** + * disable_dma_interrupt - disable the periphral interrupt + * @midc: dma channel for which disable interrupt is required + * + * Disable the DMA periphral interrupt, + * this is valid for DMAC1 family controllers only + * This controller should have periphral mask registers already mapped + */ +static void disable_dma_interrupt(struct intel_mid_dma_chan *midc) +{ + /*Check LPE PISR, make sure fwd is disabled*/ + iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_BLOCK); + iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR); + iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR); + return; +} + +/***************************************************************************** +DMA channel helper Functions*/ +/** + * mid_desc_get - get a descriptor + * @midc: dma channel for which descriptor is required + * + * Obtain a descriptor for the channel. Returns NULL if none are free. + * Once the descriptor is returned it is private until put on another + * list or freed + */ +static struct intel_mid_dma_desc *midc_desc_get(struct intel_mid_dma_chan *midc) +{ + struct intel_mid_dma_desc *desc, *_desc; + struct intel_mid_dma_desc *ret = NULL; + + spin_lock_bh(&midc->lock); + list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) { + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + } + spin_unlock_bh(&midc->lock); + return ret; +} + +/** + * mid_desc_put - put a descriptor + * @midc: dma channel for which descriptor is required + * @desc: descriptor to put + * + * Return a descriptor from lwn_desc_get back to the free pool + */ +static void midc_desc_put(struct intel_mid_dma_chan *midc, + struct intel_mid_dma_desc *desc) +{ + if (desc) { + spin_lock_bh(&midc->lock); + list_add_tail(&desc->desc_node, &midc->free_list); + spin_unlock_bh(&midc->lock); + } +} +/** + * midc_dostart - begin a DMA transaction + * @midc: channel for which txn is to be started + * @first: first descriptor of series + * + * Load a transaction into the engine. This must be called with midc->lock + * held and bh disabled. + */ +static void midc_dostart(struct intel_mid_dma_chan *midc, + struct intel_mid_dma_desc *first) +{ + struct middma_device *mid = to_middma_device(midc->chan.device); + + /* channel is idle */ + if (midc->busy && test_ch_en(midc->dma_base, midc->ch_id)) { + /*error*/ + pr_err("ERR_MDMA: channel is busy in start\n"); + /* The tasklet will hopefully advance the queue... */ + return; + } + midc->busy = true; + /*write registers and en*/ + iowrite32(first->sar, midc->ch_regs + SAR); + iowrite32(first->dar, midc->ch_regs + DAR); + iowrite32(first->lli_phys, midc->ch_regs + LLP); + iowrite32(first->cfg_hi, midc->ch_regs + CFG_HIGH); + iowrite32(first->cfg_lo, midc->ch_regs + CFG_LOW); + iowrite32(first->ctl_lo, midc->ch_regs + CTL_LOW); + iowrite32(first->ctl_hi, midc->ch_regs + CTL_HIGH); + pr_debug("MDMA:TX SAR %x,DAR %x,CFGL %x,CFGH %x,CTLH %x, CTLL %x\n", + (int)first->sar, (int)first->dar, first->cfg_hi, + first->cfg_lo, first->ctl_hi, first->ctl_lo); + first->status = DMA_IN_PROGRESS; + + iowrite32(ENABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN); +} + +/** + * midc_descriptor_complete - process completed descriptor + * @midc: channel owning the descriptor + * @desc: the descriptor itself + * + * Process a completed descriptor and perform any callbacks upon + * the completion. The completion handling drops the lock during the + * callbacks but must be called with the lock held. + */ +static void midc_descriptor_complete(struct intel_mid_dma_chan *midc, + struct intel_mid_dma_desc *desc) + __releases(&midc->lock) __acquires(&midc->lock) +{ + struct dma_async_tx_descriptor *txd = &desc->txd; + dma_async_tx_callback callback_txd = NULL; + struct intel_mid_dma_lli *llitem; + void *param_txd = NULL; + + dma_cookie_complete(txd); + callback_txd = txd->callback; + param_txd = txd->callback_param; + + if (desc->lli != NULL) { + /*clear the DONE bit of completed LLI in memory*/ + llitem = desc->lli + desc->current_lli; + llitem->ctl_hi &= CLEAR_DONE; + if (desc->current_lli < desc->lli_length-1) + (desc->current_lli)++; + else + desc->current_lli = 0; + } + spin_unlock_bh(&midc->lock); + if (callback_txd) { + pr_debug("MDMA: TXD callback set ... calling\n"); + callback_txd(param_txd); + } + if (midc->raw_tfr) { + desc->status = DMA_SUCCESS; + if (desc->lli != NULL) { + pci_pool_free(desc->lli_pool, desc->lli, + desc->lli_phys); + pci_pool_destroy(desc->lli_pool); + desc->lli = NULL; + } + list_move(&desc->desc_node, &midc->free_list); + midc->busy = false; + } + spin_lock_bh(&midc->lock); + +} +/** + * midc_scan_descriptors - check the descriptors in channel + * mark completed when tx is completete + * @mid: device + * @midc: channel to scan + * + * Walk the descriptor chain for the device and process any entries + * that are complete. + */ +static void midc_scan_descriptors(struct middma_device *mid, + struct intel_mid_dma_chan *midc) +{ + struct intel_mid_dma_desc *desc = NULL, *_desc = NULL; + + /*tx is complete*/ + list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) { + if (desc->status == DMA_IN_PROGRESS) + midc_descriptor_complete(midc, desc); + } + return; + } +/** + * midc_lli_fill_sg - Helper function to convert + * SG list to Linked List Items. + *@midc: Channel + *@desc: DMA descriptor + *@sglist: Pointer to SG list + *@sglen: SG list length + *@flags: DMA transaction flags + * + * Walk through the SG list and convert the SG list into Linked + * List Items (LLI). + */ +static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc, + struct intel_mid_dma_desc *desc, + struct scatterlist *sglist, + unsigned int sglen, + unsigned int flags) +{ + struct intel_mid_dma_slave *mids; + struct scatterlist *sg; + dma_addr_t lli_next, sg_phy_addr; + struct intel_mid_dma_lli *lli_bloc_desc; + union intel_mid_dma_ctl_lo ctl_lo; + union intel_mid_dma_ctl_hi ctl_hi; + int i; + + pr_debug("MDMA: Entered midc_lli_fill_sg\n"); + mids = midc->mid_slave; + + lli_bloc_desc = desc->lli; + lli_next = desc->lli_phys; + + ctl_lo.ctl_lo = desc->ctl_lo; + ctl_hi.ctl_hi = desc->ctl_hi; + for_each_sg(sglist, sg, sglen, i) { + /*Populate CTL_LOW and LLI values*/ + if (i != sglen - 1) { + lli_next = lli_next + + sizeof(struct intel_mid_dma_lli); + } else { + /*Check for circular list, otherwise terminate LLI to ZERO*/ + if (flags & DMA_PREP_CIRCULAR_LIST) { + pr_debug("MDMA: LLI is configured in circular mode\n"); + lli_next = desc->lli_phys; + } else { + lli_next = 0; + ctl_lo.ctlx.llp_dst_en = 0; + ctl_lo.ctlx.llp_src_en = 0; + } + } + /*Populate CTL_HI values*/ + ctl_hi.ctlx.block_ts = get_block_ts(sg->length, + desc->width, + midc->dma->block_size); + /*Populate SAR and DAR values*/ + sg_phy_addr = sg_phys(sg); + if (desc->dirn == DMA_MEM_TO_DEV) { + lli_bloc_desc->sar = sg_phy_addr; + lli_bloc_desc->dar = mids->dma_slave.dst_addr; + } else if (desc->dirn == DMA_DEV_TO_MEM) { + lli_bloc_desc->sar = mids->dma_slave.src_addr; + lli_bloc_desc->dar = sg_phy_addr; + } + /*Copy values into block descriptor in system memroy*/ + lli_bloc_desc->llp = lli_next; + lli_bloc_desc->ctl_lo = ctl_lo.ctl_lo; + lli_bloc_desc->ctl_hi = ctl_hi.ctl_hi; + + lli_bloc_desc++; + } + /*Copy very first LLI values to descriptor*/ + desc->ctl_lo = desc->lli->ctl_lo; + desc->ctl_hi = desc->lli->ctl_hi; + desc->sar = desc->lli->sar; + desc->dar = desc->lli->dar; + + return 0; +} +/***************************************************************************** +DMA engine callback Functions*/ +/** + * intel_mid_dma_tx_submit - callback to submit DMA transaction + * @tx: dma engine descriptor + * + * Submit the DMA trasaction for this descriptor, start if ch idle + */ +static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct intel_mid_dma_desc *desc = to_intel_mid_dma_desc(tx); + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&midc->lock); + cookie = dma_cookie_assign(tx); + + if (list_empty(&midc->active_list)) + list_add_tail(&desc->desc_node, &midc->active_list); + else + list_add_tail(&desc->desc_node, &midc->queue); + + midc_dostart(midc, desc); + spin_unlock_bh(&midc->lock); + + return cookie; +} + +/** + * intel_mid_dma_issue_pending - callback to issue pending txn + * @chan: chan where pending trascation needs to be checked and submitted + * + * Call for scan to issue pending descriptors + */ +static void intel_mid_dma_issue_pending(struct dma_chan *chan) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + + spin_lock_bh(&midc->lock); + if (!list_empty(&midc->queue)) + midc_scan_descriptors(to_middma_device(chan->device), midc); + spin_unlock_bh(&midc->lock); +} + +/** + * intel_mid_dma_tx_status - Return status of txn + * @chan: chan for where status needs to be checked + * @cookie: cookie for txn + * @txstate: DMA txn state + * + * Return status of DMA txn + */ +static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret != DMA_SUCCESS) { + spin_lock_bh(&midc->lock); + midc_scan_descriptors(to_middma_device(chan->device), midc); + spin_unlock_bh(&midc->lock); + + ret = dma_cookie_status(chan, cookie, txstate); + } + + return ret; +} + +static int dma_slave_control(struct dma_chan *chan, unsigned long arg) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + struct dma_slave_config *slave = (struct dma_slave_config *)arg; + struct intel_mid_dma_slave *mid_slave; + + BUG_ON(!midc); + BUG_ON(!slave); + pr_debug("MDMA: slave control called\n"); + + mid_slave = to_intel_mid_dma_slave(slave); + + BUG_ON(!mid_slave); + + midc->mid_slave = mid_slave; + return 0; +} +/** + * intel_mid_dma_device_control - DMA device control + * @chan: chan for DMA control + * @cmd: control cmd + * @arg: cmd arg value + * + * Perform DMA control command + */ +static int intel_mid_dma_device_control(struct dma_chan *chan, + enum dma_ctrl_cmd cmd, unsigned long arg) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + struct middma_device *mid = to_middma_device(chan->device); + struct intel_mid_dma_desc *desc, *_desc; + union intel_mid_dma_cfg_lo cfg_lo; + + if (cmd == DMA_SLAVE_CONFIG) + return dma_slave_control(chan, arg); + + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + spin_lock_bh(&midc->lock); + if (midc->busy == false) { + spin_unlock_bh(&midc->lock); + return 0; + } + /*Suspend and disable the channel*/ + cfg_lo.cfg_lo = ioread32(midc->ch_regs + CFG_LOW); + cfg_lo.cfgx.ch_susp = 1; + iowrite32(cfg_lo.cfg_lo, midc->ch_regs + CFG_LOW); + iowrite32(DISABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN); + midc->busy = false; + /* Disable interrupts */ + disable_dma_interrupt(midc); + midc->descs_allocated = 0; + + spin_unlock_bh(&midc->lock); + list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) { + if (desc->lli != NULL) { + pci_pool_free(desc->lli_pool, desc->lli, + desc->lli_phys); + pci_pool_destroy(desc->lli_pool); + desc->lli = NULL; + } + list_move(&desc->desc_node, &midc->free_list); + } + return 0; +} + + +/** + * intel_mid_dma_prep_memcpy - Prep memcpy txn + * @chan: chan for DMA transfer + * @dest: destn address + * @src: src address + * @len: DMA transfer len + * @flags: DMA flags + * + * Perform a DMA memcpy. Note we support slave periphral DMA transfers only + * The periphral txn details should be filled in slave structure properly + * Returns the descriptor for this txn + */ +static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct intel_mid_dma_chan *midc; + struct intel_mid_dma_desc *desc = NULL; + struct intel_mid_dma_slave *mids; + union intel_mid_dma_ctl_lo ctl_lo; + union intel_mid_dma_ctl_hi ctl_hi; + union intel_mid_dma_cfg_lo cfg_lo; + union intel_mid_dma_cfg_hi cfg_hi; + enum dma_slave_buswidth width; + + pr_debug("MDMA: Prep for memcpy\n"); + BUG_ON(!chan); + if (!len) + return NULL; + + midc = to_intel_mid_dma_chan(chan); + BUG_ON(!midc); + + mids = midc->mid_slave; + BUG_ON(!mids); + + pr_debug("MDMA:called for DMA %x CH %d Length %zu\n", + midc->dma->pci_id, midc->ch_id, len); + pr_debug("MDMA:Cfg passed Mode %x, Dirn %x, HS %x, Width %x\n", + mids->cfg_mode, mids->dma_slave.direction, + mids->hs_mode, mids->dma_slave.src_addr_width); + + /*calculate CFG_LO*/ + if (mids->hs_mode == LNW_DMA_SW_HS) { + cfg_lo.cfg_lo = 0; + cfg_lo.cfgx.hs_sel_dst = 1; + cfg_lo.cfgx.hs_sel_src = 1; + } else if (mids->hs_mode == LNW_DMA_HW_HS) + cfg_lo.cfg_lo = 0x00000; + + /*calculate CFG_HI*/ + if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) { + /*SW HS only*/ + cfg_hi.cfg_hi = 0; + } else { + cfg_hi.cfg_hi = 0; + if (midc->dma->pimr_mask) { + cfg_hi.cfgx.protctl = 0x0; /*default value*/ + cfg_hi.cfgx.fifo_mode = 1; + if (mids->dma_slave.direction == DMA_MEM_TO_DEV) { + cfg_hi.cfgx.src_per = 0; + if (mids->device_instance == 0) + cfg_hi.cfgx.dst_per = 3; + if (mids->device_instance == 1) + cfg_hi.cfgx.dst_per = 1; + } else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) { + if (mids->device_instance == 0) + cfg_hi.cfgx.src_per = 2; + if (mids->device_instance == 1) + cfg_hi.cfgx.src_per = 0; + cfg_hi.cfgx.dst_per = 0; + } + } else { + cfg_hi.cfgx.protctl = 0x1; /*default value*/ + cfg_hi.cfgx.src_per = cfg_hi.cfgx.dst_per = + midc->ch_id - midc->dma->chan_base; + } + } + + /*calculate CTL_HI*/ + ctl_hi.ctlx.reser = 0; + ctl_hi.ctlx.done = 0; + width = mids->dma_slave.src_addr_width; + + ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size); + pr_debug("MDMA:calc len %d for block size %d\n", + ctl_hi.ctlx.block_ts, midc->dma->block_size); + /*calculate CTL_LO*/ + ctl_lo.ctl_lo = 0; + ctl_lo.ctlx.int_en = 1; + ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst; + ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst; + + /* + * Here we need some translation from "enum dma_slave_buswidth" + * to the format for our dma controller + * standard intel_mid_dmac's format + * 1 Byte 0b000 + * 2 Bytes 0b001 + * 4 Bytes 0b010 + */ + ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width / 2; + ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width / 2; + + if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) { + ctl_lo.ctlx.tt_fc = 0; + ctl_lo.ctlx.sinc = 0; + ctl_lo.ctlx.dinc = 0; + } else { + if (mids->dma_slave.direction == DMA_MEM_TO_DEV) { + ctl_lo.ctlx.sinc = 0; + ctl_lo.ctlx.dinc = 2; + ctl_lo.ctlx.tt_fc = 1; + } else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) { + ctl_lo.ctlx.sinc = 2; + ctl_lo.ctlx.dinc = 0; + ctl_lo.ctlx.tt_fc = 2; + } + } + + pr_debug("MDMA:Calc CTL LO %x, CTL HI %x, CFG LO %x, CFG HI %x\n", + ctl_lo.ctl_lo, ctl_hi.ctl_hi, cfg_lo.cfg_lo, cfg_hi.cfg_hi); + + enable_dma_interrupt(midc); + + desc = midc_desc_get(midc); + if (desc == NULL) + goto err_desc_get; + desc->sar = src; + desc->dar = dest ; + desc->len = len; + desc->cfg_hi = cfg_hi.cfg_hi; + desc->cfg_lo = cfg_lo.cfg_lo; + desc->ctl_lo = ctl_lo.ctl_lo; + desc->ctl_hi = ctl_hi.ctl_hi; + desc->width = width; + desc->dirn = mids->dma_slave.direction; + desc->lli_phys = 0; + desc->lli = NULL; + desc->lli_pool = NULL; + return &desc->txd; + +err_desc_get: + pr_err("ERR_MDMA: Failed to get desc\n"); + midc_desc_put(midc, desc); + return NULL; +} +/** + * intel_mid_dma_prep_slave_sg - Prep slave sg txn + * @chan: chan for DMA transfer + * @sgl: scatter gather list + * @sg_len: length of sg txn + * @direction: DMA transfer dirtn + * @flags: DMA flags + * @context: transfer context (ignored) + * + * Prepares LLI based periphral transfer + */ +static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct intel_mid_dma_chan *midc = NULL; + struct intel_mid_dma_slave *mids = NULL; + struct intel_mid_dma_desc *desc = NULL; + struct dma_async_tx_descriptor *txd = NULL; + union intel_mid_dma_ctl_lo ctl_lo; + + pr_debug("MDMA: Prep for slave SG\n"); + + if (!sg_len) { + pr_err("MDMA: Invalid SG length\n"); + return NULL; + } + midc = to_intel_mid_dma_chan(chan); + BUG_ON(!midc); + + mids = midc->mid_slave; + BUG_ON(!mids); + + if (!midc->dma->pimr_mask) { + /* We can still handle sg list with only one item */ + if (sg_len == 1) { + txd = intel_mid_dma_prep_memcpy(chan, + mids->dma_slave.dst_addr, + mids->dma_slave.src_addr, + sgl->length, + flags); + return txd; + } else { + pr_warn("MDMA: SG list is not supported by this controller\n"); + return NULL; + } + } + + pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n", + sg_len, direction, flags); + + txd = intel_mid_dma_prep_memcpy(chan, 0, 0, sgl->length, flags); + if (NULL == txd) { + pr_err("MDMA: Prep memcpy failed\n"); + return NULL; + } + + desc = to_intel_mid_dma_desc(txd); + desc->dirn = direction; + ctl_lo.ctl_lo = desc->ctl_lo; + ctl_lo.ctlx.llp_dst_en = 1; + ctl_lo.ctlx.llp_src_en = 1; + desc->ctl_lo = ctl_lo.ctl_lo; + desc->lli_length = sg_len; + desc->current_lli = 0; + /* DMA coherent memory pool for LLI descriptors*/ + desc->lli_pool = pci_pool_create("intel_mid_dma_lli_pool", + midc->dma->pdev, + (sizeof(struct intel_mid_dma_lli)*sg_len), + 32, 0); + if (NULL == desc->lli_pool) { + pr_err("MID_DMA:LLI pool create failed\n"); + return NULL; + } + + desc->lli = pci_pool_alloc(desc->lli_pool, GFP_KERNEL, &desc->lli_phys); + if (!desc->lli) { + pr_err("MID_DMA: LLI alloc failed\n"); + pci_pool_destroy(desc->lli_pool); + return NULL; + } + + midc_lli_fill_sg(midc, desc, sgl, sg_len, flags); + if (flags & DMA_PREP_INTERRUPT) { + iowrite32(UNMASK_INTR_REG(midc->ch_id), + midc->dma_base + MASK_BLOCK); + pr_debug("MDMA:Enabled Block interrupt\n"); + } + return &desc->txd; +} + +/** + * intel_mid_dma_free_chan_resources - Frees dma resources + * @chan: chan requiring attention + * + * Frees the allocated resources on this DMA chan + */ +static void intel_mid_dma_free_chan_resources(struct dma_chan *chan) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + struct middma_device *mid = to_middma_device(chan->device); + struct intel_mid_dma_desc *desc, *_desc; + + if (true == midc->busy) { + /*trying to free ch in use!!!!!*/ + pr_err("ERR_MDMA: trying to free ch in use\n"); + } + spin_lock_bh(&midc->lock); + midc->descs_allocated = 0; + list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) { + list_del(&desc->desc_node); + pci_pool_free(mid->dma_pool, desc, desc->txd.phys); + } + list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) { + list_del(&desc->desc_node); + pci_pool_free(mid->dma_pool, desc, desc->txd.phys); + } + list_for_each_entry_safe(desc, _desc, &midc->queue, desc_node) { + list_del(&desc->desc_node); + pci_pool_free(mid->dma_pool, desc, desc->txd.phys); + } + spin_unlock_bh(&midc->lock); + midc->in_use = false; + midc->busy = false; + /* Disable CH interrupts */ + iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK); + iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR); + pm_runtime_put(&mid->pdev->dev); +} + +/** + * intel_mid_dma_alloc_chan_resources - Allocate dma resources + * @chan: chan requiring attention + * + * Allocates DMA resources on this chan + * Return the descriptors allocated + */ +static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan); + struct middma_device *mid = to_middma_device(chan->device); + struct intel_mid_dma_desc *desc; + dma_addr_t phys; + int i = 0; + + pm_runtime_get_sync(&mid->pdev->dev); + + if (mid->state == SUSPENDED) { + if (dma_resume(&mid->pdev->dev)) { + pr_err("ERR_MDMA: resume failed"); + return -EFAULT; + } + } + + /* ASSERT: channel is idle */ + if (test_ch_en(mid->dma_base, midc->ch_id)) { + /*ch is not idle*/ + pr_err("ERR_MDMA: ch not idle\n"); + pm_runtime_put(&mid->pdev->dev); + return -EIO; + } + dma_cookie_init(chan); + + spin_lock_bh(&midc->lock); + while (midc->descs_allocated < DESCS_PER_CHANNEL) { + spin_unlock_bh(&midc->lock); + desc = pci_pool_alloc(mid->dma_pool, GFP_KERNEL, &phys); + if (!desc) { + pr_err("ERR_MDMA: desc failed\n"); + pm_runtime_put(&mid->pdev->dev); + return -ENOMEM; + /*check*/ + } + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = intel_mid_dma_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = phys; + spin_lock_bh(&midc->lock); + i = ++midc->descs_allocated; + list_add_tail(&desc->desc_node, &midc->free_list); + } + spin_unlock_bh(&midc->lock); + midc->in_use = true; + midc->busy = false; + pr_debug("MID_DMA: Desc alloc done ret: %d desc\n", i); + return i; +} + +/** + * midc_handle_error - Handle DMA txn error + * @mid: controller where error occurred + * @midc: chan where error occurred + * + * Scan the descriptor for error + */ +static void midc_handle_error(struct middma_device *mid, + struct intel_mid_dma_chan *midc) +{ + midc_scan_descriptors(mid, midc); +} + +/** + * dma_tasklet - DMA interrupt tasklet + * @data: tasklet arg (the controller structure) + * + * Scan the controller for interrupts for completion/error + * Clear the interrupt and call for handling completion/error + */ +static void dma_tasklet(unsigned long data) +{ + struct middma_device *mid = NULL; + struct intel_mid_dma_chan *midc = NULL; + u32 status, raw_tfr, raw_block; + int i; + + mid = (struct middma_device *)data; + if (mid == NULL) { + pr_err("ERR_MDMA: tasklet Null param\n"); + return; + } + pr_debug("MDMA: in tasklet for device %x\n", mid->pci_id); + raw_tfr = ioread32(mid->dma_base + RAW_TFR); + raw_block = ioread32(mid->dma_base + RAW_BLOCK); + status = raw_tfr | raw_block; + status &= mid->intr_mask; + while (status) { + /*txn interrupt*/ + i = get_ch_index(&status, mid->chan_base); + if (i < 0) { + pr_err("ERR_MDMA:Invalid ch index %x\n", i); + return; + } + midc = &mid->ch[i]; + if (midc == NULL) { + pr_err("ERR_MDMA:Null param midc\n"); + return; + } + pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n", + status, midc->ch_id, i); + midc->raw_tfr = raw_tfr; + midc->raw_block = raw_block; + spin_lock_bh(&midc->lock); + /*clearing this interrupts first*/ + iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_TFR); + if (raw_block) { + iowrite32((1 << midc->ch_id), + mid->dma_base + CLEAR_BLOCK); + } + midc_scan_descriptors(mid, midc); + pr_debug("MDMA:Scan of desc... complete, unmasking\n"); + iowrite32(UNMASK_INTR_REG(midc->ch_id), + mid->dma_base + MASK_TFR); + if (raw_block) { + iowrite32(UNMASK_INTR_REG(midc->ch_id), + mid->dma_base + MASK_BLOCK); + } + spin_unlock_bh(&midc->lock); + } + + status = ioread32(mid->dma_base + RAW_ERR); + status &= mid->intr_mask; + while (status) { + /*err interrupt*/ + i = get_ch_index(&status, mid->chan_base); + if (i < 0) { + pr_err("ERR_MDMA:Invalid ch index %x\n", i); + return; + } + midc = &mid->ch[i]; + if (midc == NULL) { + pr_err("ERR_MDMA:Null param midc\n"); + return; + } + pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n", + status, midc->ch_id, i); + + iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_ERR); + spin_lock_bh(&midc->lock); + midc_handle_error(mid, midc); + iowrite32(UNMASK_INTR_REG(midc->ch_id), + mid->dma_base + MASK_ERR); + spin_unlock_bh(&midc->lock); + } + pr_debug("MDMA:Exiting takslet...\n"); + return; +} + +static void dma_tasklet1(unsigned long data) +{ + pr_debug("MDMA:in takslet1...\n"); + return dma_tasklet(data); +} + +static void dma_tasklet2(unsigned long data) +{ + pr_debug("MDMA:in takslet2...\n"); + return dma_tasklet(data); +} + +/** + * intel_mid_dma_interrupt - DMA ISR + * @irq: IRQ where interrupt occurred + * @data: ISR cllback data (the controller structure) + * + * See if this is our interrupt if so then schedule the tasklet + * otherwise ignore + */ +static irqreturn_t intel_mid_dma_interrupt(int irq, void *data) +{ + struct middma_device *mid = data; + u32 tfr_status, err_status; + int call_tasklet = 0; + + tfr_status = ioread32(mid->dma_base + RAW_TFR); + err_status = ioread32(mid->dma_base + RAW_ERR); + if (!tfr_status && !err_status) + return IRQ_NONE; + + /*DMA Interrupt*/ + pr_debug("MDMA:Got an interrupt on irq %d\n", irq); + pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask); + tfr_status &= mid->intr_mask; + if (tfr_status) { + /*need to disable intr*/ + iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_TFR); + iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_BLOCK); + pr_debug("MDMA: Calling tasklet %x\n", tfr_status); + call_tasklet = 1; + } + err_status &= mid->intr_mask; + if (err_status) { + iowrite32((err_status << INT_MASK_WE), + mid->dma_base + MASK_ERR); + call_tasklet = 1; + } + if (call_tasklet) + tasklet_schedule(&mid->tasklet); + + return IRQ_HANDLED; +} + +static irqreturn_t intel_mid_dma_interrupt1(int irq, void *data) +{ + return intel_mid_dma_interrupt(irq, data); +} + +static irqreturn_t intel_mid_dma_interrupt2(int irq, void *data) +{ + return intel_mid_dma_interrupt(irq, data); +} + +/** + * mid_setup_dma - Setup the DMA controller + * @pdev: Controller PCI device structure + * + * Initialize the DMA controller, channels, registers with DMA engine, + * ISR. Initialize DMA controller channels. + */ +static int mid_setup_dma(struct pci_dev *pdev) +{ + struct middma_device *dma = pci_get_drvdata(pdev); + int err, i; + + /* DMA coherent memory pool for DMA descriptor allocations */ + dma->dma_pool = pci_pool_create("intel_mid_dma_desc_pool", pdev, + sizeof(struct intel_mid_dma_desc), + 32, 0); + if (NULL == dma->dma_pool) { + pr_err("ERR_MDMA:pci_pool_create failed\n"); + err = -ENOMEM; + goto err_dma_pool; + } + + INIT_LIST_HEAD(&dma->common.channels); + dma->pci_id = pdev->device; + if (dma->pimr_mask) { + dma->mask_reg = ioremap(LNW_PERIPHRAL_MASK_BASE, + LNW_PERIPHRAL_MASK_SIZE); + if (dma->mask_reg == NULL) { + pr_err("ERR_MDMA:Can't map periphral intr space !!\n"); + err = -ENOMEM; + goto err_ioremap; + } + } else + dma->mask_reg = NULL; + + pr_debug("MDMA:Adding %d channel for this controller\n", dma->max_chan); + /*init CH structures*/ + dma->intr_mask = 0; + dma->state = RUNNING; + for (i = 0; i < dma->max_chan; i++) { + struct intel_mid_dma_chan *midch = &dma->ch[i]; + + midch->chan.device = &dma->common; + dma_cookie_init(&midch->chan); + midch->ch_id = dma->chan_base + i; + pr_debug("MDMA:Init CH %d, ID %d\n", i, midch->ch_id); + + midch->dma_base = dma->dma_base; + midch->ch_regs = dma->dma_base + DMA_CH_SIZE * midch->ch_id; + midch->dma = dma; + dma->intr_mask |= 1 << (dma->chan_base + i); + spin_lock_init(&midch->lock); + + INIT_LIST_HEAD(&midch->active_list); + INIT_LIST_HEAD(&midch->queue); + INIT_LIST_HEAD(&midch->free_list); + /*mask interrupts*/ + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_BLOCK); + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_SRC_TRAN); + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_DST_TRAN); + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_ERR); + iowrite32(MASK_INTR_REG(midch->ch_id), + dma->dma_base + MASK_TFR); + + disable_dma_interrupt(midch); + list_add_tail(&midch->chan.device_node, &dma->common.channels); + } + pr_debug("MDMA: Calc Mask as %x for this controller\n", dma->intr_mask); + + /*init dma structure*/ + dma_cap_zero(dma->common.cap_mask); + dma_cap_set(DMA_MEMCPY, dma->common.cap_mask); + dma_cap_set(DMA_SLAVE, dma->common.cap_mask); + dma_cap_set(DMA_PRIVATE, dma->common.cap_mask); + dma->common.dev = &pdev->dev; + + dma->common.device_alloc_chan_resources = + intel_mid_dma_alloc_chan_resources; + dma->common.device_free_chan_resources = + intel_mid_dma_free_chan_resources; + + dma->common.device_tx_status = intel_mid_dma_tx_status; + dma->common.device_prep_dma_memcpy = intel_mid_dma_prep_memcpy; + dma->common.device_issue_pending = intel_mid_dma_issue_pending; + dma->common.device_prep_slave_sg = intel_mid_dma_prep_slave_sg; + dma->common.device_control = intel_mid_dma_device_control; + + /*enable dma cntrl*/ + iowrite32(REG_BIT0, dma->dma_base + DMA_CFG); + + /*register irq */ + if (dma->pimr_mask) { + pr_debug("MDMA:Requesting irq shared for DMAC1\n"); + err = request_irq(pdev->irq, intel_mid_dma_interrupt1, + IRQF_SHARED, "INTEL_MID_DMAC1", dma); + if (0 != err) + goto err_irq; + } else { + dma->intr_mask = 0x03; + pr_debug("MDMA:Requesting irq for DMAC2\n"); + err = request_irq(pdev->irq, intel_mid_dma_interrupt2, + IRQF_SHARED, "INTEL_MID_DMAC2", dma); + if (0 != err) + goto err_irq; + } + /*register device w/ engine*/ + err = dma_async_device_register(&dma->common); + if (0 != err) { + pr_err("ERR_MDMA:device_register failed: %d\n", err); + goto err_engine; + } + if (dma->pimr_mask) { + pr_debug("setting up tasklet1 for DMAC1\n"); + tasklet_init(&dma->tasklet, dma_tasklet1, (unsigned long)dma); + } else { + pr_debug("setting up tasklet2 for DMAC2\n"); + tasklet_init(&dma->tasklet, dma_tasklet2, (unsigned long)dma); + } + return 0; + +err_engine: + free_irq(pdev->irq, dma); +err_irq: + if (dma->mask_reg) + iounmap(dma->mask_reg); +err_ioremap: + pci_pool_destroy(dma->dma_pool); +err_dma_pool: + pr_err("ERR_MDMA:setup_dma failed: %d\n", err); + return err; + +} + +/** + * middma_shutdown - Shutdown the DMA controller + * @pdev: Controller PCI device structure + * + * Called by remove + * Unregister DMa controller, clear all structures and free interrupt + */ +static void middma_shutdown(struct pci_dev *pdev) +{ + struct middma_device *device = pci_get_drvdata(pdev); + + dma_async_device_unregister(&device->common); + pci_pool_destroy(device->dma_pool); + if (device->mask_reg) + iounmap(device->mask_reg); + if (device->dma_base) + iounmap(device->dma_base); + free_irq(pdev->irq, device); + return; +} + +/** + * intel_mid_dma_probe - PCI Probe + * @pdev: Controller PCI device structure + * @id: pci device id structure + * + * Initialize the PCI device, map BARs, query driver data. + * Call setup_dma to complete contoller and chan initilzation + */ +static int __devinit intel_mid_dma_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct middma_device *device; + u32 base_addr, bar_size; + struct intel_mid_dma_probe_info *info; + int err; + + pr_debug("MDMA: probe for %x\n", pdev->device); + info = (void *)id->driver_data; + pr_debug("MDMA: CH %d, base %d, block len %d, Periphral mask %x\n", + info->max_chan, info->ch_base, + info->block_size, info->pimr_mask); + + err = pci_enable_device(pdev); + if (err) + goto err_enable_device; + + err = pci_request_regions(pdev, "intel_mid_dmac"); + if (err) + goto err_request_regions; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + goto err_set_dma_mask; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + goto err_set_dma_mask; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) { + pr_err("ERR_MDMA:kzalloc failed probe\n"); + err = -ENOMEM; + goto err_kzalloc; + } + device->pdev = pci_dev_get(pdev); + + base_addr = pci_resource_start(pdev, 0); + bar_size = pci_resource_len(pdev, 0); + device->dma_base = ioremap_nocache(base_addr, DMA_REG_SIZE); + if (!device->dma_base) { + pr_err("ERR_MDMA:ioremap failed\n"); + err = -ENOMEM; + goto err_ioremap; + } + pci_set_drvdata(pdev, device); + pci_set_master(pdev); + device->max_chan = info->max_chan; + device->chan_base = info->ch_base; + device->block_size = info->block_size; + device->pimr_mask = info->pimr_mask; + + err = mid_setup_dma(pdev); + if (err) + goto err_dma; + + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_allow(&pdev->dev); + return 0; + +err_dma: + iounmap(device->dma_base); +err_ioremap: + pci_dev_put(pdev); + kfree(device); +err_kzalloc: +err_set_dma_mask: + pci_release_regions(pdev); + pci_disable_device(pdev); +err_request_regions: +err_enable_device: + pr_err("ERR_MDMA:Probe failed %d\n", err); + return err; +} + +/** + * intel_mid_dma_remove - PCI remove + * @pdev: Controller PCI device structure + * + * Free up all resources and data + * Call shutdown_dma to complete contoller and chan cleanup + */ +static void __devexit intel_mid_dma_remove(struct pci_dev *pdev) +{ + struct middma_device *device = pci_get_drvdata(pdev); + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_forbid(&pdev->dev); + middma_shutdown(pdev); + pci_dev_put(pdev); + kfree(device); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +/* Power Management */ +/* +* dma_suspend - PCI suspend function +* +* @pci: PCI device structure +* @state: PM message +* +* This function is called by OS when a power event occurs +*/ +static int dma_suspend(struct device *dev) +{ + struct pci_dev *pci = to_pci_dev(dev); + int i; + struct middma_device *device = pci_get_drvdata(pci); + pr_debug("MDMA: dma_suspend called\n"); + + for (i = 0; i < device->max_chan; i++) { + if (device->ch[i].in_use) + return -EAGAIN; + } + dmac1_mask_periphral_intr(device); + device->state = SUSPENDED; + pci_save_state(pci); + pci_disable_device(pci); + pci_set_power_state(pci, PCI_D3hot); + return 0; +} + +/** +* dma_resume - PCI resume function +* +* @pci: PCI device structure +* +* This function is called by OS when a power event occurs +*/ +int dma_resume(struct device *dev) +{ + struct pci_dev *pci = to_pci_dev(dev); + int ret; + struct middma_device *device = pci_get_drvdata(pci); + + pr_debug("MDMA: dma_resume called\n"); + pci_set_power_state(pci, PCI_D0); + pci_restore_state(pci); + ret = pci_enable_device(pci); + if (ret) { + pr_err("MDMA: device can't be enabled for %x\n", pci->device); + return ret; + } + device->state = RUNNING; + iowrite32(REG_BIT0, device->dma_base + DMA_CFG); + return 0; +} + +static int dma_runtime_suspend(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = SUSPENDED; + return 0; +} + +static int dma_runtime_resume(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = RUNNING; + iowrite32(REG_BIT0, device->dma_base + DMA_CFG); + return 0; +} + +static int dma_runtime_idle(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct middma_device *device = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < device->max_chan; i++) { + if (device->ch[i].in_use) + return -EAGAIN; + } + + return pm_schedule_suspend(dev, 0); +} + +/****************************************************************************** +* PCI stuff +*/ +static struct pci_device_id intel_mid_dma_ids[] = { + { PCI_VDEVICE(INTEL, INTEL_MID_DMAC1_ID), INFO(2, 6, 4095, 0x200020)}, + { PCI_VDEVICE(INTEL, INTEL_MID_DMAC2_ID), INFO(2, 0, 2047, 0)}, + { PCI_VDEVICE(INTEL, INTEL_MID_GP_DMAC2_ID), INFO(2, 0, 2047, 0)}, + { PCI_VDEVICE(INTEL, INTEL_MFLD_DMAC1_ID), INFO(4, 0, 4095, 0x400040)}, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, intel_mid_dma_ids); + +static const struct dev_pm_ops intel_mid_dma_pm = { + .runtime_suspend = dma_runtime_suspend, + .runtime_resume = dma_runtime_resume, + .runtime_idle = dma_runtime_idle, + .suspend = dma_suspend, + .resume = dma_resume, +}; + +static struct pci_driver intel_mid_dma_pci_driver = { + .name = "Intel MID DMA", + .id_table = intel_mid_dma_ids, + .probe = intel_mid_dma_probe, + .remove = __devexit_p(intel_mid_dma_remove), +#ifdef CONFIG_PM + .driver = { + .pm = &intel_mid_dma_pm, + }, +#endif +}; + +static int __init intel_mid_dma_init(void) +{ + pr_debug("INFO_MDMA: LNW DMA Driver Version %s\n", + INTEL_MID_DMA_DRIVER_VERSION); + return pci_register_driver(&intel_mid_dma_pci_driver); +} +fs_initcall(intel_mid_dma_init); + +static void __exit intel_mid_dma_exit(void) +{ + pci_unregister_driver(&intel_mid_dma_pci_driver); +} +module_exit(intel_mid_dma_exit); + +MODULE_AUTHOR("Vinod Koul <vinod.koul@intel.com>"); +MODULE_DESCRIPTION("Intel (R) MID DMAC Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(INTEL_MID_DMA_DRIVER_VERSION); diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h new file mode 100644 index 00000000..1bfa9268 --- /dev/null +++ b/drivers/dma/intel_mid_dma_regs.h @@ -0,0 +1,299 @@ +/* + * intel_mid_dma_regs.h - Intel MID DMA Drivers + * + * Copyright (C) 2008-10 Intel Corp + * Author: Vinod Koul <vinod.koul@intel.com> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * + */ +#ifndef __INTEL_MID_DMAC_REGS_H__ +#define __INTEL_MID_DMAC_REGS_H__ + +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/pci_ids.h> + +#define INTEL_MID_DMA_DRIVER_VERSION "1.1.0" + +#define REG_BIT0 0x00000001 +#define REG_BIT8 0x00000100 +#define INT_MASK_WE 0x8 +#define CLEAR_DONE 0xFFFFEFFF +#define UNMASK_INTR_REG(chan_num) \ + ((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num)) +#define MASK_INTR_REG(chan_num) (REG_BIT8 << chan_num) + +#define ENABLE_CHANNEL(chan_num) \ + ((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num)) + +#define DISABLE_CHANNEL(chan_num) \ + (REG_BIT8 << chan_num) + +#define DESCS_PER_CHANNEL 16 +/*DMA Registers*/ +/*registers associated with channel programming*/ +#define DMA_REG_SIZE 0x400 +#define DMA_CH_SIZE 0x58 + +/*CH X REG = (DMA_CH_SIZE)*CH_NO + REG*/ +#define SAR 0x00 /* Source Address Register*/ +#define DAR 0x08 /* Destination Address Register*/ +#define LLP 0x10 /* Linked List Pointer Register*/ +#define CTL_LOW 0x18 /* Control Register*/ +#define CTL_HIGH 0x1C /* Control Register*/ +#define CFG_LOW 0x40 /* Configuration Register Low*/ +#define CFG_HIGH 0x44 /* Configuration Register high*/ + +#define STATUS_TFR 0x2E8 +#define STATUS_BLOCK 0x2F0 +#define STATUS_ERR 0x308 + +#define RAW_TFR 0x2C0 +#define RAW_BLOCK 0x2C8 +#define RAW_ERR 0x2E0 + +#define MASK_TFR 0x310 +#define MASK_BLOCK 0x318 +#define MASK_SRC_TRAN 0x320 +#define MASK_DST_TRAN 0x328 +#define MASK_ERR 0x330 + +#define CLEAR_TFR 0x338 +#define CLEAR_BLOCK 0x340 +#define CLEAR_SRC_TRAN 0x348 +#define CLEAR_DST_TRAN 0x350 +#define CLEAR_ERR 0x358 + +#define INTR_STATUS 0x360 +#define DMA_CFG 0x398 +#define DMA_CHAN_EN 0x3A0 + +/*DMA channel control registers*/ +union intel_mid_dma_ctl_lo { + struct { + u32 int_en:1; /*enable or disable interrupts*/ + /*should be 0*/ + u32 dst_tr_width:3; /*destination transfer width*/ + /*usually 32 bits = 010*/ + u32 src_tr_width:3; /*source transfer width*/ + /*usually 32 bits = 010*/ + u32 dinc:2; /*destination address inc/dec*/ + /*For mem:INC=00, Periphral NoINC=11*/ + u32 sinc:2; /*source address inc or dec, as above*/ + u32 dst_msize:3; /*destination burst transaction length*/ + /*always = 16 ie 011*/ + u32 src_msize:3; /*source burst transaction length*/ + /*always = 16 ie 011*/ + u32 reser1:3; + u32 tt_fc:3; /*transfer type and flow controller*/ + /*M-M = 000 + P-M = 010 + M-P = 001*/ + u32 dms:2; /*destination master select = 0*/ + u32 sms:2; /*source master select = 0*/ + u32 llp_dst_en:1; /*enable/disable destination LLP = 0*/ + u32 llp_src_en:1; /*enable/disable source LLP = 0*/ + u32 reser2:3; + } ctlx; + u32 ctl_lo; +}; + +union intel_mid_dma_ctl_hi { + struct { + u32 block_ts:12; /*block transfer size*/ + u32 done:1; /*Done - updated by DMAC*/ + u32 reser:19; /*configured by DMAC*/ + } ctlx; + u32 ctl_hi; + +}; + +/*DMA channel configuration registers*/ +union intel_mid_dma_cfg_lo { + struct { + u32 reser1:5; + u32 ch_prior:3; /*channel priority = 0*/ + u32 ch_susp:1; /*channel suspend = 0*/ + u32 fifo_empty:1; /*FIFO empty or not R bit = 0*/ + u32 hs_sel_dst:1; /*select HW/SW destn handshaking*/ + /*HW = 0, SW = 1*/ + u32 hs_sel_src:1; /*select HW/SW src handshaking*/ + u32 reser2:6; + u32 dst_hs_pol:1; /*dest HS interface polarity*/ + u32 src_hs_pol:1; /*src HS interface polarity*/ + u32 max_abrst:10; /*max AMBA burst len = 0 (no sw limit*/ + u32 reload_src:1; /*auto reload src addr =1 if src is P*/ + u32 reload_dst:1; /*AR destn addr =1 if dstn is P*/ + } cfgx; + u32 cfg_lo; +}; + +union intel_mid_dma_cfg_hi { + struct { + u32 fcmode:1; /*flow control mode = 1*/ + u32 fifo_mode:1; /*FIFO mode select = 1*/ + u32 protctl:3; /*protection control = 0*/ + u32 rsvd:2; + u32 src_per:4; /*src hw HS interface*/ + u32 dst_per:4; /*dstn hw HS interface*/ + u32 reser2:17; + } cfgx; + u32 cfg_hi; +}; + + +/** + * struct intel_mid_dma_chan - internal mid representation of a DMA channel + * @chan: dma_chan strcture represetation for mid chan + * @ch_regs: MMIO register space pointer to channel register + * @dma_base: MMIO register space DMA engine base pointer + * @ch_id: DMA channel id + * @lock: channel spinlock + * @active_list: current active descriptors + * @queue: current queued up descriptors + * @free_list: current free descriptors + * @slave: dma slave struture + * @descs_allocated: total number of decsiptors allocated + * @dma: dma device struture pointer + * @busy: bool representing if ch is busy (active txn) or not + * @in_use: bool representing if ch is in use or not + * @raw_tfr: raw trf interrupt received + * @raw_block: raw block interrupt received + */ +struct intel_mid_dma_chan { + struct dma_chan chan; + void __iomem *ch_regs; + void __iomem *dma_base; + int ch_id; + spinlock_t lock; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int descs_allocated; + struct middma_device *dma; + bool busy; + bool in_use; + u32 raw_tfr; + u32 raw_block; + struct intel_mid_dma_slave *mid_slave; +}; + +static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan( + struct dma_chan *chan) +{ + return container_of(chan, struct intel_mid_dma_chan, chan); +} + +enum intel_mid_dma_state { + RUNNING = 0, + SUSPENDED, +}; +/** + * struct middma_device - internal representation of a DMA device + * @pdev: PCI device + * @dma_base: MMIO register space pointer of DMA + * @dma_pool: for allocating DMA descriptors + * @common: embedded struct dma_device + * @tasklet: dma tasklet for processing interrupts + * @ch: per channel data + * @pci_id: DMA device PCI ID + * @intr_mask: Interrupt mask to be used + * @mask_reg: MMIO register for periphral mask + * @chan_base: Base ch index (read from driver data) + * @max_chan: max number of chs supported (from drv_data) + * @block_size: Block size of DMA transfer supported (from drv_data) + * @pimr_mask: MMIO register addr for periphral interrupt (from drv_data) + * @state: dma PM device state + */ +struct middma_device { + struct pci_dev *pdev; + void __iomem *dma_base; + struct pci_pool *dma_pool; + struct dma_device common; + struct tasklet_struct tasklet; + struct intel_mid_dma_chan ch[MAX_CHAN]; + unsigned int pci_id; + unsigned int intr_mask; + void __iomem *mask_reg; + int chan_base; + int max_chan; + int block_size; + unsigned int pimr_mask; + enum intel_mid_dma_state state; +}; + +static inline struct middma_device *to_middma_device(struct dma_device *common) +{ + return container_of(common, struct middma_device, common); +} + +struct intel_mid_dma_desc { + void __iomem *block; /*ch ptr*/ + struct list_head desc_node; + struct dma_async_tx_descriptor txd; + size_t len; + dma_addr_t sar; + dma_addr_t dar; + u32 cfg_hi; + u32 cfg_lo; + u32 ctl_lo; + u32 ctl_hi; + struct pci_pool *lli_pool; + struct intel_mid_dma_lli *lli; + dma_addr_t lli_phys; + unsigned int lli_length; + unsigned int current_lli; + dma_addr_t next; + enum dma_transfer_direction dirn; + enum dma_status status; + enum dma_slave_buswidth width; /*width of DMA txn*/ + enum intel_mid_dma_mode cfg_mode; /*mode configuration*/ + +}; + +struct intel_mid_dma_lli { + dma_addr_t sar; + dma_addr_t dar; + dma_addr_t llp; + u32 ctl_lo; + u32 ctl_hi; +} __attribute__ ((packed)); + +static inline int test_ch_en(void __iomem *dma, u32 ch_no) +{ + u32 en_reg = ioread32(dma + DMA_CHAN_EN); + return (en_reg >> ch_no) & 0x1; +} + +static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc + (struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct intel_mid_dma_desc, txd); +} + +static inline struct intel_mid_dma_slave *to_intel_mid_dma_slave + (struct dma_slave_config *slave) +{ + return container_of(slave, struct intel_mid_dma_slave, dma_slave); +} + + +int dma_resume(struct device *dev); + +#endif /*__INTEL_MID_DMAC_REGS_H__*/ diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile new file mode 100644 index 00000000..0ff7270a --- /dev/null +++ b/drivers/dma/ioat/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o +ioatdma-y := pci.o dma.o dma_v2.o dma_v3.o dca.o diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c new file mode 100644 index 00000000..abd9038e --- /dev/null +++ b/drivers/dma/ioat/dca.c @@ -0,0 +1,684 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/dca.h> + +/* either a kernel change is needed, or we need something like this in kernel */ +#ifndef CONFIG_SMP +#include <asm/smp.h> +#undef cpu_physical_id +#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) +#endif + +#include "dma.h" +#include "registers.h" + +/* + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 + * contain the bit number of the APIC ID to map into the DCA tag. If the valid + * bit is not set, then the value must be 0 or 1 and defines the bit in the tag. + */ +#define DCA_TAG_MAP_VALID 0x80 + +#define DCA3_TAG_MAP_BIT_TO_INV 0x80 +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40 +#define DCA3_TAG_MAP_LITERAL_VAL 0x1 + +#define DCA_TAG_MAP_MASK 0xDF + +/* expected tag map bytes for I/OAT ver.2 */ +#define DCA2_TAG_MAP_BYTE0 0x80 +#define DCA2_TAG_MAP_BYTE1 0x0 +#define DCA2_TAG_MAP_BYTE2 0x81 +#define DCA2_TAG_MAP_BYTE3 0x82 +#define DCA2_TAG_MAP_BYTE4 0x82 + +/* verify if tag map matches expected values */ +static inline int dca2_tag_map_valid(u8 *tag_map) +{ + return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) && + (tag_map[1] == DCA2_TAG_MAP_BYTE1) && + (tag_map[2] == DCA2_TAG_MAP_BYTE2) && + (tag_map[3] == DCA2_TAG_MAP_BYTE3) && + (tag_map[4] == DCA2_TAG_MAP_BYTE4)); +} + +/* + * "Legacy" DCA systems do not implement the DCA register set in the + * I/OAT device. Software needs direct support for their tag mappings. + */ + +#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) +#define IOAT_TAG_MAP_LEN 8 + +static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; +static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; +static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), }; +static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 }; + +/* pack PCI B/D/F into a u16 */ +static inline u16 dcaid_from_pcidev(struct pci_dev *pci) +{ + return (pci->bus->number << 8) | pci->devfn; +} + +static int dca_enabled_in_bios(struct pci_dev *pdev) +{ + /* CPUID level 9 returns DCA configuration */ + /* Bit 0 indicates DCA enabled by the BIOS */ + unsigned long cpuid_level_9; + int res; + + cpuid_level_9 = cpuid_eax(9); + res = test_bit(0, &cpuid_level_9); + if (!res) + dev_dbg(&pdev->dev, "DCA is disabled in BIOS\n"); + + return res; +} + +int system_has_dca_enabled(struct pci_dev *pdev) +{ + if (boot_cpu_has(X86_FEATURE_DCA)) + return dca_enabled_in_bios(pdev); + + dev_dbg(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n"); + return 0; +} + +struct ioat_dca_slot { + struct pci_dev *pdev; /* requester device */ + u16 rid; /* requester id, as used by IOAT */ +}; + +#define IOAT_DCA_MAX_REQ 6 +#define IOAT3_DCA_MAX_REQ 2 + +struct ioat_dca_priv { + void __iomem *iobase; + void __iomem *dca_base; + int max_requesters; + int requester_count; + u8 tag_map[IOAT_TAG_MAP_LEN]; + struct ioat_dca_slot req_slots[0]; +}; + +/* 5000 series chipset DCA Port Requester ID Table Entry Format + * [15:8] PCI-Express Bus Number + * [7:3] PCI-Express Device Number + * [2:0] PCI-Express Function Number + * + * 5000 series chipset DCA control register format + * [7:1] Reserved (0) + * [0] Ignore Function Number + */ + +static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + writew(id, ioatdca->dca_base + (i * 4)); + /* make sure the ignore function bit is off */ + writeb(0, ioatdca->dca_base + (i * 4) + 2); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + writew(0, ioatdca->dca_base + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry, tag; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA_TAG_MAP_VALID) { + bit = entry & ~DCA_TAG_MAP_VALID; + value = (apic_id & (1 << bit)) ? 1 : 0; + } else { + value = entry ? 1 : 0; + } + tag |= (value << i); + } + return tag; +} + +static int ioat_dca_dev_managed(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + pdev = to_pci_dev(dev); + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) + return 1; + } + return 0; +} + +static struct dca_ops ioat_dca_ops = { + .add_requester = ioat_dca_add_requester, + .remove_requester = ioat_dca_remove_requester, + .get_tag = ioat_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + + +struct dca_provider * __devinit +ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + u8 *tag_map = NULL; + int i; + int err; + u8 version; + u8 max_requesters; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + /* I/OAT v1 systems must have a known tag_map to support DCA */ + switch (pdev->vendor) { + case PCI_VENDOR_ID_INTEL: + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT: + tag_map = ioat_tag_map_BNB; + break; + case PCI_DEVICE_ID_INTEL_IOAT_CNB: + tag_map = ioat_tag_map_CNB; + break; + case PCI_DEVICE_ID_INTEL_IOAT_SCNB: + tag_map = ioat_tag_map_SCNB; + break; + } + break; + case PCI_VENDOR_ID_UNISYS: + switch (pdev->device) { + case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR: + tag_map = ioat_tag_map_UNISYS; + break; + } + break; + } + if (tag_map == NULL) + return NULL; + + version = readb(iobase + IOAT_VER_OFFSET); + if (version == IOAT_VER_3_0) + max_requesters = IOAT3_DCA_MAX_REQ; + else + max_requesters = IOAT_DCA_MAX_REQ; + + dca = alloc_dca_provider(&ioat_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * max_requesters)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->max_requesters = max_requesters; + ioatdca->dca_base = iobase + 0x54; + + /* copy over the APIC ID to DCA tag mapping */ + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) + ioatdca->tag_map[i] = tag_map[i]; + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} + + +static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat2_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat2_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + tag = ioat_dca_get_tag(dca, dev, cpu); + tag = (~tag) & 0x1F; + return tag; +} + +static struct dca_ops ioat2_dca_ops = { + .add_requester = ioat2_dca_add_requester, + .remove_requester = ioat2_dca_remove_requester, + .get_tag = ioat2_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider * __devinit +ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u32 tag_map; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat2_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat2_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { + csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { + pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); + for (i = 0; i < 5; i++) { + bit = (tag_map >> (4 * i)) & 0x0f; + if (bit < 8) + ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; + else + ioatdca->tag_map[i] = 0; + } + + if (!dca2_tag_map_valid(ioatdca->tag_map)) { + dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, " + "disabling DCA\n"); + free_dca_provider(dca); + return NULL; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} + +static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat3_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat3_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) { + bit = entry & + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV); + value = (apic_id & (1 << bit)) ? 1 : 0; + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) { + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV; + value = (apic_id & (1 << bit)) ? 0 : 1; + } else { + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0; + } + tag |= (value << i); + } + + return tag; +} + +static struct dca_ops ioat3_dca_ops = { + .add_requester = ioat3_dca_add_requester, + .remove_requester = ioat3_dca_remove_requester, + .get_tag = ioat3_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider * __devinit +ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } tag_map; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat3_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) { + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) { + pcie_control |= IOAT3_PCI_CONTROL_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map.low = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW); + tag_map.high = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH); + for (i = 0; i < 8; i++) { + bit = tag_map.full >> (8 * i); + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c new file mode 100644 index 00000000..73b2b65c --- /dev/null +++ b/drivers/dma/ioat/dma.c @@ -0,0 +1,1233 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/prefetch.h> +#include <linux/i7300_idle.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); + +/* internal functions */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat); +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); + +/** + * ioat_dma_do_interrupt - handler used for single vector interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +{ + struct ioatdma_device *instance = data; + struct ioat_chan_common *chan; + unsigned long attnstatus; + int bit; + u8 intrctrl; + + intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); + + if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) + return IRQ_NONE; + + if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_NONE; + } + + attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); + for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { + chan = ioat_chan_by_index(instance, bit); + tasklet_schedule(&chan->cleanup_task); + } + + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_HANDLED; +} + +/** + * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +{ + struct ioat_chan_common *chan = data; + + tasklet_schedule(&chan->cleanup_task); + + return IRQ_HANDLED; +} + +/* common channel initialization */ +void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c = &chan->common; + unsigned long data = (unsigned long) c; + + chan->device = device; + chan->reg_base = device->reg_base + (0x80 * (idx + 1)); + spin_lock_init(&chan->cleanup_lock); + chan->common.device = dma; + dma_cookie_init(&chan->common); + list_add_tail(&chan->common.device_node, &dma->channels); + device->idx[idx] = chan; + init_timer(&chan->timer); + chan->timer.function = device->timer_fn; + chan->timer.data = data; + tasklet_init(&chan->cleanup_task, device->cleanup_fn, data); + tasklet_disable(&chan->cleanup_task); +} + +/** + * ioat1_dma_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +static int ioat1_enumerate_channels(struct ioatdma_device *device) +{ + u8 xfercap_scale; + u32 xfercap; + int i; + struct ioat_dma_chan *ioat; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(device->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(device->idx)); + dma->chancnt = ARRAY_SIZE(device->idx); + } + xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_scale &= 0x1f; /* bits [4:0] valid */ + xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); + dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); + +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; +#endif + for (i = 0; i < dma->chancnt; i++) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) + break; + + ioat_init_channel(device, &ioat->base, i); + ioat->xfercap = xfercap; + spin_lock_init(&ioat->desc_lock); + INIT_LIST_HEAD(&ioat->free_desc); + INIT_LIST_HEAD(&ioat->used_desc); + } + dma->chancnt = i; + return i; +} + +/** + * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended + * descriptors to hw + * @chan: DMA channel handle + */ +static inline void +__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) +{ + void __iomem *reg_base = ioat->base.reg_base; + + dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", + __func__, ioat->pending); + ioat->pending = 0; + writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); +} + +static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(chan); + + if (ioat->pending > 0) { + spin_lock_bh(&ioat->desc_lock); + __ioat1_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); + } +} + +/** + * ioat1_reset_channel - restart a channel + * @ioat: IOAT DMA channel handle + */ +static void ioat1_reset_channel(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + void __iomem *reg_base = chan->reg_base; + u32 chansts, chanerr; + + dev_warn(to_dev(chan), "reset\n"); + chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); + chansts = *chan->completion & IOAT_CHANSTS_STATUS; + if (chanerr) { + dev_err(to_dev(chan), + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", + chan_num(chan), chansts, chanerr); + writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); + } + + /* + * whack it upside the head with a reset + * and wait for things to settle out. + * force the pending count to a really big negative + * to make sure no one forces an issue_pending + * while we're waiting. + */ + + ioat->pending = INT_MIN; + writeb(IOAT_CHANCMD_RESET, + reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + set_bit(IOAT_RESET_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + RESET_DELAY); +} + +static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *first; + struct ioat_desc_sw *chain_tail; + dma_cookie_t cookie; + + spin_lock_bh(&ioat->desc_lock); + /* cookie incr and addition to used_list must be atomic */ + cookie = dma_cookie_assign(tx); + dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + + /* write address into NextDescriptor field of last desc in chain */ + first = to_ioat_desc(desc->tx_list.next); + chain_tail = to_ioat_desc(ioat->used_desc.prev); + /* make descriptor updates globally visible before chaining */ + wmb(); + chain_tail->hw->next = first->txd.phys; + list_splice_tail_init(&desc->tx_list, &ioat->used_desc); + dump_desc_dbg(ioat, chain_tail); + dump_desc_dbg(ioat, first); + + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + ioat->active += desc->hw->tx_cnt; + ioat->pending += desc->hw->tx_cnt; + if (ioat->pending >= ioat_pending_level) + __ioat1_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); + + return cookie; +} + +/** + * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair + * @ioat: the channel supplying the memory pool for the descriptors + * @flags: allocation flags + */ +static struct ioat_desc_sw * +ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) +{ + struct ioat_dma_descriptor *desc; + struct ioat_desc_sw *desc_sw; + struct ioatdma_device *ioatdma_device; + dma_addr_t phys; + + ioatdma_device = ioat->base.device; + desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); + if (unlikely(!desc)) + return NULL; + + desc_sw = kzalloc(sizeof(*desc_sw), flags); + if (unlikely(!desc_sw)) { + pci_pool_free(ioatdma_device->dma_pool, desc, phys); + return NULL; + } + + memset(desc, 0, sizeof(*desc)); + + INIT_LIST_HEAD(&desc_sw->tx_list); + dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); + desc_sw->txd.tx_submit = ioat1_tx_submit; + desc_sw->hw = desc; + desc_sw->txd.phys = phys; + set_desc_id(desc_sw, -1); + + return desc_sw; +} + +static int ioat_initial_desc_count = 256; +module_param(ioat_initial_desc_count, int, 0644); +MODULE_PARM_DESC(ioat_initial_desc_count, + "ioat1: initial descriptors per channel (default: 256)"); +/** + * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors + * @chan: the channel to be filled out + */ +static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *desc; + u32 chanerr; + int i; + LIST_HEAD(tmp_list); + + /* have we already been set up? */ + if (!list_empty(&ioat->free_desc)) + return ioat->desccount; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* Allocate descriptors */ + for (i = 0; i < ioat_initial_desc_count; i++) { + desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); + if (!desc) { + dev_err(to_dev(chan), "Only %d initial descriptors\n", i); + break; + } + set_desc_id(desc, i); + list_add_tail(&desc->node, &tmp_list); + } + spin_lock_bh(&ioat->desc_lock); + ioat->desccount = i; + list_splice(&tmp_list, &ioat->free_desc); + spin_unlock_bh(&ioat->desc_lock); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + chan->completion = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, &chan->completion_dma); + memset(chan->completion, 0, sizeof(*chan->completion)); + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_dma) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + tasklet_enable(&chan->cleanup_task); + ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, ioat->desccount); + return ioat->desccount; +} + +/** + * ioat1_dma_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat1_dma_free_chan_resources(struct dma_chan *c) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *ioatdma_device = chan->device; + struct ioat_desc_sw *desc, *_desc; + int in_use_descs = 0; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (ioat->desccount == 0) + return; + + tasklet_disable(&chan->cleanup_task); + del_timer_sync(&chan->timer); + ioat1_cleanup(ioat); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat->desc_lock); + list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { + dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", + __func__, desc_id(desc)); + dump_desc_dbg(ioat, desc); + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->txd.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat->free_desc, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->txd.phys); + kfree(desc); + } + spin_unlock_bh(&ioat->desc_lock); + + pci_pool_free(ioatdma_device->completion_pool, + chan->completion, + chan->completion_dma); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", + in_use_descs - 1); + + chan->last_completion = 0; + chan->completion_dma = 0; + ioat->pending = 0; + ioat->desccount = 0; +} + +/** + * ioat1_dma_get_next_descriptor - return the next available descriptor + * @ioat: IOAT DMA channel handle + * + * Gets the next descriptor from the chain, and must be called with the + * channel's desc_lock held. Allocates more descriptors if the channel + * has run out. + */ +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) +{ + struct ioat_desc_sw *new; + + if (!list_empty(&ioat->free_desc)) { + new = to_ioat_desc(ioat->free_desc.next); + list_del(&new->node); + } else { + /* try to get another desc */ + new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); + if (!new) { + dev_err(to_dev(&ioat->base), "alloc failed\n"); + return NULL; + } + } + dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", + __func__, desc_id(new)); + prefetch(new->hw); + return new; +} + +static struct dma_async_tx_descriptor * +ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_desc_sw *desc; + size_t copy; + LIST_HEAD(chain); + dma_addr_t src = dma_src; + dma_addr_t dest = dma_dest; + size_t total_len = len; + struct ioat_dma_descriptor *hw = NULL; + int tx_cnt = 0; + + spin_lock_bh(&ioat->desc_lock); + desc = ioat1_dma_get_next_descriptor(ioat); + do { + if (!desc) + break; + + tx_cnt++; + copy = min_t(size_t, len, ioat->xfercap); + + hw = desc->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dest; + + list_add_tail(&desc->node, &chain); + + len -= copy; + dest += copy; + src += copy; + if (len) { + struct ioat_desc_sw *next; + + async_tx_ack(&desc->txd); + next = ioat1_dma_get_next_descriptor(ioat); + hw->next = next ? next->txd.phys : 0; + dump_desc_dbg(ioat, desc); + desc = next; + } else + hw->next = 0; + } while (len); + + if (!desc) { + struct ioat_chan_common *chan = &ioat->base; + + dev_err(to_dev(chan), + "chan%d - get_next_desc failed\n", chan_num(chan)); + list_splice(&chain, &ioat->free_desc); + spin_unlock_bh(&ioat->desc_lock); + return NULL; + } + spin_unlock_bh(&ioat->desc_lock); + + desc->txd.flags = flags; + desc->len = total_len; + list_splice(&chain, &desc->tx_list); + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->tx_cnt = tx_cnt; + dump_desc_dbg(ioat, desc); + + return &desc->txd; +} + +static void ioat1_cleanup_event(unsigned long data) +{ + struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); + + ioat1_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw) +{ + struct pci_dev *pdev = chan->device->pdev; + size_t offset = len - hw->size; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) + ioat_unmap(pdev, hw->src_addr - offset, len, + PCI_DMA_TODEVICE, flags, 0); +} + +dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan) +{ + dma_addr_t phys_complete; + u64 completion; + + completion = *chan->completion; + phys_complete = ioat_chansts_to_addr(completion); + + dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + (unsigned long long) phys_complete); + + if (is_ioat_halted(completion)) { + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", + chanerr); + + /* TODO do something to salvage the situation */ + } + + return phys_complete; +} + +bool ioat_cleanup_preamble(struct ioat_chan_common *chan, + dma_addr_t *phys_complete) +{ + *phys_complete = ioat_get_current_completion(chan); + if (*phys_complete == chan->last_completion) + return false; + clear_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + return true; +} + +static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct list_head *_desc, *n; + struct dma_async_tx_descriptor *tx; + + dev_dbg(to_dev(chan), "%s: phys_complete: %llx\n", + __func__, (unsigned long long) phys_complete); + list_for_each_safe(_desc, n, &ioat->used_desc) { + struct ioat_desc_sw *desc; + + prefetch(n); + desc = list_entry(_desc, typeof(*desc), node); + tx = &desc->txd; + /* + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. + */ + dump_desc_dbg(ioat, desc); + if (tx->cookie) { + dma_cookie_complete(tx); + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + ioat->active -= desc->hw->tx_cnt; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (async_tx_test_ack(tx)) + list_move_tail(&desc->node, &ioat->free_desc); + } else { + /* + * last used desc. Do not remove, so we can + * append from it. + */ + + /* if nothing else is pending, cancel the + * completion timeout + */ + if (n == &ioat->used_desc) { + dev_dbg(to_dev(chan), + "%s cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + } + + /* TODO check status bits? */ + break; + } + } + + chan->last_completion = phys_complete; +} + +/** + * ioat1_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + * + * To prevent lock contention we defer cleanup when the locks are + * contended with a terminal timeout that forces cleanup and catches + * completion notification errors. + */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + dma_addr_t phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->desc_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat1_timer_event(unsigned long data) +{ + struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; + + dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); + + spin_lock_bh(&chan->cleanup_lock); + if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat->desc_lock); + + /* restart active descriptors */ + desc = to_ioat_desc(ioat->used_desc.prev); + ioat_set_chainaddr(ioat, desc->txd.phys); + ioat_start(chan); + + ioat->pending = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + spin_unlock_bh(&ioat->desc_lock); + } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + dma_addr_t phys_complete; + + spin_lock_bh(&ioat->desc_lock); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat1_reset_channel(ioat); + else { + u64 status = ioat_chansts(chan); + + /* manually update the last completion address */ + if (ioat_chansts_to_addr(status) != 0) + *chan->completion = status; + + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->desc_lock); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +enum dma_status +ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct ioat_chan_common *chan = to_chan_common(c); + struct ioatdma_device *device = chan->device; + enum dma_status ret; + + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_SUCCESS) + return ret; + + device->cleanup_fn((unsigned long) c); + + return dma_cookie_status(c, cookie, txstate); +} + +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *desc; + struct ioat_dma_descriptor *hw; + + spin_lock_bh(&ioat->desc_lock); + + desc = ioat1_dma_get_next_descriptor(ioat); + + if (!desc) { + dev_err(to_dev(chan), + "Unable to start null desc - get next desc failed\n"); + spin_unlock_bh(&ioat->desc_lock); + return; + } + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + hw->next = 0; + list_add_tail(&desc->node, &ioat->used_desc); + dump_desc_dbg(ioat, desc); + + ioat_set_chainaddr(ioat, desc->txd.phys); + ioat_start(chan); + spin_unlock_bh(&ioat->desc_lock); +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void __devinit ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. + * @device: device to be tested + */ +int __devinit ioat_dma_self_test(struct ioatdma_device *device) +{ + int i; + u8 *src; + u8 *dest; + struct dma_device *dma = &device->common; + struct device *dev = &device->pdev->dev; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE | + DMA_PREP_INTERRUPT; + tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, + IOAT_TEST_SIZE, flags); + if (!tx) { + dev_err(dev, "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) + != DMA_SUCCESS) { + dev_err(dev, "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(dev, "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), " + "msix-single-vector, msi, intx)"); + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @device: ioat device + */ +static int ioat_dma_setup_interrupts(struct ioatdma_device *device) +{ + struct ioat_chan_common *chan; + struct pci_dev *pdev = device->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msix-single-vector")) + goto msix_single_vector; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = device->common.chancnt; + for (i = 0; i < msixcnt; i++) + device->msix_entries[i].entry = i; + + err = pci_enable_msix(pdev, device->msix_entries, msixcnt); + if (err < 0) + goto msi; + if (err > 0) + goto msix_single_vector; + + for (i = 0; i < msixcnt; i++) { + msix = &device->msix_entries[i]; + chan = ioat_chan_by_index(device, i); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", chan); + if (err) { + for (j = 0; j < i; j++) { + msix = &device->msix_entries[j]; + chan = ioat_chan_by_index(device, j); + devm_free_irq(dev, msix->vector, chan); + } + goto msix_single_vector; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + goto done; + +msix_single_vector: + msix = &device->msix_entries[0]; + msix->entry = 0; + err = pci_enable_msix(pdev, device->msix_entries, 1); + if (err) + goto msi; + + err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0, + "ioat-msix", device); + if (err) { + pci_disable_msix(pdev); + goto msi; + } + goto done; + +msi: + err = pci_enable_msi(pdev); + if (err) + goto intx; + + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", device); + if (err) { + pci_disable_msi(pdev); + goto intx; + } + goto done; + +intx: + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", device); + if (err) + goto err_no_irq; + +done: + if (device->intr_quirk) + device->intr_quirk(device); + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + dev_err(dev, "no usable interrupts\n"); + return err; +} + +static void ioat_disable_interrupts(struct ioatdma_device *device) +{ + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); +} + +int __devinit ioat_probe(struct ioatdma_device *device) +{ + int err = -ENODEV; + struct dma_device *dma = &device->common; + struct pci_dev *pdev = device->pdev; + struct device *dev = &pdev->dev; + + /* DMA coherent memory pool for DMA descriptor allocations */ + device->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!device->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + device->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + + if (!device->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + device->enumerate_channels(device); + + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->dev = &pdev->dev; + + if (!dma->chancnt) { + dev_err(dev, "channel enumeration error\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(device); + if (err) + goto err_setup_interrupts; + + err = device->self_test(device); + if (err) + goto err_self_test; + + return 0; + +err_self_test: + ioat_disable_interrupts(device); +err_setup_interrupts: + pci_pool_destroy(device->completion_pool); +err_completion_pool: + pci_pool_destroy(device->dma_pool); +err_dma_pool: + return err; +} + +int __devinit ioat_register(struct ioatdma_device *device) +{ + int err = dma_async_device_register(&device->common); + + if (err) { + ioat_disable_interrupts(device); + pci_pool_destroy(device->completion_pool); + pci_pool_destroy(device->dma_pool); + } + + return err; +} + +/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */ +static void ioat1_intr_quirk(struct ioatdma_device *device) +{ + struct pci_dev *pdev = device->pdev; + u32 dmactrl; + + pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + if (pdev->msi_enabled) + dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; + else + dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; + pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->desccount); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->active); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *device = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + device->version >> 4, device->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static struct attribute *ioat1_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioat_chan_common *chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + chan = container_of(kobj, struct ioat_chan_common, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&chan->common, page); +} + +const struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +static struct kobj_type ioat1_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat1_attrs, +}; + +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (err) { + dev_warn(to_dev(chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *device) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { + kobject_del(&chan->kobj); + kobject_put(&chan->kobj); + } + } +} + +int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + int err; + + device->intr_quirk = ioat1_intr_quirk; + device->enumerate_channels = ioat1_enumerate_channels; + device->self_test = ioat_dma_self_test; + device->timer_fn = ioat1_timer_event; + device->cleanup_fn = ioat1_cleanup_event; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; + dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; + dma->device_free_chan_resources = ioat1_dma_free_chan_resources; + dma->device_tx_status = ioat_dma_tx_status; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(4096); + err = ioat_register(device); + if (err) + return err; + ioat_kobject_add(device, &ioat1_ktype); + + if (dca) + device->dca = ioat_dca_init(pdev, device->reg_base); + + return err; +} + +void __devexit ioat_dma_remove(struct ioatdma_device *device) +{ + struct dma_device *dma = &device->common; + + ioat_disable_interrupts(device); + + ioat_kobject_del(device); + + dma_async_device_unregister(dma); + + pci_pool_destroy(device->dma_pool); + pci_pool_destroy(device->completion_pool); + + INIT_LIST_HEAD(&dma->channels); +} diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h new file mode 100644 index 00000000..5e8fe01b --- /dev/null +++ b/drivers/dma/ioat/dma.h @@ -0,0 +1,327 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_H +#define IOATDMA_H + +#include <linux/dmaengine.h> +#include "hw.h" +#include "registers.h" +#include <linux/init.h> +#include <linux/dmapool.h> +#include <linux/cache.h> +#include <linux/pci_ids.h> +#include <net/tcp.h> + +#define IOAT_DMA_VERSION "4.00" + +#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 +#define IOAT_DMA_DCA_ANY_CPU ~0 + +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) +#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) +#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) +#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +/** + * struct ioatdma_device - internal representation of a IOAT device + * @pdev: PCI-Express device + * @reg_base: MMIO register space base address + * @dma_pool: for allocating DMA descriptors + * @common: embedded struct dma_device + * @version: version of ioatdma device + * @msix_entries: irq handlers + * @idx: per channel data + * @dca: direct cache access context + * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) + * @enumerate_channels: hw version specific channel enumeration + * @reset_hw: hw version specific channel (re)initialization + * @cleanup_fn: select between the v2 and v3 cleanup routines + * @timer_fn: select between the v2 and v3 timer watchdog routines + * @self_test: hardware version specific self test for each supported op type + * + * Note: the v3 cleanup routine supports raid operations + */ +struct ioatdma_device { + struct pci_dev *pdev; + void __iomem *reg_base; + struct pci_pool *dma_pool; + struct pci_pool *completion_pool; + struct dma_device common; + u8 version; + struct msix_entry msix_entries[4]; + struct ioat_chan_common *idx[4]; + struct dca_provider *dca; + void (*intr_quirk)(struct ioatdma_device *device); + int (*enumerate_channels)(struct ioatdma_device *device); + int (*reset_hw)(struct ioat_chan_common *chan); + void (*cleanup_fn)(unsigned long data); + void (*timer_fn)(unsigned long data); + int (*self_test)(struct ioatdma_device *device); +}; + +struct ioat_chan_common { + struct dma_chan common; + void __iomem *reg_base; + dma_addr_t last_completion; + spinlock_t cleanup_lock; + unsigned long state; + #define IOAT_COMPLETION_PENDING 0 + #define IOAT_COMPLETION_ACK 1 + #define IOAT_RESET_PENDING 2 + #define IOAT_KOBJ_INIT_FAIL 3 + #define IOAT_RESHAPE_PENDING 4 + #define IOAT_RUN 5 + struct timer_list timer; + #define COMPLETION_TIMEOUT msecs_to_jiffies(100) + #define IDLE_TIMEOUT msecs_to_jiffies(2000) + #define RESET_DELAY msecs_to_jiffies(100) + struct ioatdma_device *device; + dma_addr_t completion_dma; + u64 *completion; + struct tasklet_struct cleanup_task; + struct kobject kobj; +}; + +struct ioat_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct dma_chan *, char *); +}; + +/** + * struct ioat_dma_chan - internal representation of a DMA channel + */ +struct ioat_dma_chan { + struct ioat_chan_common base; + + size_t xfercap; /* XFERCAP register value expanded out */ + + spinlock_t desc_lock; + struct list_head free_desc; + struct list_head used_desc; + + int pending; + u16 desccount; + u16 active; +}; + +static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) +{ + return container_of(c, struct ioat_chan_common, common); +} + +static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat_dma_chan, base); +} + +/* wrapper around hardware descriptor format + additional software fields */ + +/** + * struct ioat_desc_sw - wrapper around hardware descriptor + * @hw: hardware DMA descriptor (for memcpy) + * @node: this descriptor will either be on the free list, + * or attached to a transaction list (tx_list) + * @txd: the generic software descriptor for all engines + * @id: identifier for debug + */ +struct ioat_desc_sw { + struct ioat_dma_descriptor *hw; + struct list_head node; + size_t len; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + #ifdef DEBUG + int id; + #endif +}; + +#ifdef DEBUG +#define set_desc_id(desc, i) ((desc)->id = (i)) +#define desc_id(desc) ((desc)->id) +#else +#define set_desc_id(desc, i) +#define desc_id(desc) (0) +#endif + +static inline void +__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, + struct dma_async_tx_descriptor *tx, int id) +{ + struct device *dev = to_dev(chan); + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" + " ctl: %#x (op: %d int_en: %d compl: %d)\n", id, + (unsigned long long) tx->phys, + (unsigned long long) hw->next, tx->cookie, tx->flags, + hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write); +} + +#define dump_desc_dbg(c, d) \ + ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) + +static inline void ioat_set_tcp_copy_break(unsigned long copybreak) +{ + #ifdef CONFIG_NET_DMA + sysctl_tcp_dma_copybreak = copybreak; + #endif +} + +static inline struct ioat_chan_common * +ioat_chan_by_index(struct ioatdma_device *device, int index) +{ + return device->idx[index]; +} + +static inline u64 ioat_chansts(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u64 status; + u32 status_lo; + + /* We need to read the low address first as this causes the + * chipset to latch the upper bits for the subsequent read + */ + status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); + status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); + status <<= 32; + status |= status_lo; + + return status; +} + +static inline void ioat_start(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline u64 ioat_chansts_to_addr(u64 status) +{ + return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +} + +static inline u32 ioat_chanerr(struct ioat_chan_common *chan) +{ + return readl(chan->reg_base + IOAT_CHANERR_OFFSET); +} + +static inline void ioat_suspend(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline void ioat_reset(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u8 cmd; + + cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; +} + +static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) +{ + struct ioat_chan_common *chan = &ioat->base; + + writel(addr & 0x00000000FFFFFFFF, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); +} + +static inline bool is_ioat_active(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); +} + +static inline bool is_ioat_idle(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE); +} + +static inline bool is_ioat_halted(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED); +} + +static inline bool is_ioat_suspended(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED); +} + +/* channel was fatally programmed */ +static inline bool is_ioat_bug(unsigned long err) +{ + return !!err; +} + +static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, + int direction, enum dma_ctrl_flags flags, bool dst) +{ + if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || + (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) + pci_unmap_single(pdev, addr, len, direction); + else + pci_unmap_page(pdev, addr, len, direction); +} + +int __devinit ioat_probe(struct ioatdma_device *device); +int __devinit ioat_register(struct ioatdma_device *device); +int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); +int __devinit ioat_dma_self_test(struct ioatdma_device *device); +void __devexit ioat_dma_remove(struct ioatdma_device *device); +struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, + void __iomem *iobase); +dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan); +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx); +enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate); +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw); +bool ioat_cleanup_preamble(struct ioat_chan_common *chan, + dma_addr_t *phys_complete); +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *device); +extern const struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c new file mode 100644 index 00000000..86895760 --- /dev/null +++ b/drivers/dma/ioat/dma_v2.c @@ -0,0 +1,907 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine (versions >= 2), which + * does asynchronous data movement and checksumming operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/prefetch.h> +#include <linux/i7300_idle.h> +#include "dma.h" +#include "dma_v2.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat2+: allocate 2^n descriptors per channel" + " (default: 8 max: 16)"); +static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat2+: upper limit for ring size (default: 16)"); + +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + + ioat->dmacount += ioat2_ring_pending(ioat); + ioat->issued = ioat->head; + writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); +} + +void ioat2_issue_pending(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + if (ioat2_ring_pending(ioat)) { + spin_lock_bh(&ioat->prep_lock); + __ioat2_issue_pending(ioat); + spin_unlock_bh(&ioat->prep_lock); + } +} + +/** + * ioat2_update_pending - log pending descriptors + * @ioat: ioat2+ channel + * + * Check if the number of unsubmitted descriptors has exceeded the + * watermark. Called with prep_lock held + */ +static void ioat2_update_pending(struct ioat2_dma_chan *ioat) +{ + if (ioat2_ring_pending(ioat) > ioat_pending_level) + __ioat2_issue_pending(ioat); +} + +static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (ioat2_ring_space(ioat) < 1) { + dev_err(to_dev(&ioat->base), + "Unable to start null desc - ring full\n"); + return; + } + + dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + desc = ioat2_get_ring_ent(ioat, ioat->head); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + ioat2_set_chainaddr(ioat, desc->txd.phys); + dump_desc_dbg(ioat, desc); + wmb(); + ioat->head += 1; + __ioat2_issue_pending(ioat); +} + +static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + spin_lock_bh(&ioat->prep_lock); + __ioat2_start_null_desc(ioat); + spin_unlock_bh(&ioat->prep_lock); +} + +static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct dma_async_tx_descriptor *tx; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int idx = ioat->tail, i; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + smp_read_barrier_depends(); + prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); + desc = ioat2_get_ring_ent(ioat, idx + i); + tx = &desc->txd; + dump_desc_dbg(ioat, desc); + if (tx->cookie) { + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + dma_cookie_complete(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + } + smp_mb(); /* finish all descriptor reads before incrementing tail */ + ioat->tail = idx + i; + BUG_ON(active && !seen_current); /* no active descs have written a completion? */ + + chan->last_completion = phys_complete; + if (active - i == 0) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +/** + * ioat2_cleanup - clean finished descriptors (advance tail pointer) + * @chan: ioat channel to be cleaned up + */ +static void ioat2_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + dma_addr_t phys_complete; + + spin_lock_bh(&chan->cleanup_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + spin_unlock_bh(&chan->cleanup_lock); +} + +void ioat2_cleanup_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + + ioat2_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + + /* set the tail to be re-issued */ + ioat->issued = ioat->tail; + ioat->dmacount = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); + + if (ioat2_ring_pending(ioat)) { + struct ioat_ring_ent *desc; + + desc = ioat2_get_ring_ent(ioat, ioat->tail); + ioat2_set_chainaddr(ioat, desc->txd.phys); + __ioat2_issue_pending(ioat); + } else + __ioat2_start_null_desc(ioat); +} + +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + if (tmo && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + status = ioat_chansts(chan); + cpu_relax(); + } + + return err; +} + +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(chan); + while (ioat_reset_pending(chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; +} + +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + dma_addr_t phys_complete; + + ioat2_quiesce(chan, 0); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __ioat2_restart_chan(ioat); +} + +void ioat2_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; + + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + dma_addr_t phys_complete; + u64 status; + + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + if (test_bit(IOAT_RUN, &chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + spin_lock_bh(&chan->cleanup_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) { + __cleanup(ioat, phys_complete); + } else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { + spin_lock_bh(&ioat->prep_lock); + ioat2_restart_channel(ioat); + spin_unlock_bh(&ioat->prep_lock); + } else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&chan->cleanup_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +static int ioat2_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it initialized */ + u32 chanerr; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + +/** + * ioat2_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +int ioat2_enumerate_channels(struct ioatdma_device *device) +{ + struct ioat2_dma_chan *ioat; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(device->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(device->idx)); + dma->chancnt = ARRAY_SIZE(device->idx); + } + xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ + if (xfercap_log == 0) + return 0; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); + + /* FIXME which i/oat version is i7300? */ +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; +#endif + for (i = 0; i < dma->chancnt; i++) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) + break; + + ioat_init_channel(device, &ioat->base, i); + ioat->xfercap_log = xfercap_log; + spin_lock_init(&ioat->prep_lock); + if (device->reset_hw(&ioat->base)) { + i = 0; + break; + } + } + dma->chancnt = i; + return i; +} + +static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + dma_cookie_t cookie; + + cookie = dma_cookie_assign(tx); + dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + /* make descriptor updates visible before advancing ioat->head, + * this is purposefully not smp_wmb() since we are also + * publishing the descriptor updates to a dma device + */ + wmb(); + + ioat->head += ioat->produce; + + ioat2_update_pending(ioat); + spin_unlock_bh(&ioat->prep_lock); + + return cookie; +} + +static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) +{ + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *dma; + dma_addr_t phys; + + dma = to_ioatdma_device(chan->device); + hw = pci_pool_alloc(dma->dma_pool, flags, &phys); + if (!hw) + return NULL; + memset(hw, 0, sizeof(*hw)); + + desc = kmem_cache_alloc(ioat2_cache, flags); + if (!desc) { + pci_pool_free(dma->dma_pool, hw, phys); + return NULL; + } + memset(desc, 0, sizeof(*desc)); + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat2_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} + +static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *dma; + + dma = to_ioatdma_device(chan->device); + pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); + kmem_cache_free(ioat2_cache, desc); +} + +static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) +{ + struct ioat_ring_ent **ring; + int descs = 1 << order; + int i; + + if (order > ioat_get_max_alloc_order()) + return NULL; + + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat2_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat2_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); + } + + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; + } + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; +} + +void ioat2_free_chan_resources(struct dma_chan *c); + +/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring + * @chan: channel to be initialized + */ +int ioat2_alloc_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent **ring; + u64 status; + int order; + int i = 0; + + /* have we already been set up? */ + if (ioat->ring) + return 1 << ioat->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + chan->completion = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, &chan->completion_dma); + if (!chan->completion) + return -ENOMEM; + + memset(chan->completion, 0, sizeof(*chan->completion)); + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_dma) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + order = ioat_get_alloc_order(); + ring = ioat2_alloc_ring(c, order, GFP_KERNEL); + if (!ring) + return -ENOMEM; + + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); + ioat->ring = ring; + ioat->head = 0; + ioat->issued = 0; + ioat->tail = 0; + ioat->alloc_order = order; + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); + + tasklet_enable(&chan->cleanup_task); + ioat2_start_null_desc(ioat); + + /* check that we got off the ground */ + do { + udelay(1); + status = ioat_chansts(chan); + } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); + + if (is_ioat_active(status) || is_ioat_idle(status)) { + set_bit(IOAT_RUN, &chan->state); + return 1 << ioat->alloc_order; + } else { + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + + dev_WARN(to_dev(chan), + "failed to start channel chanerr: %#x\n", chanerr); + ioat2_free_chan_resources(c); + return -EFAULT; + } +} + +bool reshape_ring(struct ioat2_dma_chan *ioat, int order) +{ + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct ioat_chan_common *chan = &ioat->base; + struct dma_chan *c = &chan->common; + const u32 curr_size = ioat2_ring_size(ioat); + const u16 active = ioat2_ring_active(ioat); + const u32 new_size = 1 << order; + struct ioat_ring_ent **ring; + u16 i; + + if (order > ioat_get_max_alloc_order()) + return false; + + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring + */ + if (active >= new_size) + return false; + + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; + + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ioat2_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } + + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; + + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat2_get_ring_ent(ioat, ioat->tail+i); + ioat2_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; + } + + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, new_size); + + kfree(ioat->ring); + ioat->ring = ring; + ioat->alloc_order = order; + + return true; +} + +/** + * ioat2_check_space_lock - verify space and grab ring producer lock + * @ioat: ioat2,3 channel (ring) to operate on + * @num_descs: allocation length + */ +int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs) +{ + struct ioat_chan_common *chan = &ioat->base; + bool retry; + + retry: + spin_lock_bh(&ioat->prep_lock); + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. + */ + if (likely(ioat2_ring_space(ioat) > num_descs)) { + dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + ioat->produce = num_descs; + return 0; /* with ioat->prep_lock held */ + } + retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state); + spin_unlock_bh(&ioat->prep_lock); + + /* is another cpu already trying to expand the ring? */ + if (retry) + goto retry; + + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); + retry = reshape_ring(ioat, ioat->alloc_order + 1); + clear_bit(IOAT_RESHAPE_PENDING, &chan->state); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); + + /* if we were able to expand the ring retry the allocation */ + if (retry) + goto retry; + + if (printk_ratelimit()) + dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + + /* progress reclaim in the allocation failure case we may be + * called under bh_disabled so we need to trigger the timer + * event directly + */ + if (jiffies > chan->timer.expires && timer_pending(&chan->timer)) { + struct ioatdma_device *device = chan->device; + + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + device->timer_fn((unsigned long) &chan->common); + } + + return -ENOMEM; +} + +struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs, idx, i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) + idx = ioat->head; + else + return NULL; + i = 0; + do { + size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + dump_desc_dbg(ioat, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + +/** + * ioat2_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +void ioat2_free_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; + struct ioat_ring_ent *desc; + const u16 total_descs = 1 << ioat->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat->ring) + return; + + tasklet_disable(&chan->cleanup_task); + del_timer_sync(&chan->timer); + device->cleanup_fn((unsigned long) c); + device->reset_hw(chan); + clear_bit(IOAT_RUN, &chan->state); + + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); + descs = ioat2_ring_space(ioat); + dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); + for (i = 0; i < descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->head + i); + ioat2_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + dump_desc_dbg(ioat, desc); + ioat2_free_ring_ent(desc, c); + } + + kfree(ioat->ring); + ioat->ring = NULL; + ioat->alloc_order = 0; + pci_pool_free(device->completion_pool, chan->completion, + chan->completion_dma); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); + + chan->last_completion = 0; + chan->completion_dma = 0; + ioat->dmacount = 0; +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat2_ring_active(ioat)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat2_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat2_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat2_attrs, +}; + +int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + + device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat2_reset_hw; + device->cleanup_fn = ioat2_cleanup_event; + device->timer_fn = ioat2_timer_event; + device->self_test = ioat_dma_self_test; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_tx_status = ioat_dma_tx_status; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(2048); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + + ioat_kobject_add(device, &ioat2_ktype); + + if (dca) + device->dca = ioat2_dca_init(pdev, device->reg_base); + + return err; +} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h new file mode 100644 index 00000000..be2a55b9 --- /dev/null +++ b/drivers/dma/ioat/dma_v2.h @@ -0,0 +1,179 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_V2_H +#define IOATDMA_V2_H + +#include <linux/dmaengine.h> +#include <linux/circ_buf.h> +#include "dma.h" +#include "hw.h" + + +extern int ioat_pending_level; +extern int ioat_ring_alloc_order; + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) +#define ioat_get_max_alloc_order() \ + (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) + +/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes + * @base: common ioat channel parameters + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @produce: number of descriptors to produce at submit time + * @ring: software ring buffer implementation of hardware ring + * @prep_lock: serializes descriptor preparation (producers) + */ +struct ioat2_dma_chan { + struct ioat_chan_common base; + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + u16 produce; + struct ioat_ring_ent **ring; + spinlock_t prep_lock; +}; + +static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat2_dma_chan, base); +} + +static inline u32 ioat2_ring_size(struct ioat2_dma_chan *ioat) +{ + return 1 << ioat->alloc_order; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) +{ + return CIRC_CNT(ioat->head, ioat->tail, ioat2_ring_size(ioat)); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) +{ + return CIRC_CNT(ioat->head, ioat->issued, ioat2_ring_size(ioat)); +} + +static inline u32 ioat2_ring_space(struct ioat2_dma_chan *ioat) +{ + return ioat2_ring_size(ioat) - ioat2_ring_active(ioat); +} + +static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) +{ + u16 num_descs = len >> ioat->xfercap_log; + + num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); + return num_descs; +} + +/** + * struct ioat_ring_ent - wrapper around hardware descriptor + * @hw: hardware DMA descriptor (for memcpy) + * @fill: hardware fill descriptor + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor + * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @result: asynchronous result of validate operations + * @id: identifier for debug + */ + +struct ioat_ring_ent { + union { + struct ioat_dma_descriptor *hw; + struct ioat_fill_descriptor *fill; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; + size_t len; + struct dma_async_tx_descriptor txd; + enum sum_check_flags *result; + #ifdef DEBUG + int id; + #endif +}; + +static inline struct ioat_ring_ent * +ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) +{ + return ioat->ring[idx & (ioat2_ring_size(ioat) - 1)]; +} + +static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) +{ + struct ioat_chan_common *chan = &ioat->base; + + writel(addr & 0x00000000FFFFFFFF, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + +int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); +int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); +struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); +int ioat2_enumerate_channels(struct ioatdma_device *device); +struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +void ioat2_issue_pending(struct dma_chan *chan); +int ioat2_alloc_chan_resources(struct dma_chan *c); +void ioat2_free_chan_resources(struct dma_chan *c); +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); +bool reshape_ring(struct ioat2_dma_chan *ioat, int order); +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); +void ioat2_cleanup_event(unsigned long data); +void ioat2_timer_event(unsigned long data); +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); +extern struct kobj_type ioat2_ktype; +extern struct kmem_cache *ioat2_cache; +#endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c new file mode 100644 index 00000000..f7f1dc62 --- /dev/null +++ b/drivers/dma/ioat/dma_v3.c @@ -0,0 +1,1302 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * BSD LICENSE + * + * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Support routines for v3+ hardware + */ + +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/prefetch.h> +#include "../dmaengine.h" +#include "registers.h" +#include "hw.h" +#include "dma.h" +#include "dma_v2.h" + +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor or pq descriptor + */ +static const u8 xor_idx_to_desc = 0xe0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; + +static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + return raw->field[xor_idx_to_field[idx]]; +} + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + +static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, + struct ioat_ring_ent *desc, int idx) +{ + struct ioat_chan_common *chan = &ioat->base; + struct pci_dev *pdev = chan->device->pdev; + size_t len = desc->len; + size_t offset = len - desc->hw->size; + struct dma_async_tx_descriptor *tx = &desc->txd; + enum dma_ctrl_flags flags = tx->flags; + + switch (desc->hw->ctl_f.op) { + case IOAT_OP_COPY: + if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ + ioat_dma_unmap(chan, flags, len, desc->hw); + break; + case IOAT_OP_FILL: { + struct ioat_fill_descriptor *hw = desc->fill; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } + case IOAT_OP_XOR_VAL: + case IOAT_OP_XOR: { + struct ioat_xor_descriptor *xor = desc->xor; + struct ioat_ring_ent *ext; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[2]; + int i; + + if (src_cnt > 5) { + ext = ioat2_get_ring_ent(ioat, idx + 1); + xor_ex = ext->xor_ex; + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = xor_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* dest is a source in xor validate operations */ + if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { + ioat_unmap(pdev, xor->dst_addr - offset, len, + PCI_DMA_TODEVICE, flags, 1); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, xor->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ: { + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_ring_ent *ext; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[2]; + int i; + + if (src_cnt > 3) { + ext = ioat2_get_ring_ent(ioat, idx + 1); + pq_ex = ext->pq_ex; + } + + /* in the 'continue' case don't unmap the dests as sources */ + if (dmaf_p_disabled_continue(flags)) + src_cnt--; + else if (dmaf_continue(flags)) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = pq_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* the dests are sources in pq validate operations */ + if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, + len, PCI_DMA_TODEVICE, flags, 0); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, + len, PCI_DMA_TODEVICE, flags, 0); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + } + break; + } + default: + dev_err(&pdev->dev, "%s: unknown op type: %#x\n", + __func__, desc->hw->ctl_f.op); + } +} + +static bool desc_has_ext(struct ioat_ring_ent *desc) +{ + struct ioat_dma_descriptor *hw = desc->hw; + + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; + + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; + + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; + } + + return false; +} + +/** + * __cleanup - reclaim used descriptors + * @ioat: channel (ring) to clean + * + * The difference from the dma_v2.c __cleanup() is that this routine + * handles extended descriptors and dma-unmapping raid operations. + */ +static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *desc; + bool seen_current = false; + int idx = ioat->tail, i; + u16 active; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; + + smp_read_barrier_depends(); + prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); + desc = ioat2_get_ring_ent(ioat, idx + i); + dump_desc_dbg(ioat, desc); + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + ioat3_dma_unmap(ioat, desc, idx + i); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } + } + smp_mb(); /* finish all descriptor reads before incrementing tail */ + ioat->tail = idx + i; + BUG_ON(active && !seen_current); /* no active descs have written a completion? */ + chan->last_completion = phys_complete; + + if (active - i == 0) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } + /* 5 microsecond delay per pending descriptor */ + writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), + chan->device->reg_base + IOAT_INTRDELAY_OFFSET); +} + +static void ioat3_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + dma_addr_t phys_complete; + + spin_lock_bh(&chan->cleanup_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat3_cleanup_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + + ioat3_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + dma_addr_t phys_complete; + + ioat2_quiesce(chan, 0); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __ioat2_restart_chan(ioat); +} + +static void ioat3_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; + + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + dma_addr_t phys_complete; + u64 status; + + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + if (test_bit(IOAT_RUN, &chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + spin_lock_bh(&chan->cleanup_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { + spin_lock_bh(&ioat->prep_lock); + ioat3_restart_channel(ioat); + spin_unlock_bh(&ioat->prep_lock); + } else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&chan->cleanup_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +static enum dma_status +ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + enum dma_status ret; + + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_SUCCESS) + return ret; + + ioat3_cleanup(ioat); + + return dma_cookie_status(c, cookie, txstate); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_fill_descriptor *fill; + u64 src_data = (0x0101010101010101ULL) * (value & 0xff); + int num_descs, idx, i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) + idx = ioat->head; + else + return NULL; + i = 0; + do { + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + fill = desc->fill; + + fill->size = xfer_size; + fill->src_data = src_data; + fill->dst_addr = dest; + fill->ctl = 0; + fill->ctl_f.op = IOAT_OP_FILL; + + len -= xfer_size; + dest += xfer_size; + dump_desc_dbg(ioat, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + fill->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + int num_descs, with_ext, idx, i; + u32 offset = 0; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0) + idx = ioat->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + int s; + + desc = ioat2_get_ring_ent(ioat, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat2_get_ring_ent(ioat, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +static struct dma_async_tx_descriptor * +ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(&ioat->base); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + int i, s, idx, with_ext, num_descs; + + dev_dbg(to_dev(chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. + */ + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat2_check_space_lock(ioat, num_descs+1) == 0) + idx = ioat->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat, desc, ext); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +static struct dma_async_tx_descriptor * +ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + dst[1] = dst[0]; + + /* handle the single source multiply case from the raid6 + * recovery path + */ + if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + } else + return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, + len, flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + pq[0] = pq[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + pq[1] = pq[0]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + memset(scf, 0, src_cnt); + pq[0] = dst; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = dst; /* specify valid address for disabled result */ + + return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + pq[0] = src[0]; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = pq[0]; /* specify valid address for disabled result */ + + return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, + len, flags); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (ioat2_check_space_lock(ioat, 1) == 0) + desc = ioat2_get_ring_ent(ioat, ioat->head); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static void __devinit ioat3_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_addr, dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOAT_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT); + + if (!tx) { + dev_err(dev, "Self-test xor prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test xor setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test xor timed out\n"); + err = -ENODEV; + goto free_resources; + } + + dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_err(dev, "Self-test xor failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE); + + /* skip validate if the capability is not present */ + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) + goto free_resources; + + /* validate the sources with the destintation page */ + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + xor_val_srcs[i] = xor_srcs[i]; + xor_val_srcs[i] = dest; + + xor_val_result = 1; + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test zero prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test zero setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto free_resources; + } + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + /* skip memset if the capability is not present */ + if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask)) + goto free_resources; + + /* test memset */ + dma_addr = dma_map_page(dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, + DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test memset prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test memset setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test memset timed out\n"); + err = -ENODEV; + goto free_resources; + } + + for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { + u32 *ptr = page_address(dest); + if (ptr[i]) { + dev_err(dev, "Self-test memset failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + + /* test for non-zero parity sum */ + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto free_resources; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) +{ + int rc = ioat_dma_self_test(device); + + if (rc) + return rc; + + rc = ioat_xor_val_self_test(device); + if (rc) + return rc; + + return 0; +} + +static int ioat3_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *device = chan->device; + struct pci_dev *pdev = device->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3, and clear any + * pending errors + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); + if (err) { + dev_err(&pdev->dev, "channel error register unreachable\n"); + return err; + } + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + +static bool is_jf_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_JSF0: + case PCI_DEVICE_ID_INTEL_IOAT_JSF1: + case PCI_DEVICE_ID_INTEL_IOAT_JSF2: + case PCI_DEVICE_ID_INTEL_IOAT_JSF3: + case PCI_DEVICE_ID_INTEL_IOAT_JSF4: + case PCI_DEVICE_ID_INTEL_IOAT_JSF5: + case PCI_DEVICE_ID_INTEL_IOAT_JSF6: + case PCI_DEVICE_ID_INTEL_IOAT_JSF7: + case PCI_DEVICE_ID_INTEL_IOAT_JSF8: + case PCI_DEVICE_ID_INTEL_IOAT_JSF9: + return true; + default: + return false; + } +} + +static bool is_snb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_SNB0: + case PCI_DEVICE_ID_INTEL_IOAT_SNB1: + case PCI_DEVICE_ID_INTEL_IOAT_SNB2: + case PCI_DEVICE_ID_INTEL_IOAT_SNB3: + case PCI_DEVICE_ID_INTEL_IOAT_SNB4: + case PCI_DEVICE_ID_INTEL_IOAT_SNB5: + case PCI_DEVICE_ID_INTEL_IOAT_SNB6: + case PCI_DEVICE_ID_INTEL_IOAT_SNB7: + case PCI_DEVICE_ID_INTEL_IOAT_SNB8: + case PCI_DEVICE_ID_INTEL_IOAT_SNB9: + return true; + default: + return false; + } +} + +int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + int dca_en = system_has_dca_enabled(pdev); + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + bool is_raid_device = false; + int err; + u32 cap; + + device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat3_reset_hw; + device->self_test = ioat3_dma_self_test; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + + if (is_jf_ioat(pdev) || is_snb_ioat(pdev)) + dma->copy_align = 6; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; + + cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + + /* dca is incompatible with raid operations */ + if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + + if (cap & IOAT_CAP_XOR) { + is_raid_device = true; + dma->max_xor = 8; + dma->xor_align = 6; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat3_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat3_prep_xor_val; + } + if (cap & IOAT_CAP_PQ) { + is_raid_device = true; + dma_set_maxpq(dma, 8, 0); + dma->pq_align = 6; + + dma_cap_set(DMA_PQ, dma->cap_mask); + dma->device_prep_dma_pq = ioat3_prep_pq; + + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = ioat3_prep_pq_val; + + if (!(cap & IOAT_CAP_XOR)) { + dma->max_xor = 8; + dma->xor_align = 6; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat3_prep_pqxor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; + } + } + if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { + dma_cap_set(DMA_MEMSET, dma->cap_mask); + dma->device_prep_dma_memset = ioat3_prep_memset_lock; + } + + + if (is_raid_device) { + dma->device_tx_status = ioat3_tx_status; + device->cleanup_fn = ioat3_cleanup_event; + device->timer_fn = ioat3_timer_event; + } else { + dma->device_tx_status = ioat_dma_tx_status; + device->cleanup_fn = ioat2_cleanup_event; + device->timer_fn = ioat2_timer_event; + } + + #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + dma_cap_clear(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = NULL; + #endif + + #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + dma_cap_clear(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = NULL; + #endif + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + + ioat_kobject_add(device, &ioat2_ktype); + + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return 0; +} diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h new file mode 100644 index 00000000..60e67545 --- /dev/null +++ b/drivers/dma/ioat/hw.h @@ -0,0 +1,217 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef _IOAT_HW_H_ +#define _IOAT_HW_H_ + +/* PCI Configuration Space Values */ +#define IOAT_PCI_VID 0x8086 +#define IOAT_MMIO_BAR 0 + +/* CB device ID's */ +#define IOAT_PCI_DID_5000 0x1A38 +#define IOAT_PCI_DID_CNB 0x360B +#define IOAT_PCI_DID_SCNB 0x65FF +#define IOAT_PCI_DID_SNB 0x402F + +#define IOAT_PCI_RID 0x00 +#define IOAT_PCI_SVID 0x8086 +#define IOAT_PCI_SID 0x8086 +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ +#define IOAT_VER_3_2 0x32 /* Version 3.2 */ + +int system_has_dca_enabled(struct pci_dev *pdev); + +struct ioat_dma_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int null:1; + unsigned int src_brk:1; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd2:13; + #define IOAT_OP_COPY 0x00 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t rsv2; + /* store some driver data in an unused portion of the descriptor */ + union { + uint64_t user1; + uint64_t tx_cnt; + }; + uint64_t user2; +}; + +struct ioat_fill_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int rsvd:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int rsvd2:2; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int rsvd4:15; + #define IOAT_OP_FILL 0x01 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_data; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t next_dst_addr; + uint64_t user1; + uint64_t user2; +}; + +struct ioat_xor_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd:13; + #define IOAT_OP_XOR 0x87 + #define IOAT_OP_XOR_VAL 0x88 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; +}; + +struct ioat_xor_ext_descriptor { + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t next; + uint64_t rsvd[4]; +}; + +struct ioat_pq_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:11; + #define IOAT_OP_PQ 0x89 + #define IOAT_OP_PQ_VAL 0x8a + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint8_t coef[8]; + uint64_t q_addr; +}; + +struct ioat_pq_ext_descriptor { + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t next; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t rsvd[2]; +}; + +struct ioat_pq_update_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:3; + unsigned int coef:8; + #define IOAT_OP_PQ_UP 0x8b + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t p_src; + uint64_t q_src; + uint64_t q_addr; +}; + +struct ioat_raw_descriptor { + uint64_t field[8]; +}; +#endif diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c new file mode 100644 index 00000000..5e3a40f7 --- /dev/null +++ b/drivers/dma/ioat/pci.c @@ -0,0 +1,217 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dca.h> +#include <linux/slab.h> +#include "dma.h" +#include "dma_v2.h" +#include "registers.h" +#include "hw.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v1 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, + { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + + /* I/OAT v2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + + /* I/OAT v3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); + +static int __devinit ioat_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id); +static void __devexit ioat_remove(struct pci_dev *pdev); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); + +struct kmem_cache *ioat2_cache; + +#define DRV_NAME "ioatdma" + +static struct pci_driver ioat_pci_driver = { + .name = DRV_NAME, + .id_table = ioat_pci_tbl, + .probe = ioat_pci_probe, + .remove = __devexit_p(ioat_remove), +}; + +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + void __iomem * const *iomap; + struct device *dev = &pdev->dev; + struct ioatdma_device *device; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); + if (err) + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_master(pdev); + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version == IOAT_VER_1_2) + err = ioat1_dma_probe(device, ioat_dca_enabled); + else if (device->version == IOAT_VER_2_0) + err = ioat2_dma_probe(device, ioat_dca_enabled); + else if (device->version >= IOAT_VER_3_0) + err = ioat3_dma_probe(device, ioat_dca_enabled); + else + return -ENODEV; + + if (err) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + return -ENODEV; + } + + return 0; +} + +static void __devexit ioat_remove(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; + } + ioat_dma_remove(device); +} + +static int __init ioat_init_module(void) +{ + int err; + + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + + ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat2_cache) + return -ENOMEM; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + kmem_cache_destroy(ioat2_cache); + + return err; +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat2_cache); +} +module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h new file mode 100644 index 00000000..13917985 --- /dev/null +++ b/drivers/dma/ioat/registers.h @@ -0,0 +1,249 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef _IOAT_REGISTERS_H_ +#define _IOAT_REGISTERS_H_ + +#define IOAT_PCI_DMACTRL_OFFSET 0x48 +#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 +#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 + +#define IOAT_PCI_DEVICE_ID_OFFSET 0x02 +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERR_INT_OFFSET 0x180 +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 + +/* MMIO Device Registers */ +#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ + +#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */ +#define IOAT_XFERCAP_4KB 12 +#define IOAT_XFERCAP_8KB 13 +#define IOAT_XFERCAP_16KB 14 +#define IOAT_XFERCAP_32KB 15 +#define IOAT_XFERCAP_32GB 0 + +#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */ +#define IOAT_GENCTRL_DEBUG_EN 0x01 + +#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */ +#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */ +#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */ +#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */ +#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ + +#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */ + +#define IOAT_VER_OFFSET 0x08 /* 8-bit */ +#define IOAT_VER_MAJOR_MASK 0xF0 +#define IOAT_VER_MINOR_MASK 0x0F +#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4) +#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK) + +#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ + +#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ +#define IOAT_INTRDELAY_MASK 0x3FFF /* Interrupt Delay Time */ +#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ + +#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ +#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 +#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002 +#define IOAT_DEVICE_MEMORY_BYPASS 0x0004 +#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008 + +#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */ +#define IOAT_CAP_PAGE_BREAK 0x00000001 +#define IOAT_CAP_CRC 0x00000002 +#define IOAT_CAP_SKIP_MARKER 0x00000004 +#define IOAT_CAP_DCA 0x00000010 +#define IOAT_CAP_CRC_MOVE 0x00000020 +#define IOAT_CAP_FILL_BLOCK 0x00000040 +#define IOAT_CAP_APIC 0x00000080 +#define IOAT_CAP_XOR 0x00000100 +#define IOAT_CAP_PQ 0x00000200 + +#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ + +/* DMA Channel Registers */ +#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ +#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 +#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200 +#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 +#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 +#define IOAT_CHANCTRL_ERR_INT_EN 0x0010 +#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 +#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 +#define IOAT_CHANCTRL_INT_REARM 0x0001 +#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ + IOAT_CHANCTRL_ANY_ERR_ABORT_EN) + +#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ +#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ +#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */ + + +#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */ +#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */ +#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET) +#define IOAT1_CHANSTS_OFFSET_LOW 0x04 +#define IOAT2_CHANSTS_OFFSET_LOW 0x08 +#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW) +#define IOAT1_CHANSTS_OFFSET_HIGH 0x08 +#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C +#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) +#define IOAT_CHANSTS_SOFT_ERR 0x10ULL +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL +#define IOAT_CHANSTS_STATUS 0x7ULL +#define IOAT_CHANSTS_ACTIVE 0x0 +#define IOAT_CHANSTS_DONE 0x1 +#define IOAT_CHANSTS_SUSPENDED 0x2 +#define IOAT_CHANSTS_HALTED 0x3 + + + +#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */ + +#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */ +#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000 +#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */ + +/* CB DCA Memory Space Registers */ +#define IOAT_DCAOFFSET_OFFSET 0x14 +/* CB_BAR + IOAT_DCAOFFSET value */ +#define IOAT_DCA_VER_OFFSET 0x00 +#define IOAT_DCA_VER_MAJOR_MASK 0xF0 +#define IOAT_DCA_VER_MINOR_MASK 0x0F + +#define IOAT_DCA_COMP_OFFSET 0x02 +#define IOAT_DCA_COMP_V1 0x1 + +#define IOAT_FSB_CAPABILITY_OFFSET 0x04 +#define IOAT_FSB_CAPABILITY_PREFETCH 0x1 + +#define IOAT_PCI_CAPABILITY_OFFSET 0x06 +#define IOAT_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08 +#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1 + +#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A +#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1 + +#define IOAT_APICID_TAG_MAP_OFFSET 0x0C +#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F +#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0 +#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0 +#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4 +#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00 +#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8 +#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000 +#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12 +#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000 +#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16 +#define IOAT_APICID_TAG_CB2_VALID 0x8080808080 + +#define IOAT_DCA_GREQID_OFFSET 0x10 +#define IOAT_DCA_GREQID_SIZE 0x04 +#define IOAT_DCA_GREQID_MASK 0xFFFF +#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000 +#define IOAT_DCA_GREQID_VALID 0x20000000 +#define IOAT_DCA_GREQID_LASTID 0x80000000 + +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08 +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1 + +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT3_CSI_CONTROL_OFFSET 0x0C +#define IOAT3_CSI_CONTROL_PREFETCH 0x1 + +#define IOAT3_PCI_CONTROL_OFFSET 0x0E +#define IOAT3_PCI_CONTROL_MEMWR 0x1 + +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14 + +#define IOAT3_DCA_GREQID_OFFSET 0x02 + +#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ +#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ +#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET) +#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C +#define IOAT2_CHAINADDR_OFFSET_LOW 0x10 +#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW) +#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10 +#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14 +#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH) + +#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ +#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */ +#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET) +#define IOAT_CHANCMD_RESET 0x20 +#define IOAT_CHANCMD_RESUME 0x10 +#define IOAT_CHANCMD_ABORT 0x08 +#define IOAT_CHANCMD_SUSPEND 0x04 +#define IOAT_CHANCMD_APPEND 0x02 +#define IOAT_CHANCMD_START 0x01 + +#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */ +#define IOAT_CHANCMP_OFFSET_LOW 0x18 +#define IOAT_CHANCMP_OFFSET_HIGH 0x1C + +#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */ +#define IOAT_CDAR_OFFSET_LOW 0x20 +#define IOAT_CDAR_OFFSET_HIGH 0x24 + +#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ +#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001 +#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002 +#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004 +#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008 +#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 +#define IOAT_CHANERR_CHANCMD_ERR 0x0020 +#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 +#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 +#define IOAT_CHANERR_READ_DATA_ERR 0x0100 +#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 +#define IOAT_CHANERR_CONTROL_ERR 0x0400 +#define IOAT_CHANERR_LENGTH_ERR 0x0800 +#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 +#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 +#define IOAT_CHANERR_SOFT_ERR 0x4000 +#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 +#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000 +#define IOAT_CHANERR_XOR_Q_ERR 0x20000 +#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000 + +#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR) + +#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ + +#endif /* _IOAT_REGISTERS_H_ */ diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c new file mode 100644 index 00000000..79e3eba2 --- /dev/null +++ b/drivers/dma/iop-adma.c @@ -0,0 +1,1726 @@ +/* + * offload engine driver for the Intel Xscale series of i/o processors + * Copyright © 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +/* + * This driver supports the asynchrounous DMA copy and RAID engines available + * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/memory.h> +#include <linux/ioport.h> +#include <linux/raid/pq.h> +#include <linux/slab.h> + +#include <mach/adma.h> + +#include "dmaengine.h" + +#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common) +#define to_iop_adma_device(dev) \ + container_of(dev, struct iop_adma_device, common) +#define tx_to_iop_adma_slot(tx) \ + container_of(tx, struct iop_adma_desc_slot, async_tx) + +/** + * iop_adma_free_slots - flags descriptor slots for reuse + * @slot: Slot to free + * Caller must hold &iop_chan->lock while calling this function + */ +static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) +{ + int stride = slot->slots_per_op; + + while (stride--) { + slot->slots_per_op = 0; + slot = list_entry(slot->slot_node.next, + struct iop_adma_desc_slot, + slot_node); + } +} + +static void +iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = tx->flags; + u32 src_cnt; + dma_addr_t addr; + dma_addr_t dest; + + src_cnt = unmap->unmap_src_cnt; + dest = iop_desc_get_dest_addr(unmap, iop_chan); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + enum dma_data_direction dir; + + if (src_cnt > 1) /* is xor? */ + dir = DMA_BIDIRECTIONAL; + else + dir = DMA_FROM_DEVICE; + + dma_unmap_page(dev, dest, len, dir); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt); + if (addr == dest) + continue; + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + } + desc->group_head = NULL; +} + +static void +iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = tx->flags; + u32 src_cnt = unmap->unmap_src_cnt; + dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan); + dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan); + int i; + + if (tx->flags & DMA_PREP_CONTINUE) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) { + dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + dma_addr_t addr; + + for (i = 0; i < src_cnt; i++) { + addr = iop_desc_get_src_addr(unmap, iop_chan, i); + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + if (desc->pq_check_result) { + dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE); + dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE); + } + } + + desc->group_head = NULL; +} + + +static dma_cookie_t +iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *iop_chan, dma_cookie_t cookie) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie < 0); + if (tx->cookie > 0) { + cookie = tx->cookie; + tx->cookie = 0; + + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + if (tx->callback) + tx->callback(tx->callback_param); + + /* unmap dma addresses + * (unmap_single vs unmap_page?) + */ + if (desc->group_head && desc->unmap_len) { + if (iop_desc_is_pq(desc)) + iop_desc_unmap_pq(iop_chan, desc); + else + iop_desc_unmap(iop_chan, desc); + } + } + + /* run dependent operations */ + dma_run_dependencies(tx); + + return cookie; +} + +static int +iop_adma_clean_slot(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *iop_chan) +{ + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) + return 0; + + /* leave the last descriptor in the chain + * so we can append to it + */ + if (desc->chain_node.next == &iop_chan->chain) + return 1; + + dev_dbg(iop_chan->device->common.dev, + "\tfree slot: %d slots_per_op: %d\n", + desc->idx, desc->slots_per_op); + + list_del(&desc->chain_node); + iop_adma_free_slots(desc); + + return 0; +} + +static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL; + dma_cookie_t cookie = 0; + u32 current_desc = iop_chan_get_current_descriptor(iop_chan); + int busy = iop_chan_is_busy(iop_chan); + int seen_current = 0, slot_cnt = 0, slots_per_op = 0; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); + /* free completed slots from the chain starting with + * the oldest descriptor + */ + list_for_each_entry_safe(iter, _iter, &iop_chan->chain, + chain_node) { + pr_debug("\tcookie: %d slot: %d busy: %d " + "this_desc: %#x next_desc: %#x ack: %d\n", + iter->async_tx.cookie, iter->idx, busy, + iter->async_tx.phys, iop_desc_get_next_desc(iter), + async_tx_test_ack(&iter->async_tx)); + prefetch(_iter); + prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel, subsequent descriptors are either in + * process or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy, or if it appears that the current descriptor + * needs to be re-read (i.e. has been appended to) + */ + if (iter->async_tx.phys == current_desc) { + BUG_ON(seen_current++); + if (busy || iop_desc_get_next_desc(iter)) + break; + } + + /* detect the start of a group transaction */ + if (!slot_cnt && !slots_per_op) { + slot_cnt = iter->slot_cnt; + slots_per_op = iter->slots_per_op; + if (slot_cnt <= slots_per_op) { + slot_cnt = 0; + slots_per_op = 0; + } + } + + if (slot_cnt) { + pr_debug("\tgroup++\n"); + if (!grp_start) + grp_start = iter; + slot_cnt -= slots_per_op; + } + + /* all the members of a group are complete */ + if (slots_per_op != 0 && slot_cnt == 0) { + struct iop_adma_desc_slot *grp_iter, *_grp_iter; + int end_of_chain = 0; + pr_debug("\tgroup end\n"); + + /* collect the total results */ + if (grp_start->xor_check_result) { + u32 zero_sum_result = 0; + slot_cnt = grp_start->slot_cnt; + grp_iter = grp_start; + + list_for_each_entry_from(grp_iter, + &iop_chan->chain, chain_node) { + zero_sum_result |= + iop_desc_get_zero_result(grp_iter); + pr_debug("\titer%d result: %d\n", + grp_iter->idx, zero_sum_result); + slot_cnt -= slots_per_op; + if (slot_cnt == 0) + break; + } + pr_debug("\tgrp_start->xor_check_result: %p\n", + grp_start->xor_check_result); + *grp_start->xor_check_result = zero_sum_result; + } + + /* clean up the group */ + slot_cnt = grp_start->slot_cnt; + grp_iter = grp_start; + list_for_each_entry_safe_from(grp_iter, _grp_iter, + &iop_chan->chain, chain_node) { + cookie = iop_adma_run_tx_complete_actions( + grp_iter, iop_chan, cookie); + + slot_cnt -= slots_per_op; + end_of_chain = iop_adma_clean_slot(grp_iter, + iop_chan); + + if (slot_cnt == 0 || end_of_chain) + break; + } + + /* the group should be complete at this point */ + BUG_ON(slot_cnt); + + slots_per_op = 0; + grp_start = NULL; + if (end_of_chain) + break; + else + continue; + } else if (slots_per_op) /* wait for group completion */ + continue; + + /* write back zero sum results (single descriptor case) */ + if (iter->xor_check_result && iter->async_tx.cookie) + *iter->xor_check_result = + iop_desc_get_zero_result(iter); + + cookie = iop_adma_run_tx_complete_actions( + iter, iop_chan, cookie); + + if (iop_adma_clean_slot(iter, iop_chan)) + break; + } + + if (cookie > 0) { + iop_chan->common.completed_cookie = cookie; + pr_debug("\tcompleted cookie %d\n", cookie); + } +} + +static void +iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) +{ + spin_lock_bh(&iop_chan->lock); + __iop_adma_slot_cleanup(iop_chan); + spin_unlock_bh(&iop_chan->lock); +} + +static void iop_adma_tasklet(unsigned long data) +{ + struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; + + /* lockdep will flag depedency submissions as potentially + * recursive locking, this is not the case as a dependency + * submission will never recurse a channels submit routine. + * There are checks in async_tx.c to prevent this. + */ + spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING); + __iop_adma_slot_cleanup(iop_chan); + spin_unlock(&iop_chan->lock); +} + +static struct iop_adma_desc_slot * +iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots, + int slots_per_op) +{ + struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL; + LIST_HEAD(chain); + int slots_found, retry = 0; + + /* start search from the last allocated descrtiptor + * if a contiguous allocation can not be found start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = iop_chan->last_used; + else + iter = list_entry(&iop_chan->all_slots, + struct iop_adma_desc_slot, + slot_node); + + list_for_each_entry_safe_continue( + iter, _iter, &iop_chan->all_slots, slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + /* give up after finding the first busy slot + * on the second pass through the list + */ + if (retry) + break; + + slots_found = 0; + continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) { + if (iop_desc_is_aligned(iter, slots_per_op)) + alloc_start = iter; + else { + slots_found = 0; + continue; + } + } + + if (slots_found == num_slots) { + struct iop_adma_desc_slot *alloc_tail = NULL; + struct iop_adma_desc_slot *last_used = NULL; + iter = alloc_start; + while (num_slots) { + int i; + dev_dbg(iop_chan->device->common.dev, + "allocated slot: %d " + "(desc %p phys: %#x) slots_per_op %d\n", + iter->idx, iter->hw_desc, + iter->async_tx.phys, slots_per_op); + + /* pre-ack all but the last descriptor */ + if (num_slots != slots_per_op) + async_tx_ack(&iter->async_tx); + + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct iop_adma_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->tx_list); + iop_chan->last_used = last_used; + iop_desc_clear_next_desc(alloc_start); + iop_desc_clear_next_desc(alloc_tail); + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* perform direct reclaim if the allocation fails */ + __iop_adma_slot_cleanup(iop_chan); + + return NULL; +} + +static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan) +{ + dev_dbg(iop_chan->device->common.dev, "pending: %d\n", + iop_chan->pending); + + if (iop_chan->pending >= IOP_ADMA_THRESHOLD) { + iop_chan->pending = 0; + iop_chan_append(iop_chan); + } +} + +static dma_cookie_t +iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); + struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); + struct iop_adma_desc_slot *grp_start, *old_chain_tail; + int slot_cnt; + int slots_per_op; + dma_cookie_t cookie; + dma_addr_t next_dma; + + grp_start = sw_desc->group_head; + slot_cnt = grp_start->slot_cnt; + slots_per_op = grp_start->slots_per_op; + + spin_lock_bh(&iop_chan->lock); + cookie = dma_cookie_assign(tx); + + old_chain_tail = list_entry(iop_chan->chain.prev, + struct iop_adma_desc_slot, chain_node); + list_splice_init(&sw_desc->tx_list, + &old_chain_tail->chain_node); + + /* fix up the hardware chain */ + next_dma = grp_start->async_tx.phys; + iop_desc_set_next_desc(old_chain_tail, next_dma); + BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */ + + /* check for pre-chained descriptors */ + iop_paranoia(iop_desc_get_next_desc(sw_desc)); + + /* increment the pending count by the number of slots + * memcpy operations have a 1:1 (slot:operation) relation + * other operations are heavier and will pop the threshold + * more often. + */ + iop_chan->pending += slot_cnt; + iop_adma_check_threshold(iop_chan); + spin_unlock_bh(&iop_chan->lock); + + dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n", + __func__, sw_desc->async_tx.cookie, sw_desc->idx); + + return cookie; +} + +static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); +static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); + +/** + * iop_adma_alloc_chan_resources - returns the number of allocated descriptors + * @chan - allocate descriptor resources for this channel + * @client - current client requesting the channel be ready for requests + * + * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To + * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be + * greater than 2x the number slots needed to satisfy a device->max_xor + * request. + * */ +static int iop_adma_alloc_chan_resources(struct dma_chan *chan) +{ + char *hw_desc; + int idx; + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *slot = NULL; + int init = iop_chan->slots_allocated ? 0 : 1; + struct iop_adma_platform_data *plat_data = + iop_chan->device->pdev->dev.platform_data; + int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE; + + /* Allocate descriptor slots */ + do { + idx = iop_chan->slots_allocated; + if (idx == num_descs_in_pool) + break; + + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "IOP ADMA Channel only initialized" + " %d descriptor slots", idx); + break; + } + hw_desc = (char *) iop_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; + + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = iop_adma_tx_submit; + INIT_LIST_HEAD(&slot->tx_list); + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + hw_desc = (char *) iop_chan->device->dma_desc_pool; + slot->async_tx.phys = + (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; + slot->idx = idx; + + spin_lock_bh(&iop_chan->lock); + iop_chan->slots_allocated++; + list_add_tail(&slot->slot_node, &iop_chan->all_slots); + spin_unlock_bh(&iop_chan->lock); + } while (iop_chan->slots_allocated < num_descs_in_pool); + + if (idx && !iop_chan->last_used) + iop_chan->last_used = list_entry(iop_chan->all_slots.next, + struct iop_adma_desc_slot, + slot_node); + + dev_dbg(iop_chan->device->common.dev, + "allocated %d descriptor slots last_used: %p\n", + iop_chan->slots_allocated, iop_chan->last_used); + + /* initialize the channel and the chain with a null operation */ + if (init) { + if (dma_has_cap(DMA_MEMCPY, + iop_chan->device->common.cap_mask)) + iop_chan_start_null_memcpy(iop_chan); + else if (dma_has_cap(DMA_XOR, + iop_chan->device->common.cap_mask)) + iop_chan_start_null_xor(iop_chan); + else + BUG(); + } + + return (idx > 0) ? idx : -ENOMEM; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_interrupt(grp_start, iop_chan); + grp_start->unmap_len = 0; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", + __func__, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_memcpy(grp_start, flags); + iop_desc_set_byte_count(grp_start, iop_chan, len); + iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); + iop_desc_set_memcpy_src_addr(grp_start, dma_src); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest, + int value, size_t len, unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", + __func__, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_memset(grp_start, flags); + iop_desc_set_byte_count(grp_start, iop_chan, len); + iop_desc_set_block_fill_val(grp_start, value); + iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t *dma_src, unsigned int src_cnt, size_t len, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, + "%s src_cnt: %d len: %u flags: %lx\n", + __func__, src_cnt, len, flags); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_xor(grp_start, src_cnt, flags); + iop_desc_set_byte_count(grp_start, iop_chan, len); + iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + while (src_cnt--) + iop_desc_set_xor_src_addr(grp_start, src_cnt, + dma_src[src_cnt]); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, + unsigned int src_cnt, size_t len, u32 *result, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + + dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", + __func__, src_cnt, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_zero_sum(grp_start, src_cnt, flags); + iop_desc_set_zero_sum_byte_count(grp_start, len); + grp_start->xor_check_result = result; + pr_debug("\t%s: grp_start->xor_check_result: %p\n", + __func__, grp_start->xor_check_result); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + while (src_cnt--) + iop_desc_set_zero_sum_src_addr(grp_start, src_cnt, + dma_src[src_cnt]); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *g; + int slot_cnt, slots_per_op; + int continue_srcs; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, + "%s src_cnt: %d len: %u flags: %lx\n", + __func__, src_cnt, len, flags); + + if (dmaf_p_disabled_continue(flags)) + continue_srcs = 1+src_cnt; + else if (dmaf_continue(flags)) + continue_srcs = 3+src_cnt; + else + continue_srcs = 0+src_cnt; + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + int i; + + g = sw_desc->group_head; + iop_desc_set_byte_count(g, iop_chan, len); + + /* even if P is disabled its destination address (bits + * [3:0]) must match Q. It is ok if P points to an + * invalid address, it won't be written. + */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1] & 0x7; + + iop_desc_set_pq_addr(g, dst); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + for (i = 0; i < src_cnt; i++) + iop_desc_set_pq_src_addr(g, i, src[i], scf[i]); + + /* if we are continuing a previous operation factor in + * the old p and q values, see the comment for dma_maxpq + * in include/linux/dmaengine.h + */ + if (dmaf_p_disabled_continue(flags)) + iop_desc_set_pq_src_addr(g, i++, dst[1], 1); + else if (dmaf_continue(flags)) { + iop_desc_set_pq_src_addr(g, i++, dst[0], 0); + iop_desc_set_pq_src_addr(g, i++, dst[1], 1); + iop_desc_set_pq_src_addr(g, i++, dst[1], 0); + } + iop_desc_init_pq(g, i, flags); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, enum sum_check_flags *pqres, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *g; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", + __func__, src_cnt, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + /* for validate operations p and q are tagged onto the + * end of the source list + */ + int pq_idx = src_cnt; + + g = sw_desc->group_head; + iop_desc_init_pq_zero_sum(g, src_cnt+2, flags); + iop_desc_set_pq_zero_sum_byte_count(g, len); + g->pq_check_result = pqres; + pr_debug("\t%s: g->pq_check_result: %p\n", + __func__, g->pq_check_result); + sw_desc->unmap_src_cnt = src_cnt+2; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + while (src_cnt--) + iop_desc_set_pq_zero_sum_src_addr(g, src_cnt, + src[src_cnt], + scf[src_cnt]); + iop_desc_set_pq_zero_sum_addr(g, pq_idx, src); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static void iop_adma_free_chan_resources(struct dma_chan *chan) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *iter, *_iter; + int in_use_descs = 0; + + iop_adma_slot_cleanup(iop_chan); + + spin_lock_bh(&iop_chan->lock); + list_for_each_entry_safe(iter, _iter, &iop_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe_reverse( + iter, _iter, &iop_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + iop_chan->slots_allocated--; + } + iop_chan->last_used = NULL; + + dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n", + __func__, iop_chan->slots_allocated); + spin_unlock_bh(&iop_chan->lock); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n", + in_use_descs - 1); +} + +/** + * iop_adma_status - poll the status of an ADMA transaction + * @chan: ADMA channel handle + * @cookie: ADMA transaction identifier + * @txstate: a holder for the current state of the channel or NULL + */ +static enum dma_status iop_adma_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + int ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_SUCCESS) + return ret; + + iop_adma_slot_cleanup(iop_chan); + + return dma_cookie_status(chan, cookie, txstate); +} + +static irqreturn_t iop_adma_eot_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, "%s\n", __func__); + + tasklet_schedule(&chan->irq_tasklet); + + iop_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} + +static irqreturn_t iop_adma_eoc_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, "%s\n", __func__); + + tasklet_schedule(&chan->irq_tasklet); + + iop_adma_device_clear_eoc_status(chan); + + return IRQ_HANDLED; +} + +static irqreturn_t iop_adma_err_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + unsigned long status = iop_chan_get_status(chan); + + dev_printk(KERN_ERR, chan->device->common.dev, + "error ( %s%s%s%s%s%s%s)\n", + iop_is_err_int_parity(status, chan) ? "int_parity " : "", + iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "", + iop_is_err_int_tabort(status, chan) ? "int_tabort " : "", + iop_is_err_int_mabort(status, chan) ? "int_mabort " : "", + iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "", + iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "", + iop_is_err_split_tx(status, chan) ? "split_tx " : ""); + + iop_adma_device_clear_err_status(chan); + + BUG(); + + return IRQ_HANDLED; +} + +static void iop_adma_issue_pending(struct dma_chan *chan) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + + if (iop_chan->pending) { + iop_chan->pending = 0; + iop_chan_append(iop_chan); + } +} + +/* + * Perform a transaction to verify the HW works. + */ +#define IOP_ADMA_TEST_SIZE 2000 + +static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) +{ + int i; + void *src, *dest; + dma_addr_t src_dma, dest_dma; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + struct dma_async_tx_descriptor *tx; + int err = 0; + struct iop_adma_chan *iop_chan; + + dev_dbg(device->common.dev, "%s\n", __func__); + + src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOP_ADMA_TEST_SIZE; i++) + ((u8 *) src)[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + dest_dma = dma_map_single(dma_chan->device->dev, dest, + IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); + src_dma = dma_map_single(dma_chan->device->dev, src, + IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE); + tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma, + IOP_ADMA_TEST_SIZE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(1); + + if (iop_adma_status(dma_chan, cookie, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + iop_chan = to_iop_adma_chan(dma_chan); + dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, + IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); + if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ +static int __devinit +iop_adma_xor_val_self_test(struct iop_adma_device *device) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST]; + struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; + dma_addr_t dma_addr, dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 zero_sum_result; + int err = 0; + struct iop_adma_chan *iop_chan; + + dev_dbg(device->common.dev, "%s\n", __func__); + + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], + 0, PAGE_SIZE, DMA_TO_DEVICE); + tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_status(dma_chan, cookie, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + iop_chan = to_iop_adma_chan(dma_chan); + dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma, + PAGE_SIZE, DMA_TO_DEVICE); + + /* skip zero sum if the capability is not present */ + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) + goto free_resources; + + /* zero sum the sources with the destintation page */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + zero_sum_srcs[i] = xor_srcs[i]; + zero_sum_srcs[i] = dest; + + zero_sum_result = 1; + + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dma_chan->device->dev, + zero_sum_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test zero sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 0) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test zero sum failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + /* test memset */ + dma_addr = dma_map_page(dma_chan->device->dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + tx = iop_adma_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test memset timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { + u32 *ptr = page_address(dest); + if (ptr[i]) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test memset failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + } + + /* test for non-zero parity sum */ + zero_sum_result = 0; + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dma_chan->device->dev, + zero_sum_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test non-zero sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 1) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test non-zero sum failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + src_idx = IOP_ADMA_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +#ifdef CONFIG_RAID6_PQ +static int __devinit +iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) +{ + /* combined sources, software pq results, and extra hw pq results */ + struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2]; + /* ptr to the extra hw pq buffers defined above */ + struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2]; + /* address conversion buffers (dma_map / page_address) */ + void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2]; + dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2]; + dma_addr_t *pq_dest = &pq_src[IOP_ADMA_NUM_SRC_TEST]; + + int i; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u32 zero_sum_result; + int err = 0; + struct device *dev; + + dev_dbg(device->common.dev, "%s\n", __func__); + + for (i = 0; i < ARRAY_SIZE(pq); i++) { + pq[i] = alloc_page(GFP_KERNEL); + if (!pq[i]) { + while (i--) + __free_page(pq[i]); + return -ENOMEM; + } + } + + /* Fill in src buffers */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { + pq_sw[i] = page_address(pq[i]); + memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE); + } + pq_sw[i] = page_address(pq[i]); + pq_sw[i+1] = page_address(pq[i+1]); + + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + dev = dma_chan->device->dev; + + /* initialize the dests */ + memset(page_address(pq_hw[0]), 0 , PAGE_SIZE); + memset(page_address(pq_hw[1]), 0 , PAGE_SIZE); + + /* test pq */ + pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE); + pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src, + IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp, + PAGE_SIZE, + DMA_PREP_INTERRUPT | + DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_status(dma_chan, cookie, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test pq timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw); + + if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST], + page_address(pq_hw[0]), PAGE_SIZE) != 0) { + dev_err(dev, "Self-test p failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1], + page_address(pq_hw[1]), PAGE_SIZE) != 0) { + dev_err(dev, "Self-test q failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + /* test correct zero sum using the software generated pq values */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + zero_sum_result = ~0; + tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], + pq_src, IOP_ADMA_NUM_SRC_TEST, + raid6_gfexp, PAGE_SIZE, &zero_sum_result, + DMA_PREP_INTERRUPT|DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_status(dma_chan, cookie, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 0) { + dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n", + zero_sum_result); + err = -ENODEV; + goto free_resources; + } + + /* test incorrect zero sum */ + i = IOP_ADMA_NUM_SRC_TEST; + memset(pq_sw[i] + 100, 0, 100); + memset(pq_sw[i+1] + 200, 0, 200); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + zero_sum_result = 0; + tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], + pq_src, IOP_ADMA_NUM_SRC_TEST, + raid6_gfexp, PAGE_SIZE, &zero_sum_result, + DMA_PREP_INTERRUPT|DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_status(dma_chan, cookie, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) { + dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n", + zero_sum_result); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + i = ARRAY_SIZE(pq); + while (i--) + __free_page(pq[i]); + return err; +} +#endif + +static int __devexit iop_adma_remove(struct platform_device *dev) +{ + struct iop_adma_device *device = platform_get_drvdata(dev); + struct dma_chan *chan, *_chan; + struct iop_adma_chan *iop_chan; + struct iop_adma_platform_data *plat_data = dev->dev.platform_data; + + dma_async_device_unregister(&device->common); + + dma_free_coherent(&dev->dev, plat_data->pool_size, + device->dma_desc_pool_virt, device->dma_desc_pool); + + list_for_each_entry_safe(chan, _chan, &device->common.channels, + device_node) { + iop_chan = to_iop_adma_chan(chan); + list_del(&chan->device_node); + kfree(iop_chan); + } + kfree(device); + + return 0; +} + +static int __devinit iop_adma_probe(struct platform_device *pdev) +{ + struct resource *res; + int ret = 0, i; + struct iop_adma_device *adev; + struct iop_adma_chan *iop_chan; + struct dma_device *dma_dev; + struct iop_adma_platform_data *plat_data = pdev->dev.platform_data; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + if (!devm_request_mem_region(&pdev->dev, res->start, + resource_size(res), pdev->name)) + return -EBUSY; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + dma_dev = &adev->common; + + /* allocate coherent memory for hardware descriptors + * note: writecombine gives slightly better performance, but + * requires that we explicitly flush the writes + */ + if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, + plat_data->pool_size, + &adev->dma_desc_pool, + GFP_KERNEL)) == NULL) { + ret = -ENOMEM; + goto err_free_adev; + } + + dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %p\n", + __func__, adev->dma_desc_pool_virt, + (void *) adev->dma_desc_pool); + + adev->id = plat_data->hw_id; + + /* discover transaction capabilites from the platform data */ + dma_dev->cap_mask = plat_data->cap_mask; + + adev->pdev = pdev; + platform_set_drvdata(pdev, adev); + + INIT_LIST_HEAD(&dma_dev->channels); + + /* set base routines */ + dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources; + dma_dev->device_free_chan_resources = iop_adma_free_chan_resources; + dma_dev->device_tx_status = iop_adma_status; + dma_dev->device_issue_pending = iop_adma_issue_pending; + dma_dev->dev = &pdev->dev; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset; + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->max_xor = iop_adma_get_max_xor(); + dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; + } + if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) + dma_dev->device_prep_dma_xor_val = + iop_adma_prep_dma_xor_val; + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0); + dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq; + } + if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) + dma_dev->device_prep_dma_pq_val = + iop_adma_prep_dma_pq_val; + if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) + dma_dev->device_prep_dma_interrupt = + iop_adma_prep_dma_interrupt; + + iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL); + if (!iop_chan) { + ret = -ENOMEM; + goto err_free_dma; + } + iop_chan->device = adev; + + iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!iop_chan->mmr_base) { + ret = -ENOMEM; + goto err_free_iop_chan; + } + tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long) + iop_chan); + + /* clear errors before enabling interrupts */ + iop_adma_device_clear_err_status(iop_chan); + + for (i = 0; i < 3; i++) { + irq_handler_t handler[] = { iop_adma_eot_handler, + iop_adma_eoc_handler, + iop_adma_err_handler }; + int irq = platform_get_irq(pdev, i); + if (irq < 0) { + ret = -ENXIO; + goto err_free_iop_chan; + } else { + ret = devm_request_irq(&pdev->dev, irq, + handler[i], 0, pdev->name, iop_chan); + if (ret) + goto err_free_iop_chan; + } + } + + spin_lock_init(&iop_chan->lock); + INIT_LIST_HEAD(&iop_chan->chain); + INIT_LIST_HEAD(&iop_chan->all_slots); + iop_chan->common.device = dma_dev; + dma_cookie_init(&iop_chan->common); + list_add_tail(&iop_chan->common.device_node, &dma_dev->channels); + + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { + ret = iop_adma_memcpy_self_test(adev); + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); + if (ret) + goto err_free_iop_chan; + } + + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { + ret = iop_adma_xor_val_self_test(adev); + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); + if (ret) + goto err_free_iop_chan; + } + + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) && + dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) { + #ifdef CONFIG_RAID6_PQ + ret = iop_adma_pq_zero_sum_self_test(adev); + dev_dbg(&pdev->dev, "pq self test returned %d\n", ret); + #else + /* can not test raid6, so do not publish capability */ + dma_cap_clear(DMA_PQ, dma_dev->cap_mask); + dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask); + ret = 0; + #endif + if (ret) + goto err_free_iop_chan; + } + + dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " + "( %s%s%s%s%s%s%s)\n", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", + dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", + dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); + + dma_async_device_register(dma_dev); + goto out; + + err_free_iop_chan: + kfree(iop_chan); + err_free_dma: + dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + err_free_adev: + kfree(adev); + out: + return ret; +} + +static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *sw_desc, *grp_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + + list_splice_init(&sw_desc->tx_list, &iop_chan->chain); + async_tx_ack(&sw_desc->async_tx); + iop_desc_init_memcpy(grp_start, 0); + iop_desc_set_byte_count(grp_start, iop_chan, 0); + iop_desc_set_dest_addr(grp_start, iop_chan, 0); + iop_desc_set_memcpy_src_addr(grp_start, 0); + + cookie = dma_cookie_assign(&sw_desc->async_tx); + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + iop_chan->common.completed_cookie = cookie - 1; + + /* channel should not be busy */ + BUG_ON(iop_chan_is_busy(iop_chan)); + + /* clear any prior error-status bits */ + iop_adma_device_clear_err_status(iop_chan); + + /* disable operation */ + iop_chan_disable(iop_chan); + + /* set the descriptor address */ + iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); + + /* 1/ don't add pre-chained descriptors + * 2/ dummy read to flush next_desc write + */ + BUG_ON(iop_desc_get_next_desc(sw_desc)); + + /* run the descriptor */ + iop_chan_enable(iop_chan); + } else + dev_printk(KERN_ERR, iop_chan->device->common.dev, + "failed to allocate null descriptor\n"); + spin_unlock_bh(&iop_chan->lock); +} + +static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *sw_desc, *grp_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + list_splice_init(&sw_desc->tx_list, &iop_chan->chain); + async_tx_ack(&sw_desc->async_tx); + iop_desc_init_null_xor(grp_start, 2, 0); + iop_desc_set_byte_count(grp_start, iop_chan, 0); + iop_desc_set_dest_addr(grp_start, iop_chan, 0); + iop_desc_set_xor_src_addr(grp_start, 0, 0); + iop_desc_set_xor_src_addr(grp_start, 1, 0); + + cookie = dma_cookie_assign(&sw_desc->async_tx); + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + iop_chan->common.completed_cookie = cookie - 1; + + /* channel should not be busy */ + BUG_ON(iop_chan_is_busy(iop_chan)); + + /* clear any prior error-status bits */ + iop_adma_device_clear_err_status(iop_chan); + + /* disable operation */ + iop_chan_disable(iop_chan); + + /* set the descriptor address */ + iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); + + /* 1/ don't add pre-chained descriptors + * 2/ dummy read to flush next_desc write + */ + BUG_ON(iop_desc_get_next_desc(sw_desc)); + + /* run the descriptor */ + iop_chan_enable(iop_chan); + } else + dev_printk(KERN_ERR, iop_chan->device->common.dev, + "failed to allocate null descriptor\n"); + spin_unlock_bh(&iop_chan->lock); +} + +static struct platform_driver iop_adma_driver = { + .probe = iop_adma_probe, + .remove = __devexit_p(iop_adma_remove), + .driver = { + .owner = THIS_MODULE, + .name = "iop-adma", + }, +}; + +module_platform_driver(iop_adma_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("IOP ADMA Engine Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:iop-adma"); diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c new file mode 100644 index 00000000..bb48a57c --- /dev/null +++ b/drivers/dma/iovlock.c @@ -0,0 +1,280 @@ +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * Portions based on net/core/datagram.c and copyrighted by their authors. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* + * This code allows the net stack to make use of a DMA engine for + * skb to iovec copies. + */ + +#include <linux/dmaengine.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <net/tcp.h> /* for memcpy_toiovec */ +#include <asm/io.h> +#include <asm/uaccess.h> + +static int num_pages_spanned(struct iovec *iov) +{ + return + ((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) - + ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT); +} + +/* + * Pin down all the iovec pages needed for len bytes. + * Return a struct dma_pinned_list to keep track of pages pinned down. + * + * We are allocating a single chunk of memory, and then carving it up into + * 3 sections, the latter 2 whose size depends on the number of iovecs and the + * total number of pages, respectively. + */ +struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len) +{ + struct dma_pinned_list *local_list; + struct page **pages; + int i; + int ret; + int nr_iovecs = 0; + int iovec_len_used = 0; + int iovec_pages_used = 0; + + /* don't pin down non-user-based iovecs */ + if (segment_eq(get_fs(), KERNEL_DS)) + return NULL; + + /* determine how many iovecs/pages there are, up front */ + do { + iovec_len_used += iov[nr_iovecs].iov_len; + iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]); + nr_iovecs++; + } while (iovec_len_used < len); + + /* single kmalloc for pinned list, page_list[], and the page arrays */ + local_list = kmalloc(sizeof(*local_list) + + (nr_iovecs * sizeof (struct dma_page_list)) + + (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL); + if (!local_list) + goto out; + + /* list of pages starts right after the page list array */ + pages = (struct page **) &local_list->page_list[nr_iovecs]; + + local_list->nr_iovecs = 0; + + for (i = 0; i < nr_iovecs; i++) { + struct dma_page_list *page_list = &local_list->page_list[i]; + + len -= iov[i].iov_len; + + if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len)) + goto unpin; + + page_list->nr_pages = num_pages_spanned(&iov[i]); + page_list->base_address = iov[i].iov_base; + + page_list->pages = pages; + pages += page_list->nr_pages; + + /* pin pages down */ + down_read(¤t->mm->mmap_sem); + ret = get_user_pages( + current, + current->mm, + (unsigned long) iov[i].iov_base, + page_list->nr_pages, + 1, /* write */ + 0, /* force */ + page_list->pages, + NULL); + up_read(¤t->mm->mmap_sem); + + if (ret != page_list->nr_pages) + goto unpin; + + local_list->nr_iovecs = i + 1; + } + + return local_list; + +unpin: + dma_unpin_iovec_pages(local_list); +out: + return NULL; +} + +void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list) +{ + int i, j; + + if (!pinned_list) + return; + + for (i = 0; i < pinned_list->nr_iovecs; i++) { + struct dma_page_list *page_list = &pinned_list->page_list[i]; + for (j = 0; j < page_list->nr_pages; j++) { + set_page_dirty_lock(page_list->pages[j]); + page_cache_release(page_list->pages[j]); + } + } + + kfree(pinned_list); +} + + +/* + * We have already pinned down the pages we will be using in the iovecs. + * Each entry in iov array has corresponding entry in pinned_list->page_list. + * Using array indexing to keep iov[] and page_list[] in sync. + * Initial elements in iov array's iov->iov_len will be 0 if already copied into + * by another call. + * iov array length remaining guaranteed to be bigger than len. + */ +dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, + struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len) +{ + int iov_byte_offset; + int copy; + dma_cookie_t dma_cookie = 0; + int iovec_idx; + int page_idx; + + if (!chan) + return memcpy_toiovec(iov, kdata, len); + + iovec_idx = 0; + while (iovec_idx < pinned_list->nr_iovecs) { + struct dma_page_list *page_list; + + /* skip already used-up iovecs */ + while (!iov[iovec_idx].iov_len) + iovec_idx++; + + page_list = &pinned_list->page_list[iovec_idx]; + + iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK); + page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK) + - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT; + + /* break up copies to not cross page boundary */ + while (iov[iovec_idx].iov_len) { + copy = min_t(int, PAGE_SIZE - iov_byte_offset, len); + copy = min_t(int, copy, iov[iovec_idx].iov_len); + + dma_cookie = dma_async_memcpy_buf_to_pg(chan, + page_list->pages[page_idx], + iov_byte_offset, + kdata, + copy); + /* poll for a descriptor slot */ + if (unlikely(dma_cookie < 0)) { + dma_async_issue_pending(chan); + continue; + } + + len -= copy; + iov[iovec_idx].iov_len -= copy; + iov[iovec_idx].iov_base += copy; + + if (!len) + return dma_cookie; + + kdata += copy; + iov_byte_offset = 0; + page_idx++; + } + iovec_idx++; + } + + /* really bad if we ever run out of iovecs */ + BUG(); + return -EFAULT; +} + +dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, + struct dma_pinned_list *pinned_list, struct page *page, + unsigned int offset, size_t len) +{ + int iov_byte_offset; + int copy; + dma_cookie_t dma_cookie = 0; + int iovec_idx; + int page_idx; + int err; + + /* this needs as-yet-unimplemented buf-to-buff, so punt. */ + /* TODO: use dma for this */ + if (!chan || !pinned_list) { + u8 *vaddr = kmap(page); + err = memcpy_toiovec(iov, vaddr + offset, len); + kunmap(page); + return err; + } + + iovec_idx = 0; + while (iovec_idx < pinned_list->nr_iovecs) { + struct dma_page_list *page_list; + + /* skip already used-up iovecs */ + while (!iov[iovec_idx].iov_len) + iovec_idx++; + + page_list = &pinned_list->page_list[iovec_idx]; + + iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK); + page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK) + - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT; + + /* break up copies to not cross page boundary */ + while (iov[iovec_idx].iov_len) { + copy = min_t(int, PAGE_SIZE - iov_byte_offset, len); + copy = min_t(int, copy, iov[iovec_idx].iov_len); + + dma_cookie = dma_async_memcpy_pg_to_pg(chan, + page_list->pages[page_idx], + iov_byte_offset, + page, + offset, + copy); + /* poll for a descriptor slot */ + if (unlikely(dma_cookie < 0)) { + dma_async_issue_pending(chan); + continue; + } + + len -= copy; + iov[iovec_idx].iov_len -= copy; + iov[iovec_idx].iov_base += copy; + + if (!len) + return dma_cookie; + + offset += copy; + iov_byte_offset = 0; + page_idx++; + } + iovec_idx++; + } + + /* really bad if we ever run out of iovecs */ + BUG(); + return -EFAULT; +} diff --git a/drivers/dma/ipu/Makefile b/drivers/dma/ipu/Makefile new file mode 100644 index 00000000..6704cf48 --- /dev/null +++ b/drivers/dma/ipu/Makefile @@ -0,0 +1 @@ +obj-y += ipu_irq.o ipu_idmac.o diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c new file mode 100644 index 00000000..62e3f8ec --- /dev/null +++ b/drivers/dma/ipu/ipu_idmac.c @@ -0,0 +1,1799 @@ +/* + * Copyright (C) 2008 + * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de> + * + * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/list.h> +#include <linux/clk.h> +#include <linux/vmalloc.h> +#include <linux/string.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> + +#include <mach/ipu.h> + +#include "../dmaengine.h" +#include "ipu_intern.h" + +#define FS_VF_IN_VALID 0x00000002 +#define FS_ENC_IN_VALID 0x00000001 + +static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan, + bool wait_for_stop); + +/* + * There can be only one, we could allocate it dynamically, but then we'd have + * to add an extra parameter to some functions, and use something as ugly as + * struct ipu *ipu = to_ipu(to_idmac(ichan->dma_chan.device)); + * in the ISR + */ +static struct ipu ipu_data; + +#define to_ipu(id) container_of(id, struct ipu, idmac) + +static u32 __idmac_read_icreg(struct ipu *ipu, unsigned long reg) +{ + return __raw_readl(ipu->reg_ic + reg); +} + +#define idmac_read_icreg(ipu, reg) __idmac_read_icreg(ipu, reg - IC_CONF) + +static void __idmac_write_icreg(struct ipu *ipu, u32 value, unsigned long reg) +{ + __raw_writel(value, ipu->reg_ic + reg); +} + +#define idmac_write_icreg(ipu, v, reg) __idmac_write_icreg(ipu, v, reg - IC_CONF) + +static u32 idmac_read_ipureg(struct ipu *ipu, unsigned long reg) +{ + return __raw_readl(ipu->reg_ipu + reg); +} + +static void idmac_write_ipureg(struct ipu *ipu, u32 value, unsigned long reg) +{ + __raw_writel(value, ipu->reg_ipu + reg); +} + +/***************************************************************************** + * IPU / IC common functions + */ +static void dump_idmac_reg(struct ipu *ipu) +{ + dev_dbg(ipu->dev, "IDMAC_CONF 0x%x, IC_CONF 0x%x, IDMAC_CHA_EN 0x%x, " + "IDMAC_CHA_PRI 0x%x, IDMAC_CHA_BUSY 0x%x\n", + idmac_read_icreg(ipu, IDMAC_CONF), + idmac_read_icreg(ipu, IC_CONF), + idmac_read_icreg(ipu, IDMAC_CHA_EN), + idmac_read_icreg(ipu, IDMAC_CHA_PRI), + idmac_read_icreg(ipu, IDMAC_CHA_BUSY)); + dev_dbg(ipu->dev, "BUF0_RDY 0x%x, BUF1_RDY 0x%x, CUR_BUF 0x%x, " + "DB_MODE 0x%x, TASKS_STAT 0x%x\n", + idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY), + idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY), + idmac_read_ipureg(ipu, IPU_CHA_CUR_BUF), + idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL), + idmac_read_ipureg(ipu, IPU_TASKS_STAT)); +} + +static uint32_t bytes_per_pixel(enum pixel_fmt fmt) +{ + switch (fmt) { + case IPU_PIX_FMT_GENERIC: /* generic data */ + case IPU_PIX_FMT_RGB332: + case IPU_PIX_FMT_YUV420P: + case IPU_PIX_FMT_YUV422P: + default: + return 1; + case IPU_PIX_FMT_RGB565: + case IPU_PIX_FMT_YUYV: + case IPU_PIX_FMT_UYVY: + return 2; + case IPU_PIX_FMT_BGR24: + case IPU_PIX_FMT_RGB24: + return 3; + case IPU_PIX_FMT_GENERIC_32: /* generic data */ + case IPU_PIX_FMT_BGR32: + case IPU_PIX_FMT_RGB32: + case IPU_PIX_FMT_ABGR32: + return 4; + } +} + +/* Enable direct write to memory by the Camera Sensor Interface */ +static void ipu_ic_enable_task(struct ipu *ipu, enum ipu_channel channel) +{ + uint32_t ic_conf, mask; + + switch (channel) { + case IDMAC_IC_0: + mask = IC_CONF_PRPENC_EN; + break; + case IDMAC_IC_7: + mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN; + break; + default: + return; + } + ic_conf = idmac_read_icreg(ipu, IC_CONF) | mask; + idmac_write_icreg(ipu, ic_conf, IC_CONF); +} + +/* Called under spin_lock_irqsave(&ipu_data.lock) */ +static void ipu_ic_disable_task(struct ipu *ipu, enum ipu_channel channel) +{ + uint32_t ic_conf, mask; + + switch (channel) { + case IDMAC_IC_0: + mask = IC_CONF_PRPENC_EN; + break; + case IDMAC_IC_7: + mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN; + break; + default: + return; + } + ic_conf = idmac_read_icreg(ipu, IC_CONF) & ~mask; + idmac_write_icreg(ipu, ic_conf, IC_CONF); +} + +static uint32_t ipu_channel_status(struct ipu *ipu, enum ipu_channel channel) +{ + uint32_t stat = TASK_STAT_IDLE; + uint32_t task_stat_reg = idmac_read_ipureg(ipu, IPU_TASKS_STAT); + + switch (channel) { + case IDMAC_IC_7: + stat = (task_stat_reg & TSTAT_CSI2MEM_MASK) >> + TSTAT_CSI2MEM_OFFSET; + break; + case IDMAC_IC_0: + case IDMAC_SDC_0: + case IDMAC_SDC_1: + default: + break; + } + return stat; +} + +struct chan_param_mem_planar { + /* Word 0 */ + u32 xv:10; + u32 yv:10; + u32 xb:12; + + u32 yb:12; + u32 res1:2; + u32 nsb:1; + u32 lnpb:6; + u32 ubo_l:11; + + u32 ubo_h:15; + u32 vbo_l:17; + + u32 vbo_h:9; + u32 res2:3; + u32 fw:12; + u32 fh_l:8; + + u32 fh_h:4; + u32 res3:28; + + /* Word 1 */ + u32 eba0; + + u32 eba1; + + u32 bpp:3; + u32 sl:14; + u32 pfs:3; + u32 bam:3; + u32 res4:2; + u32 npb:6; + u32 res5:1; + + u32 sat:2; + u32 res6:30; +} __attribute__ ((packed)); + +struct chan_param_mem_interleaved { + /* Word 0 */ + u32 xv:10; + u32 yv:10; + u32 xb:12; + + u32 yb:12; + u32 sce:1; + u32 res1:1; + u32 nsb:1; + u32 lnpb:6; + u32 sx:10; + u32 sy_l:1; + + u32 sy_h:9; + u32 ns:10; + u32 sm:10; + u32 sdx_l:3; + + u32 sdx_h:2; + u32 sdy:5; + u32 sdrx:1; + u32 sdry:1; + u32 sdr1:1; + u32 res2:2; + u32 fw:12; + u32 fh_l:8; + + u32 fh_h:4; + u32 res3:28; + + /* Word 1 */ + u32 eba0; + + u32 eba1; + + u32 bpp:3; + u32 sl:14; + u32 pfs:3; + u32 bam:3; + u32 res4:2; + u32 npb:6; + u32 res5:1; + + u32 sat:2; + u32 scc:1; + u32 ofs0:5; + u32 ofs1:5; + u32 ofs2:5; + u32 ofs3:5; + u32 wid0:3; + u32 wid1:3; + u32 wid2:3; + + u32 wid3:3; + u32 dec_sel:1; + u32 res6:28; +} __attribute__ ((packed)); + +union chan_param_mem { + struct chan_param_mem_planar pp; + struct chan_param_mem_interleaved ip; +}; + +static void ipu_ch_param_set_plane_offset(union chan_param_mem *params, + u32 u_offset, u32 v_offset) +{ + params->pp.ubo_l = u_offset & 0x7ff; + params->pp.ubo_h = u_offset >> 11; + params->pp.vbo_l = v_offset & 0x1ffff; + params->pp.vbo_h = v_offset >> 17; +} + +static void ipu_ch_param_set_size(union chan_param_mem *params, + uint32_t pixel_fmt, uint16_t width, + uint16_t height, uint16_t stride) +{ + u32 u_offset; + u32 v_offset; + + params->pp.fw = width - 1; + params->pp.fh_l = height - 1; + params->pp.fh_h = (height - 1) >> 8; + params->pp.sl = stride - 1; + + switch (pixel_fmt) { + case IPU_PIX_FMT_GENERIC: + /*Represents 8-bit Generic data */ + params->pp.bpp = 3; + params->pp.pfs = 7; + params->pp.npb = 31; + params->pp.sat = 2; /* SAT = use 32-bit access */ + break; + case IPU_PIX_FMT_GENERIC_32: + /*Represents 32-bit Generic data */ + params->pp.bpp = 0; + params->pp.pfs = 7; + params->pp.npb = 7; + params->pp.sat = 2; /* SAT = use 32-bit access */ + break; + case IPU_PIX_FMT_RGB565: + params->ip.bpp = 2; + params->ip.pfs = 4; + params->ip.npb = 15; + params->ip.sat = 2; /* SAT = 32-bit access */ + params->ip.ofs0 = 0; /* Red bit offset */ + params->ip.ofs1 = 5; /* Green bit offset */ + params->ip.ofs2 = 11; /* Blue bit offset */ + params->ip.ofs3 = 16; /* Alpha bit offset */ + params->ip.wid0 = 4; /* Red bit width - 1 */ + params->ip.wid1 = 5; /* Green bit width - 1 */ + params->ip.wid2 = 4; /* Blue bit width - 1 */ + break; + case IPU_PIX_FMT_BGR24: + params->ip.bpp = 1; /* 24 BPP & RGB PFS */ + params->ip.pfs = 4; + params->ip.npb = 7; + params->ip.sat = 2; /* SAT = 32-bit access */ + params->ip.ofs0 = 0; /* Red bit offset */ + params->ip.ofs1 = 8; /* Green bit offset */ + params->ip.ofs2 = 16; /* Blue bit offset */ + params->ip.ofs3 = 24; /* Alpha bit offset */ + params->ip.wid0 = 7; /* Red bit width - 1 */ + params->ip.wid1 = 7; /* Green bit width - 1 */ + params->ip.wid2 = 7; /* Blue bit width - 1 */ + break; + case IPU_PIX_FMT_RGB24: + params->ip.bpp = 1; /* 24 BPP & RGB PFS */ + params->ip.pfs = 4; + params->ip.npb = 7; + params->ip.sat = 2; /* SAT = 32-bit access */ + params->ip.ofs0 = 16; /* Red bit offset */ + params->ip.ofs1 = 8; /* Green bit offset */ + params->ip.ofs2 = 0; /* Blue bit offset */ + params->ip.ofs3 = 24; /* Alpha bit offset */ + params->ip.wid0 = 7; /* Red bit width - 1 */ + params->ip.wid1 = 7; /* Green bit width - 1 */ + params->ip.wid2 = 7; /* Blue bit width - 1 */ + break; + case IPU_PIX_FMT_BGRA32: + case IPU_PIX_FMT_BGR32: + case IPU_PIX_FMT_ABGR32: + params->ip.bpp = 0; + params->ip.pfs = 4; + params->ip.npb = 7; + params->ip.sat = 2; /* SAT = 32-bit access */ + params->ip.ofs0 = 8; /* Red bit offset */ + params->ip.ofs1 = 16; /* Green bit offset */ + params->ip.ofs2 = 24; /* Blue bit offset */ + params->ip.ofs3 = 0; /* Alpha bit offset */ + params->ip.wid0 = 7; /* Red bit width - 1 */ + params->ip.wid1 = 7; /* Green bit width - 1 */ + params->ip.wid2 = 7; /* Blue bit width - 1 */ + params->ip.wid3 = 7; /* Alpha bit width - 1 */ + break; + case IPU_PIX_FMT_RGBA32: + case IPU_PIX_FMT_RGB32: + params->ip.bpp = 0; + params->ip.pfs = 4; + params->ip.npb = 7; + params->ip.sat = 2; /* SAT = 32-bit access */ + params->ip.ofs0 = 24; /* Red bit offset */ + params->ip.ofs1 = 16; /* Green bit offset */ + params->ip.ofs2 = 8; /* Blue bit offset */ + params->ip.ofs3 = 0; /* Alpha bit offset */ + params->ip.wid0 = 7; /* Red bit width - 1 */ + params->ip.wid1 = 7; /* Green bit width - 1 */ + params->ip.wid2 = 7; /* Blue bit width - 1 */ + params->ip.wid3 = 7; /* Alpha bit width - 1 */ + break; + case IPU_PIX_FMT_UYVY: + params->ip.bpp = 2; + params->ip.pfs = 6; + params->ip.npb = 7; + params->ip.sat = 2; /* SAT = 32-bit access */ + break; + case IPU_PIX_FMT_YUV420P2: + case IPU_PIX_FMT_YUV420P: + params->ip.bpp = 3; + params->ip.pfs = 3; + params->ip.npb = 7; + params->ip.sat = 2; /* SAT = 32-bit access */ + u_offset = stride * height; + v_offset = u_offset + u_offset / 4; + ipu_ch_param_set_plane_offset(params, u_offset, v_offset); + break; + case IPU_PIX_FMT_YVU422P: + params->ip.bpp = 3; + params->ip.pfs = 2; + params->ip.npb = 7; + params->ip.sat = 2; /* SAT = 32-bit access */ + v_offset = stride * height; + u_offset = v_offset + v_offset / 2; + ipu_ch_param_set_plane_offset(params, u_offset, v_offset); + break; + case IPU_PIX_FMT_YUV422P: + params->ip.bpp = 3; + params->ip.pfs = 2; + params->ip.npb = 7; + params->ip.sat = 2; /* SAT = 32-bit access */ + u_offset = stride * height; + v_offset = u_offset + u_offset / 2; + ipu_ch_param_set_plane_offset(params, u_offset, v_offset); + break; + default: + dev_err(ipu_data.dev, + "mx3 ipu: unimplemented pixel format %d\n", pixel_fmt); + break; + } + + params->pp.nsb = 1; +} + +static void ipu_ch_param_set_buffer(union chan_param_mem *params, + dma_addr_t buf0, dma_addr_t buf1) +{ + params->pp.eba0 = buf0; + params->pp.eba1 = buf1; +} + +static void ipu_ch_param_set_rotation(union chan_param_mem *params, + enum ipu_rotate_mode rotate) +{ + params->pp.bam = rotate; +} + +static void ipu_write_param_mem(uint32_t addr, uint32_t *data, + uint32_t num_words) +{ + for (; num_words > 0; num_words--) { + dev_dbg(ipu_data.dev, + "write param mem - addr = 0x%08X, data = 0x%08X\n", + addr, *data); + idmac_write_ipureg(&ipu_data, addr, IPU_IMA_ADDR); + idmac_write_ipureg(&ipu_data, *data++, IPU_IMA_DATA); + addr++; + if ((addr & 0x7) == 5) { + addr &= ~0x7; /* set to word 0 */ + addr += 8; /* increment to next row */ + } + } +} + +static int calc_resize_coeffs(uint32_t in_size, uint32_t out_size, + uint32_t *resize_coeff, + uint32_t *downsize_coeff) +{ + uint32_t temp_size; + uint32_t temp_downsize; + + *resize_coeff = 1 << 13; + *downsize_coeff = 1 << 13; + + /* Cannot downsize more than 8:1 */ + if (out_size << 3 < in_size) + return -EINVAL; + + /* compute downsizing coefficient */ + temp_downsize = 0; + temp_size = in_size; + while (temp_size >= out_size * 2 && temp_downsize < 2) { + temp_size >>= 1; + temp_downsize++; + } + *downsize_coeff = temp_downsize; + + /* + * compute resizing coefficient using the following formula: + * resize_coeff = M*(SI -1)/(SO - 1) + * where M = 2^13, SI - input size, SO - output size + */ + *resize_coeff = (8192L * (temp_size - 1)) / (out_size - 1); + if (*resize_coeff >= 16384L) { + dev_err(ipu_data.dev, "Warning! Overflow on resize coeff.\n"); + *resize_coeff = 0x3FFF; + } + + dev_dbg(ipu_data.dev, "resizing from %u -> %u pixels, " + "downsize=%u, resize=%u.%lu (reg=%u)\n", in_size, out_size, + *downsize_coeff, *resize_coeff >= 8192L ? 1 : 0, + ((*resize_coeff & 0x1FFF) * 10000L) / 8192L, *resize_coeff); + + return 0; +} + +static enum ipu_color_space format_to_colorspace(enum pixel_fmt fmt) +{ + switch (fmt) { + case IPU_PIX_FMT_RGB565: + case IPU_PIX_FMT_BGR24: + case IPU_PIX_FMT_RGB24: + case IPU_PIX_FMT_BGR32: + case IPU_PIX_FMT_RGB32: + return IPU_COLORSPACE_RGB; + default: + return IPU_COLORSPACE_YCBCR; + } +} + +static int ipu_ic_init_prpenc(struct ipu *ipu, + union ipu_channel_param *params, bool src_is_csi) +{ + uint32_t reg, ic_conf; + uint32_t downsize_coeff, resize_coeff; + enum ipu_color_space in_fmt, out_fmt; + + /* Setup vertical resizing */ + calc_resize_coeffs(params->video.in_height, + params->video.out_height, + &resize_coeff, &downsize_coeff); + reg = (downsize_coeff << 30) | (resize_coeff << 16); + + /* Setup horizontal resizing */ + calc_resize_coeffs(params->video.in_width, + params->video.out_width, + &resize_coeff, &downsize_coeff); + reg |= (downsize_coeff << 14) | resize_coeff; + + /* Setup color space conversion */ + in_fmt = format_to_colorspace(params->video.in_pixel_fmt); + out_fmt = format_to_colorspace(params->video.out_pixel_fmt); + + /* + * Colourspace conversion unsupported yet - see _init_csc() in + * Freescale sources + */ + if (in_fmt != out_fmt) { + dev_err(ipu->dev, "Colourspace conversion unsupported!\n"); + return -EOPNOTSUPP; + } + + idmac_write_icreg(ipu, reg, IC_PRP_ENC_RSC); + + ic_conf = idmac_read_icreg(ipu, IC_CONF); + + if (src_is_csi) + ic_conf &= ~IC_CONF_RWS_EN; + else + ic_conf |= IC_CONF_RWS_EN; + + idmac_write_icreg(ipu, ic_conf, IC_CONF); + + return 0; +} + +static uint32_t dma_param_addr(uint32_t dma_ch) +{ + /* Channel Parameter Memory */ + return 0x10000 | (dma_ch << 4); +} + +static void ipu_channel_set_priority(struct ipu *ipu, enum ipu_channel channel, + bool prio) +{ + u32 reg = idmac_read_icreg(ipu, IDMAC_CHA_PRI); + + if (prio) + reg |= 1UL << channel; + else + reg &= ~(1UL << channel); + + idmac_write_icreg(ipu, reg, IDMAC_CHA_PRI); + + dump_idmac_reg(ipu); +} + +static uint32_t ipu_channel_conf_mask(enum ipu_channel channel) +{ + uint32_t mask; + + switch (channel) { + case IDMAC_IC_0: + case IDMAC_IC_7: + mask = IPU_CONF_CSI_EN | IPU_CONF_IC_EN; + break; + case IDMAC_SDC_0: + case IDMAC_SDC_1: + mask = IPU_CONF_SDC_EN | IPU_CONF_DI_EN; + break; + default: + mask = 0; + break; + } + + return mask; +} + +/** + * ipu_enable_channel() - enable an IPU channel. + * @idmac: IPU DMAC context. + * @ichan: IDMAC channel. + * @return: 0 on success or negative error code on failure. + */ +static int ipu_enable_channel(struct idmac *idmac, struct idmac_channel *ichan) +{ + struct ipu *ipu = to_ipu(idmac); + enum ipu_channel channel = ichan->dma_chan.chan_id; + uint32_t reg; + unsigned long flags; + + spin_lock_irqsave(&ipu->lock, flags); + + /* Reset to buffer 0 */ + idmac_write_ipureg(ipu, 1UL << channel, IPU_CHA_CUR_BUF); + ichan->active_buffer = 0; + ichan->status = IPU_CHANNEL_ENABLED; + + switch (channel) { + case IDMAC_SDC_0: + case IDMAC_SDC_1: + case IDMAC_IC_7: + ipu_channel_set_priority(ipu, channel, true); + default: + break; + } + + reg = idmac_read_icreg(ipu, IDMAC_CHA_EN); + + idmac_write_icreg(ipu, reg | (1UL << channel), IDMAC_CHA_EN); + + ipu_ic_enable_task(ipu, channel); + + spin_unlock_irqrestore(&ipu->lock, flags); + return 0; +} + +/** + * ipu_init_channel_buffer() - initialize a buffer for logical IPU channel. + * @ichan: IDMAC channel. + * @pixel_fmt: pixel format of buffer. Pixel format is a FOURCC ASCII code. + * @width: width of buffer in pixels. + * @height: height of buffer in pixels. + * @stride: stride length of buffer in pixels. + * @rot_mode: rotation mode of buffer. A rotation setting other than + * IPU_ROTATE_VERT_FLIP should only be used for input buffers of + * rotation channels. + * @phyaddr_0: buffer 0 physical address. + * @phyaddr_1: buffer 1 physical address. Setting this to a value other than + * NULL enables double buffering mode. + * @return: 0 on success or negative error code on failure. + */ +static int ipu_init_channel_buffer(struct idmac_channel *ichan, + enum pixel_fmt pixel_fmt, + uint16_t width, uint16_t height, + uint32_t stride, + enum ipu_rotate_mode rot_mode, + dma_addr_t phyaddr_0, dma_addr_t phyaddr_1) +{ + enum ipu_channel channel = ichan->dma_chan.chan_id; + struct idmac *idmac = to_idmac(ichan->dma_chan.device); + struct ipu *ipu = to_ipu(idmac); + union chan_param_mem params = {}; + unsigned long flags; + uint32_t reg; + uint32_t stride_bytes; + + stride_bytes = stride * bytes_per_pixel(pixel_fmt); + + if (stride_bytes % 4) { + dev_err(ipu->dev, + "Stride length must be 32-bit aligned, stride = %d, bytes = %d\n", + stride, stride_bytes); + return -EINVAL; + } + + /* IC channel's stride must be a multiple of 8 pixels */ + if ((channel <= IDMAC_IC_13) && (stride % 8)) { + dev_err(ipu->dev, "Stride must be 8 pixel multiple\n"); + return -EINVAL; + } + + /* Build parameter memory data for DMA channel */ + ipu_ch_param_set_size(¶ms, pixel_fmt, width, height, stride_bytes); + ipu_ch_param_set_buffer(¶ms, phyaddr_0, phyaddr_1); + ipu_ch_param_set_rotation(¶ms, rot_mode); + + spin_lock_irqsave(&ipu->lock, flags); + + ipu_write_param_mem(dma_param_addr(channel), (uint32_t *)¶ms, 10); + + reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL); + + if (phyaddr_1) + reg |= 1UL << channel; + else + reg &= ~(1UL << channel); + + idmac_write_ipureg(ipu, reg, IPU_CHA_DB_MODE_SEL); + + ichan->status = IPU_CHANNEL_READY; + + spin_unlock_irqrestore(&ipu->lock, flags); + + return 0; +} + +/** + * ipu_select_buffer() - mark a channel's buffer as ready. + * @channel: channel ID. + * @buffer_n: buffer number to mark ready. + */ +static void ipu_select_buffer(enum ipu_channel channel, int buffer_n) +{ + /* No locking - this is a write-one-to-set register, cleared by IPU */ + if (buffer_n == 0) + /* Mark buffer 0 as ready. */ + idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF0_RDY); + else + /* Mark buffer 1 as ready. */ + idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF1_RDY); +} + +/** + * ipu_update_channel_buffer() - update physical address of a channel buffer. + * @ichan: IDMAC channel. + * @buffer_n: buffer number to update. + * 0 or 1 are the only valid values. + * @phyaddr: buffer physical address. + */ +/* Called under spin_lock(_irqsave)(&ichan->lock) */ +static void ipu_update_channel_buffer(struct idmac_channel *ichan, + int buffer_n, dma_addr_t phyaddr) +{ + enum ipu_channel channel = ichan->dma_chan.chan_id; + uint32_t reg; + unsigned long flags; + + spin_lock_irqsave(&ipu_data.lock, flags); + + if (buffer_n == 0) { + reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY); + if (reg & (1UL << channel)) { + ipu_ic_disable_task(&ipu_data, channel); + ichan->status = IPU_CHANNEL_READY; + } + + /* 44.3.3.1.9 - Row Number 1 (WORD1, offset 0) */ + idmac_write_ipureg(&ipu_data, dma_param_addr(channel) + + 0x0008UL, IPU_IMA_ADDR); + idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA); + } else { + reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY); + if (reg & (1UL << channel)) { + ipu_ic_disable_task(&ipu_data, channel); + ichan->status = IPU_CHANNEL_READY; + } + + /* Check if double-buffering is already enabled */ + reg = idmac_read_ipureg(&ipu_data, IPU_CHA_DB_MODE_SEL); + + if (!(reg & (1UL << channel))) + idmac_write_ipureg(&ipu_data, reg | (1UL << channel), + IPU_CHA_DB_MODE_SEL); + + /* 44.3.3.1.9 - Row Number 1 (WORD1, offset 1) */ + idmac_write_ipureg(&ipu_data, dma_param_addr(channel) + + 0x0009UL, IPU_IMA_ADDR); + idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA); + } + + spin_unlock_irqrestore(&ipu_data.lock, flags); +} + +/* Called under spin_lock_irqsave(&ichan->lock) */ +static int ipu_submit_buffer(struct idmac_channel *ichan, + struct idmac_tx_desc *desc, struct scatterlist *sg, int buf_idx) +{ + unsigned int chan_id = ichan->dma_chan.chan_id; + struct device *dev = &ichan->dma_chan.dev->device; + + if (async_tx_test_ack(&desc->txd)) + return -EINTR; + + /* + * On first invocation this shouldn't be necessary, the call to + * ipu_init_channel_buffer() above will set addresses for us, so we + * could make it conditional on status >= IPU_CHANNEL_ENABLED, but + * doing it again shouldn't hurt either. + */ + ipu_update_channel_buffer(ichan, buf_idx, sg_dma_address(sg)); + + ipu_select_buffer(chan_id, buf_idx); + dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n", + sg, chan_id, buf_idx); + + return 0; +} + +/* Called under spin_lock_irqsave(&ichan->lock) */ +static int ipu_submit_channel_buffers(struct idmac_channel *ichan, + struct idmac_tx_desc *desc) +{ + struct scatterlist *sg; + int i, ret = 0; + + for (i = 0, sg = desc->sg; i < 2 && sg; i++) { + if (!ichan->sg[i]) { + ichan->sg[i] = sg; + + ret = ipu_submit_buffer(ichan, desc, sg, i); + if (ret < 0) + return ret; + + sg = sg_next(sg); + } + } + + return ret; +} + +static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct idmac_tx_desc *desc = to_tx_desc(tx); + struct idmac_channel *ichan = to_idmac_chan(tx->chan); + struct idmac *idmac = to_idmac(tx->chan->device); + struct ipu *ipu = to_ipu(idmac); + struct device *dev = &ichan->dma_chan.dev->device; + dma_cookie_t cookie; + unsigned long flags; + int ret; + + /* Sanity check */ + if (!list_empty(&desc->list)) { + /* The descriptor doesn't belong to client */ + dev_err(dev, "Descriptor %p not prepared!\n", tx); + return -EBUSY; + } + + mutex_lock(&ichan->chan_mutex); + + async_tx_clear_ack(tx); + + if (ichan->status < IPU_CHANNEL_READY) { + struct idmac_video_param *video = &ichan->params.video; + /* + * Initial buffer assignment - the first two sg-entries from + * the descriptor will end up in the IDMAC buffers + */ + dma_addr_t dma_1 = sg_is_last(desc->sg) ? 0 : + sg_dma_address(&desc->sg[1]); + + WARN_ON(ichan->sg[0] || ichan->sg[1]); + + cookie = ipu_init_channel_buffer(ichan, + video->out_pixel_fmt, + video->out_width, + video->out_height, + video->out_stride, + IPU_ROTATE_NONE, + sg_dma_address(&desc->sg[0]), + dma_1); + if (cookie < 0) + goto out; + } + + dev_dbg(dev, "Submitting sg %p\n", &desc->sg[0]); + + cookie = dma_cookie_assign(tx); + + /* ipu->lock can be taken under ichan->lock, but not v.v. */ + spin_lock_irqsave(&ichan->lock, flags); + + list_add_tail(&desc->list, &ichan->queue); + /* submit_buffers() atomically verifies and fills empty sg slots */ + ret = ipu_submit_channel_buffers(ichan, desc); + + spin_unlock_irqrestore(&ichan->lock, flags); + + if (ret < 0) { + cookie = ret; + goto dequeue; + } + + if (ichan->status < IPU_CHANNEL_ENABLED) { + ret = ipu_enable_channel(idmac, ichan); + if (ret < 0) { + cookie = ret; + goto dequeue; + } + } + + dump_idmac_reg(ipu); + +dequeue: + if (cookie < 0) { + spin_lock_irqsave(&ichan->lock, flags); + list_del_init(&desc->list); + spin_unlock_irqrestore(&ichan->lock, flags); + tx->cookie = cookie; + ichan->dma_chan.cookie = cookie; + } + +out: + mutex_unlock(&ichan->chan_mutex); + + return cookie; +} + +/* Called with ichan->chan_mutex held */ +static int idmac_desc_alloc(struct idmac_channel *ichan, int n) +{ + struct idmac_tx_desc *desc = vmalloc(n * sizeof(struct idmac_tx_desc)); + struct idmac *idmac = to_idmac(ichan->dma_chan.device); + + if (!desc) + return -ENOMEM; + + /* No interrupts, just disable the tasklet for a moment */ + tasklet_disable(&to_ipu(idmac)->tasklet); + + ichan->n_tx_desc = n; + ichan->desc = desc; + INIT_LIST_HEAD(&ichan->queue); + INIT_LIST_HEAD(&ichan->free_list); + + while (n--) { + struct dma_async_tx_descriptor *txd = &desc->txd; + + memset(txd, 0, sizeof(*txd)); + dma_async_tx_descriptor_init(txd, &ichan->dma_chan); + txd->tx_submit = idmac_tx_submit; + + list_add(&desc->list, &ichan->free_list); + + desc++; + } + + tasklet_enable(&to_ipu(idmac)->tasklet); + + return 0; +} + +/** + * ipu_init_channel() - initialize an IPU channel. + * @idmac: IPU DMAC context. + * @ichan: pointer to the channel object. + * @return 0 on success or negative error code on failure. + */ +static int ipu_init_channel(struct idmac *idmac, struct idmac_channel *ichan) +{ + union ipu_channel_param *params = &ichan->params; + uint32_t ipu_conf; + enum ipu_channel channel = ichan->dma_chan.chan_id; + unsigned long flags; + uint32_t reg; + struct ipu *ipu = to_ipu(idmac); + int ret = 0, n_desc = 0; + + dev_dbg(ipu->dev, "init channel = %d\n", channel); + + if (channel != IDMAC_SDC_0 && channel != IDMAC_SDC_1 && + channel != IDMAC_IC_7) + return -EINVAL; + + spin_lock_irqsave(&ipu->lock, flags); + + switch (channel) { + case IDMAC_IC_7: + n_desc = 16; + reg = idmac_read_icreg(ipu, IC_CONF); + idmac_write_icreg(ipu, reg & ~IC_CONF_CSI_MEM_WR_EN, IC_CONF); + break; + case IDMAC_IC_0: + n_desc = 16; + reg = idmac_read_ipureg(ipu, IPU_FS_PROC_FLOW); + idmac_write_ipureg(ipu, reg & ~FS_ENC_IN_VALID, IPU_FS_PROC_FLOW); + ret = ipu_ic_init_prpenc(ipu, params, true); + break; + case IDMAC_SDC_0: + case IDMAC_SDC_1: + n_desc = 4; + default: + break; + } + + ipu->channel_init_mask |= 1L << channel; + + /* Enable IPU sub module */ + ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) | + ipu_channel_conf_mask(channel); + idmac_write_ipureg(ipu, ipu_conf, IPU_CONF); + + spin_unlock_irqrestore(&ipu->lock, flags); + + if (n_desc && !ichan->desc) + ret = idmac_desc_alloc(ichan, n_desc); + + dump_idmac_reg(ipu); + + return ret; +} + +/** + * ipu_uninit_channel() - uninitialize an IPU channel. + * @idmac: IPU DMAC context. + * @ichan: pointer to the channel object. + */ +static void ipu_uninit_channel(struct idmac *idmac, struct idmac_channel *ichan) +{ + enum ipu_channel channel = ichan->dma_chan.chan_id; + unsigned long flags; + uint32_t reg; + unsigned long chan_mask = 1UL << channel; + uint32_t ipu_conf; + struct ipu *ipu = to_ipu(idmac); + + spin_lock_irqsave(&ipu->lock, flags); + + if (!(ipu->channel_init_mask & chan_mask)) { + dev_err(ipu->dev, "Channel already uninitialized %d\n", + channel); + spin_unlock_irqrestore(&ipu->lock, flags); + return; + } + + /* Reset the double buffer */ + reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL); + idmac_write_ipureg(ipu, reg & ~chan_mask, IPU_CHA_DB_MODE_SEL); + + ichan->sec_chan_en = false; + + switch (channel) { + case IDMAC_IC_7: + reg = idmac_read_icreg(ipu, IC_CONF); + idmac_write_icreg(ipu, reg & ~(IC_CONF_RWS_EN | IC_CONF_PRPENC_EN), + IC_CONF); + break; + case IDMAC_IC_0: + reg = idmac_read_icreg(ipu, IC_CONF); + idmac_write_icreg(ipu, reg & ~(IC_CONF_PRPENC_EN | IC_CONF_PRPENC_CSC1), + IC_CONF); + break; + case IDMAC_SDC_0: + case IDMAC_SDC_1: + default: + break; + } + + ipu->channel_init_mask &= ~(1L << channel); + + ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) & + ~ipu_channel_conf_mask(channel); + idmac_write_ipureg(ipu, ipu_conf, IPU_CONF); + + spin_unlock_irqrestore(&ipu->lock, flags); + + ichan->n_tx_desc = 0; + vfree(ichan->desc); + ichan->desc = NULL; +} + +/** + * ipu_disable_channel() - disable an IPU channel. + * @idmac: IPU DMAC context. + * @ichan: channel object pointer. + * @wait_for_stop: flag to set whether to wait for channel end of frame or + * return immediately. + * @return: 0 on success or negative error code on failure. + */ +static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan, + bool wait_for_stop) +{ + enum ipu_channel channel = ichan->dma_chan.chan_id; + struct ipu *ipu = to_ipu(idmac); + uint32_t reg; + unsigned long flags; + unsigned long chan_mask = 1UL << channel; + unsigned int timeout; + + if (wait_for_stop && channel != IDMAC_SDC_1 && channel != IDMAC_SDC_0) { + timeout = 40; + /* This waiting always fails. Related to spurious irq problem */ + while ((idmac_read_icreg(ipu, IDMAC_CHA_BUSY) & chan_mask) || + (ipu_channel_status(ipu, channel) == TASK_STAT_ACTIVE)) { + timeout--; + msleep(10); + + if (!timeout) { + dev_dbg(ipu->dev, + "Warning: timeout waiting for channel %u to " + "stop: buf0_rdy = 0x%08X, buf1_rdy = 0x%08X, " + "busy = 0x%08X, tstat = 0x%08X\n", channel, + idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY), + idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY), + idmac_read_icreg(ipu, IDMAC_CHA_BUSY), + idmac_read_ipureg(ipu, IPU_TASKS_STAT)); + break; + } + } + dev_dbg(ipu->dev, "timeout = %d * 10ms\n", 40 - timeout); + } + /* SDC BG and FG must be disabled before DMA is disabled */ + if (wait_for_stop && (channel == IDMAC_SDC_0 || + channel == IDMAC_SDC_1)) { + for (timeout = 5; + timeout && !ipu_irq_status(ichan->eof_irq); timeout--) + msleep(5); + } + + spin_lock_irqsave(&ipu->lock, flags); + + /* Disable IC task */ + ipu_ic_disable_task(ipu, channel); + + /* Disable DMA channel(s) */ + reg = idmac_read_icreg(ipu, IDMAC_CHA_EN); + idmac_write_icreg(ipu, reg & ~chan_mask, IDMAC_CHA_EN); + + spin_unlock_irqrestore(&ipu->lock, flags); + + return 0; +} + +static struct scatterlist *idmac_sg_next(struct idmac_channel *ichan, + struct idmac_tx_desc **desc, struct scatterlist *sg) +{ + struct scatterlist *sgnew = sg ? sg_next(sg) : NULL; + + if (sgnew) + /* next sg-element in this list */ + return sgnew; + + if ((*desc)->list.next == &ichan->queue) + /* No more descriptors on the queue */ + return NULL; + + /* Fetch next descriptor */ + *desc = list_entry((*desc)->list.next, struct idmac_tx_desc, list); + return (*desc)->sg; +} + +/* + * We have several possibilities here: + * current BUF next BUF + * + * not last sg next not last sg + * not last sg next last sg + * last sg first sg from next descriptor + * last sg NULL + * + * Besides, the descriptor queue might be empty or not. We process all these + * cases carefully. + */ +static irqreturn_t idmac_interrupt(int irq, void *dev_id) +{ + struct idmac_channel *ichan = dev_id; + struct device *dev = &ichan->dma_chan.dev->device; + unsigned int chan_id = ichan->dma_chan.chan_id; + struct scatterlist **sg, *sgnext, *sgnew = NULL; + /* Next transfer descriptor */ + struct idmac_tx_desc *desc, *descnew; + dma_async_tx_callback callback; + void *callback_param; + bool done = false; + u32 ready0, ready1, curbuf, err; + unsigned long flags; + + /* IDMAC has cleared the respective BUFx_RDY bit, we manage the buffer */ + + dev_dbg(dev, "IDMAC irq %d, buf %d\n", irq, ichan->active_buffer); + + spin_lock_irqsave(&ipu_data.lock, flags); + + ready0 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY); + ready1 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY); + curbuf = idmac_read_ipureg(&ipu_data, IPU_CHA_CUR_BUF); + err = idmac_read_ipureg(&ipu_data, IPU_INT_STAT_4); + + if (err & (1 << chan_id)) { + idmac_write_ipureg(&ipu_data, 1 << chan_id, IPU_INT_STAT_4); + spin_unlock_irqrestore(&ipu_data.lock, flags); + /* + * Doing this + * ichan->sg[0] = ichan->sg[1] = NULL; + * you can force channel re-enable on the next tx_submit(), but + * this is dirty - think about descriptors with multiple + * sg elements. + */ + dev_warn(dev, "NFB4EOF on channel %d, ready %x, %x, cur %x\n", + chan_id, ready0, ready1, curbuf); + return IRQ_HANDLED; + } + spin_unlock_irqrestore(&ipu_data.lock, flags); + + /* Other interrupts do not interfere with this channel */ + spin_lock(&ichan->lock); + if (unlikely((ichan->active_buffer && (ready1 >> chan_id) & 1) || + (!ichan->active_buffer && (ready0 >> chan_id) & 1) + )) { + spin_unlock(&ichan->lock); + dev_dbg(dev, + "IRQ with active buffer still ready on channel %x, " + "active %d, ready %x, %x!\n", chan_id, + ichan->active_buffer, ready0, ready1); + return IRQ_NONE; + } + + if (unlikely(list_empty(&ichan->queue))) { + ichan->sg[ichan->active_buffer] = NULL; + spin_unlock(&ichan->lock); + dev_err(dev, + "IRQ without queued buffers on channel %x, active %d, " + "ready %x, %x!\n", chan_id, + ichan->active_buffer, ready0, ready1); + return IRQ_NONE; + } + + /* + * active_buffer is a software flag, it shows which buffer we are + * currently expecting back from the hardware, IDMAC should be + * processing the other buffer already + */ + sg = &ichan->sg[ichan->active_buffer]; + sgnext = ichan->sg[!ichan->active_buffer]; + + if (!*sg) { + spin_unlock(&ichan->lock); + return IRQ_HANDLED; + } + + desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list); + descnew = desc; + + dev_dbg(dev, "IDMAC irq %d, dma 0x%08x, next dma 0x%08x, current %d, curbuf 0x%08x\n", + irq, sg_dma_address(*sg), sgnext ? sg_dma_address(sgnext) : 0, ichan->active_buffer, curbuf); + + /* Find the descriptor of sgnext */ + sgnew = idmac_sg_next(ichan, &descnew, *sg); + if (sgnext != sgnew) + dev_err(dev, "Submitted buffer %p, next buffer %p\n", sgnext, sgnew); + + /* + * if sgnext == NULL sg must be the last element in a scatterlist and + * queue must be empty + */ + if (unlikely(!sgnext)) { + if (!WARN_ON(sg_next(*sg))) + dev_dbg(dev, "Underrun on channel %x\n", chan_id); + ichan->sg[!ichan->active_buffer] = sgnew; + + if (unlikely(sgnew)) { + ipu_submit_buffer(ichan, descnew, sgnew, !ichan->active_buffer); + } else { + spin_lock_irqsave(&ipu_data.lock, flags); + ipu_ic_disable_task(&ipu_data, chan_id); + spin_unlock_irqrestore(&ipu_data.lock, flags); + ichan->status = IPU_CHANNEL_READY; + /* Continue to check for complete descriptor */ + } + } + + /* Calculate and submit the next sg element */ + sgnew = idmac_sg_next(ichan, &descnew, sgnew); + + if (unlikely(!sg_next(*sg)) || !sgnext) { + /* + * Last element in scatterlist done, remove from the queue, + * _init for debugging + */ + list_del_init(&desc->list); + done = true; + } + + *sg = sgnew; + + if (likely(sgnew) && + ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) { + callback = descnew->txd.callback; + callback_param = descnew->txd.callback_param; + list_del_init(&descnew->list); + spin_unlock(&ichan->lock); + if (callback) + callback(callback_param); + spin_lock(&ichan->lock); + } + + /* Flip the active buffer - even if update above failed */ + ichan->active_buffer = !ichan->active_buffer; + if (done) + dma_cookie_complete(&desc->txd); + + callback = desc->txd.callback; + callback_param = desc->txd.callback_param; + + spin_unlock(&ichan->lock); + + if (done && (desc->txd.flags & DMA_PREP_INTERRUPT) && callback) + callback(callback_param); + + return IRQ_HANDLED; +} + +static void ipu_gc_tasklet(unsigned long arg) +{ + struct ipu *ipu = (struct ipu *)arg; + int i; + + for (i = 0; i < IPU_CHANNELS_NUM; i++) { + struct idmac_channel *ichan = ipu->channel + i; + struct idmac_tx_desc *desc; + unsigned long flags; + struct scatterlist *sg; + int j, k; + + for (j = 0; j < ichan->n_tx_desc; j++) { + desc = ichan->desc + j; + spin_lock_irqsave(&ichan->lock, flags); + if (async_tx_test_ack(&desc->txd)) { + list_move(&desc->list, &ichan->free_list); + for_each_sg(desc->sg, sg, desc->sg_len, k) { + if (ichan->sg[0] == sg) + ichan->sg[0] = NULL; + else if (ichan->sg[1] == sg) + ichan->sg[1] = NULL; + } + async_tx_clear_ack(&desc->txd); + } + spin_unlock_irqrestore(&ichan->lock, flags); + } + } +} + +/* Allocate and initialise a transfer descriptor. */ +static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long tx_flags, + void *context) +{ + struct idmac_channel *ichan = to_idmac_chan(chan); + struct idmac_tx_desc *desc = NULL; + struct dma_async_tx_descriptor *txd = NULL; + unsigned long flags; + + /* We only can handle these three channels so far */ + if (chan->chan_id != IDMAC_SDC_0 && chan->chan_id != IDMAC_SDC_1 && + chan->chan_id != IDMAC_IC_7) + return NULL; + + if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV) { + dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction); + return NULL; + } + + mutex_lock(&ichan->chan_mutex); + + spin_lock_irqsave(&ichan->lock, flags); + if (!list_empty(&ichan->free_list)) { + desc = list_entry(ichan->free_list.next, + struct idmac_tx_desc, list); + + list_del_init(&desc->list); + + desc->sg_len = sg_len; + desc->sg = sgl; + txd = &desc->txd; + txd->flags = tx_flags; + } + spin_unlock_irqrestore(&ichan->lock, flags); + + mutex_unlock(&ichan->chan_mutex); + + tasklet_schedule(&to_ipu(to_idmac(chan->device))->tasklet); + + return txd; +} + +/* Re-select the current buffer and re-activate the channel */ +static void idmac_issue_pending(struct dma_chan *chan) +{ + struct idmac_channel *ichan = to_idmac_chan(chan); + struct idmac *idmac = to_idmac(chan->device); + struct ipu *ipu = to_ipu(idmac); + unsigned long flags; + + /* This is not always needed, but doesn't hurt either */ + spin_lock_irqsave(&ipu->lock, flags); + ipu_select_buffer(chan->chan_id, ichan->active_buffer); + spin_unlock_irqrestore(&ipu->lock, flags); + + /* + * Might need to perform some parts of initialisation from + * ipu_enable_channel(), but not all, we do not want to reset to buffer + * 0, don't need to set priority again either, but re-enabling the task + * and the channel might be a good idea. + */ +} + +static int __idmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct idmac_channel *ichan = to_idmac_chan(chan); + struct idmac *idmac = to_idmac(chan->device); + struct ipu *ipu = to_ipu(idmac); + struct list_head *list, *tmp; + unsigned long flags; + int i; + + switch (cmd) { + case DMA_PAUSE: + spin_lock_irqsave(&ipu->lock, flags); + ipu_ic_disable_task(ipu, chan->chan_id); + + /* Return all descriptors into "prepared" state */ + list_for_each_safe(list, tmp, &ichan->queue) + list_del_init(list); + + ichan->sg[0] = NULL; + ichan->sg[1] = NULL; + + spin_unlock_irqrestore(&ipu->lock, flags); + + ichan->status = IPU_CHANNEL_INITIALIZED; + break; + case DMA_TERMINATE_ALL: + ipu_disable_channel(idmac, ichan, + ichan->status >= IPU_CHANNEL_ENABLED); + + tasklet_disable(&ipu->tasklet); + + /* ichan->queue is modified in ISR, have to spinlock */ + spin_lock_irqsave(&ichan->lock, flags); + list_splice_init(&ichan->queue, &ichan->free_list); + + if (ichan->desc) + for (i = 0; i < ichan->n_tx_desc; i++) { + struct idmac_tx_desc *desc = ichan->desc + i; + if (list_empty(&desc->list)) + /* Descriptor was prepared, but not submitted */ + list_add(&desc->list, &ichan->free_list); + + async_tx_clear_ack(&desc->txd); + } + + ichan->sg[0] = NULL; + ichan->sg[1] = NULL; + spin_unlock_irqrestore(&ichan->lock, flags); + + tasklet_enable(&ipu->tasklet); + + ichan->status = IPU_CHANNEL_INITIALIZED; + break; + default: + return -ENOSYS; + } + + return 0; +} + +static int idmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct idmac_channel *ichan = to_idmac_chan(chan); + int ret; + + mutex_lock(&ichan->chan_mutex); + + ret = __idmac_control(chan, cmd, arg); + + mutex_unlock(&ichan->chan_mutex); + + return ret; +} + +#ifdef DEBUG +static irqreturn_t ic_sof_irq(int irq, void *dev_id) +{ + struct idmac_channel *ichan = dev_id; + printk(KERN_DEBUG "Got SOF IRQ %d on Channel %d\n", + irq, ichan->dma_chan.chan_id); + disable_irq_nosync(irq); + return IRQ_HANDLED; +} + +static irqreturn_t ic_eof_irq(int irq, void *dev_id) +{ + struct idmac_channel *ichan = dev_id; + printk(KERN_DEBUG "Got EOF IRQ %d on Channel %d\n", + irq, ichan->dma_chan.chan_id); + disable_irq_nosync(irq); + return IRQ_HANDLED; +} + +static int ic_sof = -EINVAL, ic_eof = -EINVAL; +#endif + +static int idmac_alloc_chan_resources(struct dma_chan *chan) +{ + struct idmac_channel *ichan = to_idmac_chan(chan); + struct idmac *idmac = to_idmac(chan->device); + int ret; + + /* dmaengine.c now guarantees to only offer free channels */ + BUG_ON(chan->client_count > 1); + WARN_ON(ichan->status != IPU_CHANNEL_FREE); + + dma_cookie_init(chan); + + ret = ipu_irq_map(chan->chan_id); + if (ret < 0) + goto eimap; + + ichan->eof_irq = ret; + + /* + * Important to first disable the channel, because maybe someone + * used it before us, e.g., the bootloader + */ + ipu_disable_channel(idmac, ichan, true); + + ret = ipu_init_channel(idmac, ichan); + if (ret < 0) + goto eichan; + + ret = request_irq(ichan->eof_irq, idmac_interrupt, 0, + ichan->eof_name, ichan); + if (ret < 0) + goto erirq; + +#ifdef DEBUG + if (chan->chan_id == IDMAC_IC_7) { + ic_sof = ipu_irq_map(69); + if (ic_sof > 0) + request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan); + ic_eof = ipu_irq_map(70); + if (ic_eof > 0) + request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan); + } +#endif + + ichan->status = IPU_CHANNEL_INITIALIZED; + + dev_dbg(&chan->dev->device, "Found channel 0x%x, irq %d\n", + chan->chan_id, ichan->eof_irq); + + return ret; + +erirq: + ipu_uninit_channel(idmac, ichan); +eichan: + ipu_irq_unmap(chan->chan_id); +eimap: + return ret; +} + +static void idmac_free_chan_resources(struct dma_chan *chan) +{ + struct idmac_channel *ichan = to_idmac_chan(chan); + struct idmac *idmac = to_idmac(chan->device); + + mutex_lock(&ichan->chan_mutex); + + __idmac_control(chan, DMA_TERMINATE_ALL, 0); + + if (ichan->status > IPU_CHANNEL_FREE) { +#ifdef DEBUG + if (chan->chan_id == IDMAC_IC_7) { + if (ic_sof > 0) { + free_irq(ic_sof, ichan); + ipu_irq_unmap(69); + ic_sof = -EINVAL; + } + if (ic_eof > 0) { + free_irq(ic_eof, ichan); + ipu_irq_unmap(70); + ic_eof = -EINVAL; + } + } +#endif + free_irq(ichan->eof_irq, ichan); + ipu_irq_unmap(chan->chan_id); + } + + ichan->status = IPU_CHANNEL_FREE; + + ipu_uninit_channel(idmac, ichan); + + mutex_unlock(&ichan->chan_mutex); + + tasklet_schedule(&to_ipu(idmac)->tasklet); +} + +static enum dma_status idmac_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0); + if (cookie != chan->cookie) + return DMA_ERROR; + return DMA_SUCCESS; +} + +static int __init ipu_idmac_init(struct ipu *ipu) +{ + struct idmac *idmac = &ipu->idmac; + struct dma_device *dma = &idmac->dma; + int i; + + dma_cap_set(DMA_SLAVE, dma->cap_mask); + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + + /* Compulsory common fields */ + dma->dev = ipu->dev; + dma->device_alloc_chan_resources = idmac_alloc_chan_resources; + dma->device_free_chan_resources = idmac_free_chan_resources; + dma->device_tx_status = idmac_tx_status; + dma->device_issue_pending = idmac_issue_pending; + + /* Compulsory for DMA_SLAVE fields */ + dma->device_prep_slave_sg = idmac_prep_slave_sg; + dma->device_control = idmac_control; + + INIT_LIST_HEAD(&dma->channels); + for (i = 0; i < IPU_CHANNELS_NUM; i++) { + struct idmac_channel *ichan = ipu->channel + i; + struct dma_chan *dma_chan = &ichan->dma_chan; + + spin_lock_init(&ichan->lock); + mutex_init(&ichan->chan_mutex); + + ichan->status = IPU_CHANNEL_FREE; + ichan->sec_chan_en = false; + snprintf(ichan->eof_name, sizeof(ichan->eof_name), "IDMAC EOF %d", i); + + dma_chan->device = &idmac->dma; + dma_cookie_init(dma_chan); + dma_chan->chan_id = i; + list_add_tail(&dma_chan->device_node, &dma->channels); + } + + idmac_write_icreg(ipu, 0x00000070, IDMAC_CONF); + + return dma_async_device_register(&idmac->dma); +} + +static void __exit ipu_idmac_exit(struct ipu *ipu) +{ + int i; + struct idmac *idmac = &ipu->idmac; + + for (i = 0; i < IPU_CHANNELS_NUM; i++) { + struct idmac_channel *ichan = ipu->channel + i; + + idmac_control(&ichan->dma_chan, DMA_TERMINATE_ALL, 0); + } + + dma_async_device_unregister(&idmac->dma); +} + +/***************************************************************************** + * IPU common probe / remove + */ + +static int __init ipu_probe(struct platform_device *pdev) +{ + struct ipu_platform_data *pdata = pdev->dev.platform_data; + struct resource *mem_ipu, *mem_ic; + int ret; + + spin_lock_init(&ipu_data.lock); + + mem_ipu = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mem_ic = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!pdata || !mem_ipu || !mem_ic) + return -EINVAL; + + ipu_data.dev = &pdev->dev; + + platform_set_drvdata(pdev, &ipu_data); + + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto err_noirq; + + ipu_data.irq_fn = ret; + ret = platform_get_irq(pdev, 1); + if (ret < 0) + goto err_noirq; + + ipu_data.irq_err = ret; + ipu_data.irq_base = pdata->irq_base; + + dev_dbg(&pdev->dev, "fn irq %u, err irq %u, irq-base %u\n", + ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base); + + /* Remap IPU common registers */ + ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu)); + if (!ipu_data.reg_ipu) { + ret = -ENOMEM; + goto err_ioremap_ipu; + } + + /* Remap Image Converter and Image DMA Controller registers */ + ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic)); + if (!ipu_data.reg_ic) { + ret = -ENOMEM; + goto err_ioremap_ic; + } + + /* Get IPU clock */ + ipu_data.ipu_clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(ipu_data.ipu_clk)) { + ret = PTR_ERR(ipu_data.ipu_clk); + goto err_clk_get; + } + + /* Make sure IPU HSP clock is running */ + clk_enable(ipu_data.ipu_clk); + + /* Disable all interrupts */ + idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_1); + idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_2); + idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_3); + idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_4); + idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_5); + + dev_dbg(&pdev->dev, "%s @ 0x%08lx, fn irq %u, err irq %u\n", pdev->name, + (unsigned long)mem_ipu->start, ipu_data.irq_fn, ipu_data.irq_err); + + ret = ipu_irq_attach_irq(&ipu_data, pdev); + if (ret < 0) + goto err_attach_irq; + + /* Initialize DMA engine */ + ret = ipu_idmac_init(&ipu_data); + if (ret < 0) + goto err_idmac_init; + + tasklet_init(&ipu_data.tasklet, ipu_gc_tasklet, (unsigned long)&ipu_data); + + ipu_data.dev = &pdev->dev; + + dev_dbg(ipu_data.dev, "IPU initialized\n"); + + return 0; + +err_idmac_init: +err_attach_irq: + ipu_irq_detach_irq(&ipu_data, pdev); + clk_disable(ipu_data.ipu_clk); + clk_put(ipu_data.ipu_clk); +err_clk_get: + iounmap(ipu_data.reg_ic); +err_ioremap_ic: + iounmap(ipu_data.reg_ipu); +err_ioremap_ipu: +err_noirq: + dev_err(&pdev->dev, "Failed to probe IPU: %d\n", ret); + return ret; +} + +static int __exit ipu_remove(struct platform_device *pdev) +{ + struct ipu *ipu = platform_get_drvdata(pdev); + + ipu_idmac_exit(ipu); + ipu_irq_detach_irq(ipu, pdev); + clk_disable(ipu->ipu_clk); + clk_put(ipu->ipu_clk); + iounmap(ipu->reg_ic); + iounmap(ipu->reg_ipu); + tasklet_kill(&ipu->tasklet); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +/* + * We need two MEM resources - with IPU-common and Image Converter registers, + * including PF_CONF and IDMAC_* registers, and two IRQs - function and error + */ +static struct platform_driver ipu_platform_driver = { + .driver = { + .name = "ipu-core", + .owner = THIS_MODULE, + }, + .remove = __exit_p(ipu_remove), +}; + +static int __init ipu_init(void) +{ + return platform_driver_probe(&ipu_platform_driver, ipu_probe); +} +subsys_initcall(ipu_init); + +MODULE_DESCRIPTION("IPU core driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Guennadi Liakhovetski <lg@denx.de>"); +MODULE_ALIAS("platform:ipu-core"); diff --git a/drivers/dma/ipu/ipu_intern.h b/drivers/dma/ipu/ipu_intern.h new file mode 100644 index 00000000..545cf11a --- /dev/null +++ b/drivers/dma/ipu/ipu_intern.h @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2008 + * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de> + * + * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _IPU_INTERN_H_ +#define _IPU_INTERN_H_ + +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/interrupt.h> + +/* IPU Common registers */ +#define IPU_CONF 0x00 +#define IPU_CHA_BUF0_RDY 0x04 +#define IPU_CHA_BUF1_RDY 0x08 +#define IPU_CHA_DB_MODE_SEL 0x0C +#define IPU_CHA_CUR_BUF 0x10 +#define IPU_FS_PROC_FLOW 0x14 +#define IPU_FS_DISP_FLOW 0x18 +#define IPU_TASKS_STAT 0x1C +#define IPU_IMA_ADDR 0x20 +#define IPU_IMA_DATA 0x24 +#define IPU_INT_CTRL_1 0x28 +#define IPU_INT_CTRL_2 0x2C +#define IPU_INT_CTRL_3 0x30 +#define IPU_INT_CTRL_4 0x34 +#define IPU_INT_CTRL_5 0x38 +#define IPU_INT_STAT_1 0x3C +#define IPU_INT_STAT_2 0x40 +#define IPU_INT_STAT_3 0x44 +#define IPU_INT_STAT_4 0x48 +#define IPU_INT_STAT_5 0x4C +#define IPU_BRK_CTRL_1 0x50 +#define IPU_BRK_CTRL_2 0x54 +#define IPU_BRK_STAT 0x58 +#define IPU_DIAGB_CTRL 0x5C + +/* IPU_CONF Register bits */ +#define IPU_CONF_CSI_EN 0x00000001 +#define IPU_CONF_IC_EN 0x00000002 +#define IPU_CONF_ROT_EN 0x00000004 +#define IPU_CONF_PF_EN 0x00000008 +#define IPU_CONF_SDC_EN 0x00000010 +#define IPU_CONF_ADC_EN 0x00000020 +#define IPU_CONF_DI_EN 0x00000040 +#define IPU_CONF_DU_EN 0x00000080 +#define IPU_CONF_PXL_ENDIAN 0x00000100 + +/* Image Converter Registers */ +#define IC_CONF 0x88 +#define IC_PRP_ENC_RSC 0x8C +#define IC_PRP_VF_RSC 0x90 +#define IC_PP_RSC 0x94 +#define IC_CMBP_1 0x98 +#define IC_CMBP_2 0x9C +#define PF_CONF 0xA0 +#define IDMAC_CONF 0xA4 +#define IDMAC_CHA_EN 0xA8 +#define IDMAC_CHA_PRI 0xAC +#define IDMAC_CHA_BUSY 0xB0 + +/* Image Converter Register bits */ +#define IC_CONF_PRPENC_EN 0x00000001 +#define IC_CONF_PRPENC_CSC1 0x00000002 +#define IC_CONF_PRPENC_ROT_EN 0x00000004 +#define IC_CONF_PRPVF_EN 0x00000100 +#define IC_CONF_PRPVF_CSC1 0x00000200 +#define IC_CONF_PRPVF_CSC2 0x00000400 +#define IC_CONF_PRPVF_CMB 0x00000800 +#define IC_CONF_PRPVF_ROT_EN 0x00001000 +#define IC_CONF_PP_EN 0x00010000 +#define IC_CONF_PP_CSC1 0x00020000 +#define IC_CONF_PP_CSC2 0x00040000 +#define IC_CONF_PP_CMB 0x00080000 +#define IC_CONF_PP_ROT_EN 0x00100000 +#define IC_CONF_IC_GLB_LOC_A 0x10000000 +#define IC_CONF_KEY_COLOR_EN 0x20000000 +#define IC_CONF_RWS_EN 0x40000000 +#define IC_CONF_CSI_MEM_WR_EN 0x80000000 + +#define IDMA_CHAN_INVALID 0x000000FF +#define IDMA_IC_0 0x00000001 +#define IDMA_IC_1 0x00000002 +#define IDMA_IC_2 0x00000004 +#define IDMA_IC_3 0x00000008 +#define IDMA_IC_4 0x00000010 +#define IDMA_IC_5 0x00000020 +#define IDMA_IC_6 0x00000040 +#define IDMA_IC_7 0x00000080 +#define IDMA_IC_8 0x00000100 +#define IDMA_IC_9 0x00000200 +#define IDMA_IC_10 0x00000400 +#define IDMA_IC_11 0x00000800 +#define IDMA_IC_12 0x00001000 +#define IDMA_IC_13 0x00002000 +#define IDMA_SDC_BG 0x00004000 +#define IDMA_SDC_FG 0x00008000 +#define IDMA_SDC_MASK 0x00010000 +#define IDMA_SDC_PARTIAL 0x00020000 +#define IDMA_ADC_SYS1_WR 0x00040000 +#define IDMA_ADC_SYS2_WR 0x00080000 +#define IDMA_ADC_SYS1_CMD 0x00100000 +#define IDMA_ADC_SYS2_CMD 0x00200000 +#define IDMA_ADC_SYS1_RD 0x00400000 +#define IDMA_ADC_SYS2_RD 0x00800000 +#define IDMA_PF_QP 0x01000000 +#define IDMA_PF_BSP 0x02000000 +#define IDMA_PF_Y_IN 0x04000000 +#define IDMA_PF_U_IN 0x08000000 +#define IDMA_PF_V_IN 0x10000000 +#define IDMA_PF_Y_OUT 0x20000000 +#define IDMA_PF_U_OUT 0x40000000 +#define IDMA_PF_V_OUT 0x80000000 + +#define TSTAT_PF_H264_PAUSE 0x00000001 +#define TSTAT_CSI2MEM_MASK 0x0000000C +#define TSTAT_CSI2MEM_OFFSET 2 +#define TSTAT_VF_MASK 0x00000600 +#define TSTAT_VF_OFFSET 9 +#define TSTAT_VF_ROT_MASK 0x000C0000 +#define TSTAT_VF_ROT_OFFSET 18 +#define TSTAT_ENC_MASK 0x00000180 +#define TSTAT_ENC_OFFSET 7 +#define TSTAT_ENC_ROT_MASK 0x00030000 +#define TSTAT_ENC_ROT_OFFSET 16 +#define TSTAT_PP_MASK 0x00001800 +#define TSTAT_PP_OFFSET 11 +#define TSTAT_PP_ROT_MASK 0x00300000 +#define TSTAT_PP_ROT_OFFSET 20 +#define TSTAT_PF_MASK 0x00C00000 +#define TSTAT_PF_OFFSET 22 +#define TSTAT_ADCSYS1_MASK 0x03000000 +#define TSTAT_ADCSYS1_OFFSET 24 +#define TSTAT_ADCSYS2_MASK 0x0C000000 +#define TSTAT_ADCSYS2_OFFSET 26 + +#define TASK_STAT_IDLE 0 +#define TASK_STAT_ACTIVE 1 +#define TASK_STAT_WAIT4READY 2 + +struct idmac { + struct dma_device dma; +}; + +struct ipu { + void __iomem *reg_ipu; + void __iomem *reg_ic; + unsigned int irq_fn; /* IPU Function IRQ to the CPU */ + unsigned int irq_err; /* IPU Error IRQ to the CPU */ + unsigned int irq_base; /* Beginning of the IPU IRQ range */ + unsigned long channel_init_mask; + spinlock_t lock; + struct clk *ipu_clk; + struct device *dev; + struct idmac idmac; + struct idmac_channel channel[IPU_CHANNELS_NUM]; + struct tasklet_struct tasklet; +}; + +#define to_idmac(d) container_of(d, struct idmac, dma) + +extern int ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev); +extern void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev); + +extern bool ipu_irq_status(uint32_t irq); +extern int ipu_irq_map(unsigned int source); +extern int ipu_irq_unmap(unsigned int source); + +#endif diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c new file mode 100644 index 00000000..a71f55e7 --- /dev/null +++ b/drivers/dma/ipu/ipu_irq.c @@ -0,0 +1,413 @@ +/* + * Copyright (C) 2008 + * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/clk.h> +#include <linux/irq.h> +#include <linux/io.h> + +#include <mach/ipu.h> + +#include "ipu_intern.h" + +/* + * Register read / write - shall be inlined by the compiler + */ +static u32 ipu_read_reg(struct ipu *ipu, unsigned long reg) +{ + return __raw_readl(ipu->reg_ipu + reg); +} + +static void ipu_write_reg(struct ipu *ipu, u32 value, unsigned long reg) +{ + __raw_writel(value, ipu->reg_ipu + reg); +} + + +/* + * IPU IRQ chip driver + */ + +#define IPU_IRQ_NR_FN_BANKS 3 +#define IPU_IRQ_NR_ERR_BANKS 2 +#define IPU_IRQ_NR_BANKS (IPU_IRQ_NR_FN_BANKS + IPU_IRQ_NR_ERR_BANKS) + +struct ipu_irq_bank { + unsigned int control; + unsigned int status; + spinlock_t lock; + struct ipu *ipu; +}; + +static struct ipu_irq_bank irq_bank[IPU_IRQ_NR_BANKS] = { + /* 3 groups of functional interrupts */ + { + .control = IPU_INT_CTRL_1, + .status = IPU_INT_STAT_1, + }, { + .control = IPU_INT_CTRL_2, + .status = IPU_INT_STAT_2, + }, { + .control = IPU_INT_CTRL_3, + .status = IPU_INT_STAT_3, + }, + /* 2 groups of error interrupts */ + { + .control = IPU_INT_CTRL_4, + .status = IPU_INT_STAT_4, + }, { + .control = IPU_INT_CTRL_5, + .status = IPU_INT_STAT_5, + }, +}; + +struct ipu_irq_map { + unsigned int irq; + int source; + struct ipu_irq_bank *bank; + struct ipu *ipu; +}; + +static struct ipu_irq_map irq_map[CONFIG_MX3_IPU_IRQS]; +/* Protects allocations from the above array of maps */ +static DEFINE_MUTEX(map_lock); +/* Protects register accesses and individual mappings */ +static DEFINE_RAW_SPINLOCK(bank_lock); + +static struct ipu_irq_map *src2map(unsigned int src) +{ + int i; + + for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) + if (irq_map[i].source == src) + return irq_map + i; + + return NULL; +} + +static void ipu_irq_unmask(struct irq_data *d) +{ + struct ipu_irq_map *map = irq_data_get_irq_chip_data(d); + struct ipu_irq_bank *bank; + uint32_t reg; + unsigned long lock_flags; + + raw_spin_lock_irqsave(&bank_lock, lock_flags); + + bank = map->bank; + if (!bank) { + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); + pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq); + return; + } + + reg = ipu_read_reg(bank->ipu, bank->control); + reg |= (1UL << (map->source & 31)); + ipu_write_reg(bank->ipu, reg, bank->control); + + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); +} + +static void ipu_irq_mask(struct irq_data *d) +{ + struct ipu_irq_map *map = irq_data_get_irq_chip_data(d); + struct ipu_irq_bank *bank; + uint32_t reg; + unsigned long lock_flags; + + raw_spin_lock_irqsave(&bank_lock, lock_flags); + + bank = map->bank; + if (!bank) { + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); + pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq); + return; + } + + reg = ipu_read_reg(bank->ipu, bank->control); + reg &= ~(1UL << (map->source & 31)); + ipu_write_reg(bank->ipu, reg, bank->control); + + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); +} + +static void ipu_irq_ack(struct irq_data *d) +{ + struct ipu_irq_map *map = irq_data_get_irq_chip_data(d); + struct ipu_irq_bank *bank; + unsigned long lock_flags; + + raw_spin_lock_irqsave(&bank_lock, lock_flags); + + bank = map->bank; + if (!bank) { + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); + pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq); + return; + } + + ipu_write_reg(bank->ipu, 1UL << (map->source & 31), bank->status); + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); +} + +/** + * ipu_irq_status() - returns the current interrupt status of the specified IRQ. + * @irq: interrupt line to get status for. + * @return: true if the interrupt is pending/asserted or false if the + * interrupt is not pending. + */ +bool ipu_irq_status(unsigned int irq) +{ + struct ipu_irq_map *map = irq_get_chip_data(irq); + struct ipu_irq_bank *bank; + unsigned long lock_flags; + bool ret; + + raw_spin_lock_irqsave(&bank_lock, lock_flags); + bank = map->bank; + ret = bank && ipu_read_reg(bank->ipu, bank->status) & + (1UL << (map->source & 31)); + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); + + return ret; +} + +/** + * ipu_irq_map() - map an IPU interrupt source to an IRQ number + * @source: interrupt source bit position (see below) + * @return: mapped IRQ number or negative error code + * + * The source parameter has to be explained further. On i.MX31 IPU has 137 IRQ + * sources, they are broken down in 5 32-bit registers, like 32, 32, 24, 32, 17. + * However, the source argument of this function is not the sequence number of + * the possible IRQ, but rather its bit position. So, first interrupt in fourth + * register has source number 96, and not 88. This makes calculations easier, + * and also provides forward compatibility with any future IPU implementations + * with any interrupt bit assignments. + */ +int ipu_irq_map(unsigned int source) +{ + int i, ret = -ENOMEM; + struct ipu_irq_map *map; + + might_sleep(); + + mutex_lock(&map_lock); + map = src2map(source); + if (map) { + pr_err("IPU: Source %u already mapped to IRQ %u\n", source, map->irq); + ret = -EBUSY; + goto out; + } + + for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) { + if (irq_map[i].source < 0) { + unsigned long lock_flags; + + raw_spin_lock_irqsave(&bank_lock, lock_flags); + irq_map[i].source = source; + irq_map[i].bank = irq_bank + source / 32; + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); + + ret = irq_map[i].irq; + pr_debug("IPU: mapped source %u to IRQ %u\n", + source, ret); + break; + } + } +out: + mutex_unlock(&map_lock); + + if (ret < 0) + pr_err("IPU: couldn't map source %u: %d\n", source, ret); + + return ret; +} + +/** + * ipu_irq_map() - map an IPU interrupt source to an IRQ number + * @source: interrupt source bit position (see ipu_irq_map()) + * @return: 0 or negative error code + */ +int ipu_irq_unmap(unsigned int source) +{ + int i, ret = -EINVAL; + + might_sleep(); + + mutex_lock(&map_lock); + for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) { + if (irq_map[i].source == source) { + unsigned long lock_flags; + + pr_debug("IPU: unmapped source %u from IRQ %u\n", + source, irq_map[i].irq); + + raw_spin_lock_irqsave(&bank_lock, lock_flags); + irq_map[i].source = -EINVAL; + irq_map[i].bank = NULL; + raw_spin_unlock_irqrestore(&bank_lock, lock_flags); + + ret = 0; + break; + } + } + mutex_unlock(&map_lock); + + return ret; +} + +/* Chained IRQ handler for IPU error interrupt */ +static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) +{ + struct ipu *ipu = irq_get_handler_data(irq); + u32 status; + int i, line; + + for (i = IPU_IRQ_NR_FN_BANKS; i < IPU_IRQ_NR_BANKS; i++) { + struct ipu_irq_bank *bank = irq_bank + i; + + raw_spin_lock(&bank_lock); + status = ipu_read_reg(ipu, bank->status); + /* + * Don't think we have to clear all interrupts here, they will + * be acked by ->handle_irq() (handle_level_irq). However, we + * might want to clear unhandled interrupts after the loop... + */ + status &= ipu_read_reg(ipu, bank->control); + raw_spin_unlock(&bank_lock); + while ((line = ffs(status))) { + struct ipu_irq_map *map; + + line--; + status &= ~(1UL << line); + + raw_spin_lock(&bank_lock); + map = src2map(32 * i + line); + if (map) + irq = map->irq; + raw_spin_unlock(&bank_lock); + + if (!map) { + pr_err("IPU: Interrupt on unmapped source %u bank %d\n", + line, i); + continue; + } + generic_handle_irq(irq); + } + } +} + +/* Chained IRQ handler for IPU function interrupt */ +static void ipu_irq_fn(unsigned int irq, struct irq_desc *desc) +{ + struct ipu *ipu = irq_desc_get_handler_data(desc); + u32 status; + int i, line; + + for (i = 0; i < IPU_IRQ_NR_FN_BANKS; i++) { + struct ipu_irq_bank *bank = irq_bank + i; + + raw_spin_lock(&bank_lock); + status = ipu_read_reg(ipu, bank->status); + /* Not clearing all interrupts, see above */ + status &= ipu_read_reg(ipu, bank->control); + raw_spin_unlock(&bank_lock); + while ((line = ffs(status))) { + struct ipu_irq_map *map; + + line--; + status &= ~(1UL << line); + + raw_spin_lock(&bank_lock); + map = src2map(32 * i + line); + if (map) + irq = map->irq; + raw_spin_unlock(&bank_lock); + + if (!map) { + pr_err("IPU: Interrupt on unmapped source %u bank %d\n", + line, i); + continue; + } + generic_handle_irq(irq); + } + } +} + +static struct irq_chip ipu_irq_chip = { + .name = "ipu_irq", + .irq_ack = ipu_irq_ack, + .irq_mask = ipu_irq_mask, + .irq_unmask = ipu_irq_unmask, +}; + +/* Install the IRQ handler */ +int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev) +{ + struct ipu_platform_data *pdata = dev->dev.platform_data; + unsigned int irq, irq_base, i; + + irq_base = pdata->irq_base; + + for (i = 0; i < IPU_IRQ_NR_BANKS; i++) + irq_bank[i].ipu = ipu; + + for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) { + int ret; + + irq = irq_base + i; + ret = irq_set_chip(irq, &ipu_irq_chip); + if (ret < 0) + return ret; + ret = irq_set_chip_data(irq, irq_map + i); + if (ret < 0) + return ret; + irq_map[i].ipu = ipu; + irq_map[i].irq = irq; + irq_map[i].source = -EINVAL; + irq_set_handler(irq, handle_level_irq); +#ifdef CONFIG_ARM + set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); +#endif + } + + irq_set_handler_data(ipu->irq_fn, ipu); + irq_set_chained_handler(ipu->irq_fn, ipu_irq_fn); + + irq_set_handler_data(ipu->irq_err, ipu); + irq_set_chained_handler(ipu->irq_err, ipu_irq_err); + + return 0; +} + +void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev) +{ + struct ipu_platform_data *pdata = dev->dev.platform_data; + unsigned int irq, irq_base; + + irq_base = pdata->irq_base; + + irq_set_chained_handler(ipu->irq_fn, NULL); + irq_set_handler_data(ipu->irq_fn, NULL); + + irq_set_chained_handler(ipu->irq_err, NULL); + irq_set_handler_data(ipu->irq_err, NULL); + + for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) { +#ifdef CONFIG_ARM + set_irq_flags(irq, 0); +#endif + irq_set_chip(irq, NULL); + irq_set_chip_data(irq, NULL); + } +} diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c new file mode 100644 index 00000000..2ab0a3d0 --- /dev/null +++ b/drivers/dma/mpc512x_dma.c @@ -0,0 +1,832 @@ +/* + * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008. + * Copyright (C) Semihalf 2009 + * Copyright (C) Ilya Yanok, Emcraft Systems 2010 + * + * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description + * (defines, structures and comments) was taken from MPC5121 DMA driver + * written by Hongjun Chen <hong-jun.chen@freescale.com>. + * + * Approved as OSADL project by a majority of OSADL members and funded + * by OSADL membership fees in 2009; for details see www.osadl.org. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* + * This is initial version of MPC5121 DMA driver. Only memory to memory + * transfers are supported (tested using dmatest module). + */ + +#include <linux/module.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> + +#include <linux/random.h> + +#include "dmaengine.h" + +/* Number of DMA Transfer descriptors allocated per channel */ +#define MPC_DMA_DESCRIPTORS 64 + +/* Macro definitions */ +#define MPC_DMA_CHANNELS 64 +#define MPC_DMA_TCD_OFFSET 0x1000 + +/* Arbitration mode of group and channel */ +#define MPC_DMA_DMACR_EDCG (1 << 31) +#define MPC_DMA_DMACR_ERGA (1 << 3) +#define MPC_DMA_DMACR_ERCA (1 << 2) + +/* Error codes */ +#define MPC_DMA_DMAES_VLD (1 << 31) +#define MPC_DMA_DMAES_GPE (1 << 15) +#define MPC_DMA_DMAES_CPE (1 << 14) +#define MPC_DMA_DMAES_ERRCHN(err) \ + (((err) >> 8) & 0x3f) +#define MPC_DMA_DMAES_SAE (1 << 7) +#define MPC_DMA_DMAES_SOE (1 << 6) +#define MPC_DMA_DMAES_DAE (1 << 5) +#define MPC_DMA_DMAES_DOE (1 << 4) +#define MPC_DMA_DMAES_NCE (1 << 3) +#define MPC_DMA_DMAES_SGE (1 << 2) +#define MPC_DMA_DMAES_SBE (1 << 1) +#define MPC_DMA_DMAES_DBE (1 << 0) + +#define MPC_DMA_DMAGPOR_SNOOP_ENABLE (1 << 6) + +#define MPC_DMA_TSIZE_1 0x00 +#define MPC_DMA_TSIZE_2 0x01 +#define MPC_DMA_TSIZE_4 0x02 +#define MPC_DMA_TSIZE_16 0x04 +#define MPC_DMA_TSIZE_32 0x05 + +/* MPC5121 DMA engine registers */ +struct __attribute__ ((__packed__)) mpc_dma_regs { + /* 0x00 */ + u32 dmacr; /* DMA control register */ + u32 dmaes; /* DMA error status */ + /* 0x08 */ + u32 dmaerqh; /* DMA enable request high(channels 63~32) */ + u32 dmaerql; /* DMA enable request low(channels 31~0) */ + u32 dmaeeih; /* DMA enable error interrupt high(ch63~32) */ + u32 dmaeeil; /* DMA enable error interrupt low(ch31~0) */ + /* 0x18 */ + u8 dmaserq; /* DMA set enable request */ + u8 dmacerq; /* DMA clear enable request */ + u8 dmaseei; /* DMA set enable error interrupt */ + u8 dmaceei; /* DMA clear enable error interrupt */ + /* 0x1c */ + u8 dmacint; /* DMA clear interrupt request */ + u8 dmacerr; /* DMA clear error */ + u8 dmassrt; /* DMA set start bit */ + u8 dmacdne; /* DMA clear DONE status bit */ + /* 0x20 */ + u32 dmainth; /* DMA interrupt request high(ch63~32) */ + u32 dmaintl; /* DMA interrupt request low(ch31~0) */ + u32 dmaerrh; /* DMA error high(ch63~32) */ + u32 dmaerrl; /* DMA error low(ch31~0) */ + /* 0x30 */ + u32 dmahrsh; /* DMA hw request status high(ch63~32) */ + u32 dmahrsl; /* DMA hardware request status low(ch31~0) */ + union { + u32 dmaihsa; /* DMA interrupt high select AXE(ch63~32) */ + u32 dmagpor; /* (General purpose register on MPC8308) */ + }; + u32 dmailsa; /* DMA interrupt low select AXE(ch31~0) */ + /* 0x40 ~ 0xff */ + u32 reserve0[48]; /* Reserved */ + /* 0x100 */ + u8 dchpri[MPC_DMA_CHANNELS]; + /* DMA channels(0~63) priority */ +}; + +struct __attribute__ ((__packed__)) mpc_dma_tcd { + /* 0x00 */ + u32 saddr; /* Source address */ + + u32 smod:5; /* Source address modulo */ + u32 ssize:3; /* Source data transfer size */ + u32 dmod:5; /* Destination address modulo */ + u32 dsize:3; /* Destination data transfer size */ + u32 soff:16; /* Signed source address offset */ + + /* 0x08 */ + u32 nbytes; /* Inner "minor" byte count */ + u32 slast; /* Last source address adjustment */ + u32 daddr; /* Destination address */ + + /* 0x14 */ + u32 citer_elink:1; /* Enable channel-to-channel linking on + * minor loop complete + */ + u32 citer_linkch:6; /* Link channel for minor loop complete */ + u32 citer:9; /* Current "major" iteration count */ + u32 doff:16; /* Signed destination address offset */ + + /* 0x18 */ + u32 dlast_sga; /* Last Destination address adjustment/scatter + * gather address + */ + + /* 0x1c */ + u32 biter_elink:1; /* Enable channel-to-channel linking on major + * loop complete + */ + u32 biter_linkch:6; + u32 biter:9; /* Beginning "major" iteration count */ + u32 bwc:2; /* Bandwidth control */ + u32 major_linkch:6; /* Link channel number */ + u32 done:1; /* Channel done */ + u32 active:1; /* Channel active */ + u32 major_elink:1; /* Enable channel-to-channel linking on major + * loop complete + */ + u32 e_sg:1; /* Enable scatter/gather processing */ + u32 d_req:1; /* Disable request */ + u32 int_half:1; /* Enable an interrupt when major counter is + * half complete + */ + u32 int_maj:1; /* Enable an interrupt when major iteration + * count completes + */ + u32 start:1; /* Channel start */ +}; + +struct mpc_dma_desc { + struct dma_async_tx_descriptor desc; + struct mpc_dma_tcd *tcd; + dma_addr_t tcd_paddr; + int error; + struct list_head node; +}; + +struct mpc_dma_chan { + struct dma_chan chan; + struct list_head free; + struct list_head prepared; + struct list_head queued; + struct list_head active; + struct list_head completed; + struct mpc_dma_tcd *tcd; + dma_addr_t tcd_paddr; + + /* Lock for this structure */ + spinlock_t lock; +}; + +struct mpc_dma { + struct dma_device dma; + struct tasklet_struct tasklet; + struct mpc_dma_chan channels[MPC_DMA_CHANNELS]; + struct mpc_dma_regs __iomem *regs; + struct mpc_dma_tcd __iomem *tcd; + int irq; + int irq2; + uint error_status; + int is_mpc8308; + + /* Lock for error_status field in this structure */ + spinlock_t error_status_lock; +}; + +#define DRV_NAME "mpc512x_dma" + +/* Convert struct dma_chan to struct mpc_dma_chan */ +static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct mpc_dma_chan, chan); +} + +/* Convert struct dma_chan to struct mpc_dma */ +static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c) +{ + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c); + return container_of(mchan, struct mpc_dma, channels[c->chan_id]); +} + +/* + * Execute all queued DMA descriptors. + * + * Following requirements must be met while calling mpc_dma_execute(): + * a) mchan->lock is acquired, + * b) mchan->active list is empty, + * c) mchan->queued list contains at least one entry. + */ +static void mpc_dma_execute(struct mpc_dma_chan *mchan) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan); + struct mpc_dma_desc *first = NULL; + struct mpc_dma_desc *prev = NULL; + struct mpc_dma_desc *mdesc; + int cid = mchan->chan.chan_id; + + /* Move all queued descriptors to active list */ + list_splice_tail_init(&mchan->queued, &mchan->active); + + /* Chain descriptors into one transaction */ + list_for_each_entry(mdesc, &mchan->active, node) { + if (!first) + first = mdesc; + + if (!prev) { + prev = mdesc; + continue; + } + + prev->tcd->dlast_sga = mdesc->tcd_paddr; + prev->tcd->e_sg = 1; + mdesc->tcd->start = 1; + + prev = mdesc; + } + + prev->tcd->int_maj = 1; + + /* Send first descriptor in chain into hardware */ + memcpy_toio(&mdma->tcd[cid], first->tcd, sizeof(struct mpc_dma_tcd)); + + if (first != prev) + mdma->tcd[cid].e_sg = 1; + out_8(&mdma->regs->dmassrt, cid); +} + +/* Handle interrupt on one half of DMA controller (32 channels) */ +static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off) +{ + struct mpc_dma_chan *mchan; + struct mpc_dma_desc *mdesc; + u32 status = is | es; + int ch; + + while ((ch = fls(status) - 1) >= 0) { + status &= ~(1 << ch); + mchan = &mdma->channels[ch + off]; + + spin_lock(&mchan->lock); + + out_8(&mdma->regs->dmacint, ch + off); + out_8(&mdma->regs->dmacerr, ch + off); + + /* Check error status */ + if (es & (1 << ch)) + list_for_each_entry(mdesc, &mchan->active, node) + mdesc->error = -EIO; + + /* Execute queued descriptors */ + list_splice_tail_init(&mchan->active, &mchan->completed); + if (!list_empty(&mchan->queued)) + mpc_dma_execute(mchan); + + spin_unlock(&mchan->lock); + } +} + +/* Interrupt handler */ +static irqreturn_t mpc_dma_irq(int irq, void *data) +{ + struct mpc_dma *mdma = data; + uint es; + + /* Save error status register */ + es = in_be32(&mdma->regs->dmaes); + spin_lock(&mdma->error_status_lock); + if ((es & MPC_DMA_DMAES_VLD) && mdma->error_status == 0) + mdma->error_status = es; + spin_unlock(&mdma->error_status_lock); + + /* Handle interrupt on each channel */ + if (mdma->dma.chancnt > 32) { + mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmainth), + in_be32(&mdma->regs->dmaerrh), 32); + } + mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmaintl), + in_be32(&mdma->regs->dmaerrl), 0); + + /* Schedule tasklet */ + tasklet_schedule(&mdma->tasklet); + + return IRQ_HANDLED; +} + +/* process completed descriptors */ +static void mpc_dma_process_completed(struct mpc_dma *mdma) +{ + dma_cookie_t last_cookie = 0; + struct mpc_dma_chan *mchan; + struct mpc_dma_desc *mdesc; + struct dma_async_tx_descriptor *desc; + unsigned long flags; + LIST_HEAD(list); + int i; + + for (i = 0; i < mdma->dma.chancnt; i++) { + mchan = &mdma->channels[i]; + + /* Get all completed descriptors */ + spin_lock_irqsave(&mchan->lock, flags); + if (!list_empty(&mchan->completed)) + list_splice_tail_init(&mchan->completed, &list); + spin_unlock_irqrestore(&mchan->lock, flags); + + if (list_empty(&list)) + continue; + + /* Execute callbacks and run dependencies */ + list_for_each_entry(mdesc, &list, node) { + desc = &mdesc->desc; + + if (desc->callback) + desc->callback(desc->callback_param); + + last_cookie = desc->cookie; + dma_run_dependencies(desc); + } + + /* Free descriptors */ + spin_lock_irqsave(&mchan->lock, flags); + list_splice_tail_init(&list, &mchan->free); + mchan->chan.completed_cookie = last_cookie; + spin_unlock_irqrestore(&mchan->lock, flags); + } +} + +/* DMA Tasklet */ +static void mpc_dma_tasklet(unsigned long data) +{ + struct mpc_dma *mdma = (void *)data; + unsigned long flags; + uint es; + + spin_lock_irqsave(&mdma->error_status_lock, flags); + es = mdma->error_status; + mdma->error_status = 0; + spin_unlock_irqrestore(&mdma->error_status_lock, flags); + + /* Print nice error report */ + if (es) { + dev_err(mdma->dma.dev, + "Hardware reported following error(s) on channel %u:\n", + MPC_DMA_DMAES_ERRCHN(es)); + + if (es & MPC_DMA_DMAES_GPE) + dev_err(mdma->dma.dev, "- Group Priority Error\n"); + if (es & MPC_DMA_DMAES_CPE) + dev_err(mdma->dma.dev, "- Channel Priority Error\n"); + if (es & MPC_DMA_DMAES_SAE) + dev_err(mdma->dma.dev, "- Source Address Error\n"); + if (es & MPC_DMA_DMAES_SOE) + dev_err(mdma->dma.dev, "- Source Offset" + " Configuration Error\n"); + if (es & MPC_DMA_DMAES_DAE) + dev_err(mdma->dma.dev, "- Destination Address" + " Error\n"); + if (es & MPC_DMA_DMAES_DOE) + dev_err(mdma->dma.dev, "- Destination Offset" + " Configuration Error\n"); + if (es & MPC_DMA_DMAES_NCE) + dev_err(mdma->dma.dev, "- NBytes/Citter" + " Configuration Error\n"); + if (es & MPC_DMA_DMAES_SGE) + dev_err(mdma->dma.dev, "- Scatter/Gather" + " Configuration Error\n"); + if (es & MPC_DMA_DMAES_SBE) + dev_err(mdma->dma.dev, "- Source Bus Error\n"); + if (es & MPC_DMA_DMAES_DBE) + dev_err(mdma->dma.dev, "- Destination Bus Error\n"); + } + + mpc_dma_process_completed(mdma); +} + +/* Submit descriptor to hardware */ +static dma_cookie_t mpc_dma_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(txd->chan); + struct mpc_dma_desc *mdesc; + unsigned long flags; + dma_cookie_t cookie; + + mdesc = container_of(txd, struct mpc_dma_desc, desc); + + spin_lock_irqsave(&mchan->lock, flags); + + /* Move descriptor to queue */ + list_move_tail(&mdesc->node, &mchan->queued); + + /* If channel is idle, execute all queued descriptors */ + if (list_empty(&mchan->active)) + mpc_dma_execute(mchan); + + /* Update cookie */ + cookie = dma_cookie_assign(txd); + spin_unlock_irqrestore(&mchan->lock, flags); + + return cookie; +} + +/* Alloc channel resources */ +static int mpc_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma_desc *mdesc; + struct mpc_dma_tcd *tcd; + dma_addr_t tcd_paddr; + unsigned long flags; + LIST_HEAD(descs); + int i; + + /* Alloc DMA memory for Transfer Control Descriptors */ + tcd = dma_alloc_coherent(mdma->dma.dev, + MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd), + &tcd_paddr, GFP_KERNEL); + if (!tcd) + return -ENOMEM; + + /* Alloc descriptors for this channel */ + for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) { + mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL); + if (!mdesc) { + dev_notice(mdma->dma.dev, "Memory allocation error. " + "Allocated only %u descriptors\n", i); + break; + } + + dma_async_tx_descriptor_init(&mdesc->desc, chan); + mdesc->desc.flags = DMA_CTRL_ACK; + mdesc->desc.tx_submit = mpc_dma_tx_submit; + + mdesc->tcd = &tcd[i]; + mdesc->tcd_paddr = tcd_paddr + (i * sizeof(struct mpc_dma_tcd)); + + list_add_tail(&mdesc->node, &descs); + } + + /* Return error only if no descriptors were allocated */ + if (i == 0) { + dma_free_coherent(mdma->dma.dev, + MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd), + tcd, tcd_paddr); + return -ENOMEM; + } + + spin_lock_irqsave(&mchan->lock, flags); + mchan->tcd = tcd; + mchan->tcd_paddr = tcd_paddr; + list_splice_tail_init(&descs, &mchan->free); + spin_unlock_irqrestore(&mchan->lock, flags); + + /* Enable Error Interrupt */ + out_8(&mdma->regs->dmaseei, chan->chan_id); + + return 0; +} + +/* Free channel resources */ +static void mpc_dma_free_chan_resources(struct dma_chan *chan) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma_desc *mdesc, *tmp; + struct mpc_dma_tcd *tcd; + dma_addr_t tcd_paddr; + unsigned long flags; + LIST_HEAD(descs); + + spin_lock_irqsave(&mchan->lock, flags); + + /* Channel must be idle */ + BUG_ON(!list_empty(&mchan->prepared)); + BUG_ON(!list_empty(&mchan->queued)); + BUG_ON(!list_empty(&mchan->active)); + BUG_ON(!list_empty(&mchan->completed)); + + /* Move data */ + list_splice_tail_init(&mchan->free, &descs); + tcd = mchan->tcd; + tcd_paddr = mchan->tcd_paddr; + + spin_unlock_irqrestore(&mchan->lock, flags); + + /* Free DMA memory used by descriptors */ + dma_free_coherent(mdma->dma.dev, + MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd), + tcd, tcd_paddr); + + /* Free descriptors */ + list_for_each_entry_safe(mdesc, tmp, &descs, node) + kfree(mdesc); + + /* Disable Error Interrupt */ + out_8(&mdma->regs->dmaceei, chan->chan_id); +} + +/* Send all pending descriptor to hardware */ +static void mpc_dma_issue_pending(struct dma_chan *chan) +{ + /* + * We are posting descriptors to the hardware as soon as + * they are ready, so this function does nothing. + */ +} + +/* Check request completion status */ +static enum dma_status +mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + enum dma_status ret; + unsigned long flags; + + spin_lock_irqsave(&mchan->lock, flags); + ret = dma_cookie_status(chan, cookie, txstate); + spin_unlock_irqrestore(&mchan->lock, flags); + + return ret; +} + +/* Prepare descriptor for memory to memory copy */ +static struct dma_async_tx_descriptor * +mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); + struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma_desc *mdesc = NULL; + struct mpc_dma_tcd *tcd; + unsigned long iflags; + + /* Get free descriptor */ + spin_lock_irqsave(&mchan->lock, iflags); + if (!list_empty(&mchan->free)) { + mdesc = list_first_entry(&mchan->free, struct mpc_dma_desc, + node); + list_del(&mdesc->node); + } + spin_unlock_irqrestore(&mchan->lock, iflags); + + if (!mdesc) { + /* try to free completed descriptors */ + mpc_dma_process_completed(mdma); + return NULL; + } + + mdesc->error = 0; + tcd = mdesc->tcd; + + /* Prepare Transfer Control Descriptor for this transaction */ + memset(tcd, 0, sizeof(struct mpc_dma_tcd)); + + if (IS_ALIGNED(src | dst | len, 32)) { + tcd->ssize = MPC_DMA_TSIZE_32; + tcd->dsize = MPC_DMA_TSIZE_32; + tcd->soff = 32; + tcd->doff = 32; + } else if (!mdma->is_mpc8308 && IS_ALIGNED(src | dst | len, 16)) { + /* MPC8308 doesn't support 16 byte transfers */ + tcd->ssize = MPC_DMA_TSIZE_16; + tcd->dsize = MPC_DMA_TSIZE_16; + tcd->soff = 16; + tcd->doff = 16; + } else if (IS_ALIGNED(src | dst | len, 4)) { + tcd->ssize = MPC_DMA_TSIZE_4; + tcd->dsize = MPC_DMA_TSIZE_4; + tcd->soff = 4; + tcd->doff = 4; + } else if (IS_ALIGNED(src | dst | len, 2)) { + tcd->ssize = MPC_DMA_TSIZE_2; + tcd->dsize = MPC_DMA_TSIZE_2; + tcd->soff = 2; + tcd->doff = 2; + } else { + tcd->ssize = MPC_DMA_TSIZE_1; + tcd->dsize = MPC_DMA_TSIZE_1; + tcd->soff = 1; + tcd->doff = 1; + } + + tcd->saddr = src; + tcd->daddr = dst; + tcd->nbytes = len; + tcd->biter = 1; + tcd->citer = 1; + + /* Place descriptor in prepared list */ + spin_lock_irqsave(&mchan->lock, iflags); + list_add_tail(&mdesc->node, &mchan->prepared); + spin_unlock_irqrestore(&mchan->lock, iflags); + + return &mdesc->desc; +} + +static int __devinit mpc_dma_probe(struct platform_device *op) +{ + struct device_node *dn = op->dev.of_node; + struct device *dev = &op->dev; + struct dma_device *dma; + struct mpc_dma *mdma; + struct mpc_dma_chan *mchan; + struct resource res; + ulong regs_start, regs_size; + int retval, i; + + mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL); + if (!mdma) { + dev_err(dev, "Memory exhausted!\n"); + return -ENOMEM; + } + + mdma->irq = irq_of_parse_and_map(dn, 0); + if (mdma->irq == NO_IRQ) { + dev_err(dev, "Error mapping IRQ!\n"); + return -EINVAL; + } + + if (of_device_is_compatible(dn, "fsl,mpc8308-dma")) { + mdma->is_mpc8308 = 1; + mdma->irq2 = irq_of_parse_and_map(dn, 1); + if (mdma->irq2 == NO_IRQ) { + dev_err(dev, "Error mapping IRQ!\n"); + return -EINVAL; + } + } + + retval = of_address_to_resource(dn, 0, &res); + if (retval) { + dev_err(dev, "Error parsing memory region!\n"); + return retval; + } + + regs_start = res.start; + regs_size = resource_size(&res); + + if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) { + dev_err(dev, "Error requesting memory region!\n"); + return -EBUSY; + } + + mdma->regs = devm_ioremap(dev, regs_start, regs_size); + if (!mdma->regs) { + dev_err(dev, "Error mapping memory region!\n"); + return -ENOMEM; + } + + mdma->tcd = (struct mpc_dma_tcd *)((u8 *)(mdma->regs) + + MPC_DMA_TCD_OFFSET); + + retval = devm_request_irq(dev, mdma->irq, &mpc_dma_irq, 0, DRV_NAME, + mdma); + if (retval) { + dev_err(dev, "Error requesting IRQ!\n"); + return -EINVAL; + } + + if (mdma->is_mpc8308) { + retval = devm_request_irq(dev, mdma->irq2, &mpc_dma_irq, 0, + DRV_NAME, mdma); + if (retval) { + dev_err(dev, "Error requesting IRQ2!\n"); + return -EINVAL; + } + } + + spin_lock_init(&mdma->error_status_lock); + + dma = &mdma->dma; + dma->dev = dev; + if (!mdma->is_mpc8308) + dma->chancnt = MPC_DMA_CHANNELS; + else + dma->chancnt = 16; /* MPC8308 DMA has only 16 channels */ + dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources; + dma->device_free_chan_resources = mpc_dma_free_chan_resources; + dma->device_issue_pending = mpc_dma_issue_pending; + dma->device_tx_status = mpc_dma_tx_status; + dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy; + + INIT_LIST_HEAD(&dma->channels); + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + + for (i = 0; i < dma->chancnt; i++) { + mchan = &mdma->channels[i]; + + mchan->chan.device = dma; + dma_cookie_init(&mchan->chan); + + INIT_LIST_HEAD(&mchan->free); + INIT_LIST_HEAD(&mchan->prepared); + INIT_LIST_HEAD(&mchan->queued); + INIT_LIST_HEAD(&mchan->active); + INIT_LIST_HEAD(&mchan->completed); + + spin_lock_init(&mchan->lock); + list_add_tail(&mchan->chan.device_node, &dma->channels); + } + + tasklet_init(&mdma->tasklet, mpc_dma_tasklet, (unsigned long)mdma); + + /* + * Configure DMA Engine: + * - Dynamic clock, + * - Round-robin group arbitration, + * - Round-robin channel arbitration. + */ + if (!mdma->is_mpc8308) { + out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG | + MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA); + + /* Disable hardware DMA requests */ + out_be32(&mdma->regs->dmaerqh, 0); + out_be32(&mdma->regs->dmaerql, 0); + + /* Disable error interrupts */ + out_be32(&mdma->regs->dmaeeih, 0); + out_be32(&mdma->regs->dmaeeil, 0); + + /* Clear interrupts status */ + out_be32(&mdma->regs->dmainth, 0xFFFFFFFF); + out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF); + out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF); + out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF); + + /* Route interrupts to IPIC */ + out_be32(&mdma->regs->dmaihsa, 0); + out_be32(&mdma->regs->dmailsa, 0); + } else { + /* MPC8308 has 16 channels and lacks some registers */ + out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA); + + /* enable snooping */ + out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE); + /* Disable error interrupts */ + out_be32(&mdma->regs->dmaeeil, 0); + + /* Clear interrupts status */ + out_be32(&mdma->regs->dmaintl, 0xFFFF); + out_be32(&mdma->regs->dmaerrl, 0xFFFF); + } + + /* Register DMA engine */ + dev_set_drvdata(dev, mdma); + retval = dma_async_device_register(dma); + if (retval) { + devm_free_irq(dev, mdma->irq, mdma); + irq_dispose_mapping(mdma->irq); + } + + return retval; +} + +static int __devexit mpc_dma_remove(struct platform_device *op) +{ + struct device *dev = &op->dev; + struct mpc_dma *mdma = dev_get_drvdata(dev); + + dma_async_device_unregister(&mdma->dma); + devm_free_irq(dev, mdma->irq, mdma); + irq_dispose_mapping(mdma->irq); + + return 0; +} + +static struct of_device_id mpc_dma_match[] = { + { .compatible = "fsl,mpc5121-dma", }, + {}, +}; + +static struct platform_driver mpc_dma_driver = { + .probe = mpc_dma_probe, + .remove = __devexit_p(mpc_dma_remove), + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .of_match_table = mpc_dma_match, + }, +}; + +module_platform_driver(mpc_dma_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Piotr Ziecik <kosmo@semihalf.com>"); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c new file mode 100644 index 00000000..fa5d55fe --- /dev/null +++ b/drivers/dma/mv_xor.c @@ -0,0 +1,1356 @@ +/* + * offload engine driver for the Marvell XOR engine + * Copyright (C) 2007, 2008, Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/memory.h> +#include <plat/mv_xor.h> + +#include "dmaengine.h" +#include "mv_xor.h" + +static void mv_xor_issue_pending(struct dma_chan *chan); + +#define to_mv_xor_chan(chan) \ + container_of(chan, struct mv_xor_chan, common) + +#define to_mv_xor_device(dev) \ + container_of(dev, struct mv_xor_device, common) + +#define to_mv_xor_slot(tx) \ + container_of(tx, struct mv_xor_desc_slot, async_tx) + +static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + + hw_desc->status = (1 << 31); + hw_desc->phy_next_desc = 0; + hw_desc->desc_command = (1 << 31); +} + +static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + return hw_desc->phy_dest_addr; +} + +static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc, + int src_idx) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + return hw_desc->phy_src_addr[src_idx]; +} + + +static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc, + u32 byte_count) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->byte_count = byte_count; +} + +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; +} + +static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_next_desc = 0; +} + +static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val) +{ + desc->value = val; +} + +static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc, + dma_addr_t addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_dest_addr = addr; +} + +static int mv_chan_memset_slot_count(size_t len) +{ + return 1; +} + +#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c) + +static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, + int index, dma_addr_t addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_src_addr[index] = addr; + if (desc->type == DMA_XOR) + hw_desc->desc_command |= (1 << index); +} + +static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan) +{ + return __raw_readl(XOR_CURR_DESC(chan)); +} + +static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan, + u32 next_desc_addr) +{ + __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan)); +} + +static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr) +{ + __raw_writel(desc_addr, XOR_DEST_POINTER(chan)); +} + +static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size) +{ + __raw_writel(block_size, XOR_BLOCK_SIZE(chan)); +} + +static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value) +{ + __raw_writel(value, XOR_INIT_VALUE_LOW(chan)); + __raw_writel(value, XOR_INIT_VALUE_HIGH(chan)); +} + +static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan) +{ + u32 val = __raw_readl(XOR_INTR_MASK(chan)); + val |= XOR_INTR_MASK_VALUE << (chan->idx * 16); + __raw_writel(val, XOR_INTR_MASK(chan)); +} + +static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) +{ + u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan)); + intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF; + return intr_cause; +} + +static int mv_is_err_intr(u32 intr_cause) +{ + if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9))) + return 1; + + return 0; +} + +static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) +{ + u32 val = ~(1 << (chan->idx * 16)); + dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val); + __raw_writel(val, XOR_INTR_CAUSE(chan)); +} + +static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan) +{ + u32 val = 0xFFFF0000 >> (chan->idx * 16); + __raw_writel(val, XOR_INTR_CAUSE(chan)); +} + +static int mv_can_chain(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc_slot *chain_old_tail = list_entry( + desc->chain_node.prev, struct mv_xor_desc_slot, chain_node); + + if (chain_old_tail->type != desc->type) + return 0; + if (desc->type == DMA_MEMSET) + return 0; + + return 1; +} + +static void mv_set_mode(struct mv_xor_chan *chan, + enum dma_transaction_type type) +{ + u32 op_mode; + u32 config = __raw_readl(XOR_CONFIG(chan)); + + switch (type) { + case DMA_XOR: + op_mode = XOR_OPERATION_MODE_XOR; + break; + case DMA_MEMCPY: + op_mode = XOR_OPERATION_MODE_MEMCPY; + break; + case DMA_MEMSET: + op_mode = XOR_OPERATION_MODE_MEMSET; + break; + default: + dev_printk(KERN_ERR, chan->device->common.dev, + "error: unsupported operation %d.\n", + type); + BUG(); + return; + } + + config &= ~0x7; + config |= op_mode; + __raw_writel(config, XOR_CONFIG(chan)); + chan->current_type = type; +} + +static void mv_chan_activate(struct mv_xor_chan *chan) +{ + u32 activation; + + dev_dbg(chan->device->common.dev, " activate chan.\n"); + activation = __raw_readl(XOR_ACTIVATION(chan)); + activation |= 0x1; + __raw_writel(activation, XOR_ACTIVATION(chan)); +} + +static char mv_chan_is_busy(struct mv_xor_chan *chan) +{ + u32 state = __raw_readl(XOR_ACTIVATION(chan)); + + state = (state >> 4) & 0x3; + + return (state == 1) ? 1 : 0; +} + +static int mv_chan_xor_slot_count(size_t len, int src_cnt) +{ + return 1; +} + +/** + * mv_xor_free_slots - flags descriptor slots for reuse + * @slot: Slot to free + * Caller must hold &mv_chan->lock while calling this function + */ +static void mv_xor_free_slots(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *slot) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n", + __func__, __LINE__, slot); + + slot->slots_per_op = 0; + +} + +/* + * mv_xor_start_new_chain - program the engine to operate on new chain headed by + * sw_desc + * Caller must hold &mv_chan->lock while calling this function + */ +static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n", + __func__, __LINE__, sw_desc); + if (sw_desc->type != mv_chan->current_type) + mv_set_mode(mv_chan, sw_desc->type); + + if (sw_desc->type == DMA_MEMSET) { + /* for memset requests we need to program the engine, no + * descriptors used. + */ + struct mv_xor_desc *hw_desc = sw_desc->hw_desc; + mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr); + mv_chan_set_block_size(mv_chan, sw_desc->unmap_len); + mv_chan_set_value(mv_chan, sw_desc->value); + } else { + /* set the hardware chain */ + mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys); + } + mv_chan->pending += sw_desc->slot_cnt; + mv_xor_issue_pending(&mv_chan->common); +} + +static dma_cookie_t +mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, dma_cookie_t cookie) +{ + BUG_ON(desc->async_tx.cookie < 0); + + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + if (desc->async_tx.callback) + desc->async_tx.callback( + desc->async_tx.callback_param); + + /* unmap dma addresses + * (unmap_single vs unmap_page?) + */ + if (desc->group_head && desc->unmap_len) { + struct mv_xor_desc_slot *unmap = desc->group_head; + struct device *dev = + &mv_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = desc->async_tx.flags; + u32 src_cnt; + dma_addr_t addr; + dma_addr_t dest; + + src_cnt = unmap->unmap_src_cnt; + dest = mv_desc_get_dest_addr(unmap); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + enum dma_data_direction dir; + + if (src_cnt > 1) /* is xor ? */ + dir = DMA_BIDIRECTIONAL; + else + dir = DMA_FROM_DEVICE; + dma_unmap_page(dev, dest, len, dir); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = mv_desc_get_src_addr(unmap, + src_cnt); + if (addr == dest) + continue; + dma_unmap_page(dev, addr, len, + DMA_TO_DEVICE); + } + } + desc->group_head = NULL; + } + } + + /* run dependent operations */ + dma_run_dependencies(&desc->async_tx); + + return cookie; +} + +static int +mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter, *_iter; + + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__); + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, + completed_node) { + + if (async_tx_test_ack(&iter->async_tx)) { + list_del(&iter->completed_node); + mv_xor_free_slots(mv_chan, iter); + } + } + return 0; +} + +static int +mv_xor_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n", + __func__, __LINE__, desc, desc->async_tx.flags); + list_del(&desc->chain_node); + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) { + /* move this slot to the completed_slots */ + list_add_tail(&desc->completed_node, &mv_chan->completed_slots); + return 0; + } + + mv_xor_free_slots(mv_chan, desc); + return 0; +} + +static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter, *_iter; + dma_cookie_t cookie = 0; + int busy = mv_chan_is_busy(mv_chan); + u32 current_desc = mv_chan_get_current_desc(mv_chan); + int seen_current = 0; + + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__); + dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc); + mv_xor_clean_completed_slots(mv_chan); + + /* free completed slots from the chain starting with + * the oldest descriptor + */ + + list_for_each_entry_safe(iter, _iter, &mv_chan->chain, + chain_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel, subsequent descriptors are either in + * process or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy + */ + if (iter->async_tx.phys == current_desc) { + seen_current = 1; + if (busy) + break; + } + + cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie); + + if (mv_xor_clean_slot(iter, mv_chan)) + break; + } + + if ((busy == 0) && !list_empty(&mv_chan->chain)) { + struct mv_xor_desc_slot *chain_head; + chain_head = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + chain_node); + + mv_xor_start_new_chain(mv_chan, chain_head); + } + + if (cookie > 0) + mv_chan->common.completed_cookie = cookie; +} + +static void +mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +{ + spin_lock_bh(&mv_chan->lock); + __mv_xor_slot_cleanup(mv_chan); + spin_unlock_bh(&mv_chan->lock); +} + +static void mv_xor_tasklet(unsigned long data) +{ + struct mv_xor_chan *chan = (struct mv_xor_chan *) data; + mv_xor_slot_cleanup(chan); +} + +static struct mv_xor_desc_slot * +mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots, + int slots_per_op) +{ + struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL; + LIST_HEAD(chain); + int slots_found, retry = 0; + + /* start search from the last allocated descrtiptor + * if a contiguous allocation can not be found start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = mv_chan->last_used; + else + iter = list_entry(&mv_chan->all_slots, + struct mv_xor_desc_slot, + slot_node); + + list_for_each_entry_safe_continue( + iter, _iter, &mv_chan->all_slots, slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + /* give up after finding the first busy slot + * on the second pass through the list + */ + if (retry) + break; + + slots_found = 0; + continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) + alloc_start = iter; + + if (slots_found == num_slots) { + struct mv_xor_desc_slot *alloc_tail = NULL; + struct mv_xor_desc_slot *last_used = NULL; + iter = alloc_start; + while (num_slots) { + int i; + + /* pre-ack all but the last descriptor */ + async_tx_ack(&iter->async_tx); + + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct mv_xor_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->tx_list); + mv_chan->last_used = last_used; + mv_desc_clear_next_desc(alloc_start); + mv_desc_clear_next_desc(alloc_tail); + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* try to free some slots if the allocation fails */ + tasklet_schedule(&mv_chan->irq_tasklet); + + return NULL; +} + +/************************ DMA engine API functions ****************************/ +static dma_cookie_t +mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx); + struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan); + struct mv_xor_desc_slot *grp_start, *old_chain_tail; + dma_cookie_t cookie; + int new_hw_chain = 1; + + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p: async_tx %p\n", + __func__, sw_desc, &sw_desc->async_tx); + + grp_start = sw_desc->group_head; + + spin_lock_bh(&mv_chan->lock); + cookie = dma_cookie_assign(tx); + + if (list_empty(&mv_chan->chain)) + list_splice_init(&sw_desc->tx_list, &mv_chan->chain); + else { + new_hw_chain = 0; + + old_chain_tail = list_entry(mv_chan->chain.prev, + struct mv_xor_desc_slot, + chain_node); + list_splice_init(&grp_start->tx_list, + &old_chain_tail->chain_node); + + if (!mv_can_chain(grp_start)) + goto submit_done; + + dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n", + old_chain_tail->async_tx.phys); + + /* fix up the hardware chain */ + mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys); + + /* if the channel is not busy */ + if (!mv_chan_is_busy(mv_chan)) { + u32 current_desc = mv_chan_get_current_desc(mv_chan); + /* + * and the curren desc is the end of the chain before + * the append, then we need to start the channel + */ + if (current_desc == old_chain_tail->async_tx.phys) + new_hw_chain = 1; + } + } + + if (new_hw_chain) + mv_xor_start_new_chain(mv_chan, grp_start); + +submit_done: + spin_unlock_bh(&mv_chan->lock); + + return cookie; +} + +/* returns the number of allocated descriptors */ +static int mv_xor_alloc_chan_resources(struct dma_chan *chan) +{ + char *hw_desc; + int idx; + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *slot = NULL; + struct mv_xor_platform_data *plat_data = + mv_chan->device->pdev->dev.platform_data; + int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE; + + /* Allocate descriptor slots */ + idx = mv_chan->slots_allocated; + while (idx < num_descs_in_pool) { + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "MV XOR Channel only initialized" + " %d descriptor slots", idx); + break; + } + hw_desc = (char *) mv_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE]; + + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = mv_xor_tx_submit; + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->tx_list); + hw_desc = (char *) mv_chan->device->dma_desc_pool; + slot->async_tx.phys = + (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; + slot->idx = idx++; + + spin_lock_bh(&mv_chan->lock); + mv_chan->slots_allocated = idx; + list_add_tail(&slot->slot_node, &mv_chan->all_slots); + spin_unlock_bh(&mv_chan->lock); + } + + if (mv_chan->slots_allocated && !mv_chan->last_used) + mv_chan->last_used = list_entry(mv_chan->all_slots.next, + struct mv_xor_desc_slot, + slot_node); + + dev_dbg(mv_chan->device->common.dev, + "allocated %d descriptor slots last_used: %p\n", + mv_chan->slots_allocated, mv_chan->last_used); + + return mv_chan->slots_allocated ? : -ENOMEM; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + dev_dbg(mv_chan->device->common.dev, + "%s dest: %x src %x len: %u flags: %ld\n", + __func__, dest, src, len, flags); + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_memcpy_slot_count(len); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_MEMCPY; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + mv_desc_set_src_addr(grp_start, 0, src); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + } + spin_unlock_bh(&mv_chan->lock); + + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p\n", + __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + dev_dbg(mv_chan->device->common.dev, + "%s dest: %x len: %u flags: %ld\n", + __func__, dest, len, flags); + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_memset_slot_count(len); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_MEMSET; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + mv_desc_set_block_fill_val(grp_start, value); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + } + spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p \n", + __func__, sw_desc, &sw_desc->async_tx); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); + + dev_dbg(mv_chan->device->common.dev, + "%s src_cnt: %d len: dest %x %u flags: %ld\n", + __func__, src_cnt, len, dest, flags); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_xor_slot_count(len, src_cnt); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_XOR; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + /* the byte count field is the same as in memcpy desc*/ + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + while (src_cnt--) + mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]); + } + spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p \n", + __func__, sw_desc, &sw_desc->async_tx); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static void mv_xor_free_chan_resources(struct dma_chan *chan) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *iter, *_iter; + int in_use_descs = 0; + + mv_xor_slot_cleanup(mv_chan); + + spin_lock_bh(&mv_chan->lock); + list_for_each_entry_safe(iter, _iter, &mv_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, + completed_node) { + in_use_descs++; + list_del(&iter->completed_node); + } + list_for_each_entry_safe_reverse( + iter, _iter, &mv_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + mv_chan->slots_allocated--; + } + mv_chan->last_used = NULL; + + dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n", + __func__, mv_chan->slots_allocated); + spin_unlock_bh(&mv_chan->lock); + + if (in_use_descs) + dev_err(mv_chan->device->common.dev, + "freeing %d in use descriptors!\n", in_use_descs); +} + +/** + * mv_xor_status - poll the status of an XOR transaction + * @chan: XOR channel handle + * @cookie: XOR transaction identifier + * @txstate: XOR transactions state holder (or NULL) + */ +static enum dma_status mv_xor_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_SUCCESS) { + mv_xor_clean_completed_slots(mv_chan); + return ret; + } + mv_xor_slot_cleanup(mv_chan); + + return dma_cookie_status(chan, cookie, txstate); +} + +static void mv_dump_xor_regs(struct mv_xor_chan *chan) +{ + u32 val; + + val = __raw_readl(XOR_CONFIG(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "config 0x%08x.\n", val); + + val = __raw_readl(XOR_ACTIVATION(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "activation 0x%08x.\n", val); + + val = __raw_readl(XOR_INTR_CAUSE(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "intr cause 0x%08x.\n", val); + + val = __raw_readl(XOR_INTR_MASK(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "intr mask 0x%08x.\n", val); + + val = __raw_readl(XOR_ERROR_CAUSE(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "error cause 0x%08x.\n", val); + + val = __raw_readl(XOR_ERROR_ADDR(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "error addr 0x%08x.\n", val); +} + +static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) +{ + if (intr_cause & (1 << 4)) { + dev_dbg(chan->device->common.dev, + "ignore this error\n"); + return; + } + + dev_printk(KERN_ERR, chan->device->common.dev, + "error on chan %d. intr cause 0x%08x.\n", + chan->idx, intr_cause); + + mv_dump_xor_regs(chan); + BUG(); +} + +static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) +{ + struct mv_xor_chan *chan = data; + u32 intr_cause = mv_chan_get_intr_cause(chan); + + dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause); + + if (mv_is_err_intr(intr_cause)) + mv_xor_err_interrupt_handler(chan, intr_cause); + + tasklet_schedule(&chan->irq_tasklet); + + mv_xor_device_clear_eoc_cause(chan); + + return IRQ_HANDLED; +} + +static void mv_xor_issue_pending(struct dma_chan *chan) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + + if (mv_chan->pending >= MV_XOR_THRESHOLD) { + mv_chan->pending = 0; + mv_chan_activate(mv_chan); + } +} + +/* + * Perform a transaction to verify the HW works. + */ +#define MV_XOR_TEST_SIZE 2000 + +static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device) +{ + int i; + void *src, *dest; + dma_addr_t src_dma, dest_dma; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + struct dma_async_tx_descriptor *tx; + int err = 0; + struct mv_xor_chan *mv_chan; + + src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + + dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < MV_XOR_TEST_SIZE; i++) + ((u8 *) src)[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (mv_xor_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + dest_dma = dma_map_single(dma_chan->device->dev, dest, + MV_XOR_TEST_SIZE, DMA_FROM_DEVICE); + + src_dma = dma_map_single(dma_chan->device->dev, src, + MV_XOR_TEST_SIZE, DMA_TO_DEVICE); + + tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma, + MV_XOR_TEST_SIZE, 0); + cookie = mv_xor_tx_submit(tx); + mv_xor_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(1); + + if (mv_xor_status(dma_chan, cookie, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + mv_chan = to_mv_xor_chan(dma_chan); + dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma, + MV_XOR_TEST_SIZE, DMA_FROM_DEVICE); + if (memcmp(src, dest, MV_XOR_TEST_SIZE)) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + mv_xor_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */ +static int __devinit +mv_xor_xor_self_test(struct mv_xor_device *device) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[MV_XOR_NUM_SRC_TEST]; + dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST]; + dma_addr_t dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + int err = 0; + struct mv_xor_chan *mv_chan; + + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (mv_xor_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + + for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++) + dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], + 0, PAGE_SIZE, DMA_TO_DEVICE); + + tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0); + + cookie = mv_xor_tx_submit(tx); + mv_xor_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(8); + + if (mv_xor_status(dma_chan, cookie, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + mv_chan = to_mv_xor_chan(dma_chan); + dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor failed compare, disabling." + " index %d, data %x, expected %x\n", i, + ptr[i], cmp_word); + err = -ENODEV; + goto free_resources; + } + } + +free_resources: + mv_xor_free_chan_resources(dma_chan); +out: + src_idx = MV_XOR_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int __devexit mv_xor_remove(struct platform_device *dev) +{ + struct mv_xor_device *device = platform_get_drvdata(dev); + struct dma_chan *chan, *_chan; + struct mv_xor_chan *mv_chan; + struct mv_xor_platform_data *plat_data = dev->dev.platform_data; + + dma_async_device_unregister(&device->common); + + dma_free_coherent(&dev->dev, plat_data->pool_size, + device->dma_desc_pool_virt, device->dma_desc_pool); + + list_for_each_entry_safe(chan, _chan, &device->common.channels, + device_node) { + mv_chan = to_mv_xor_chan(chan); + list_del(&chan->device_node); + } + + return 0; +} + +static int __devinit mv_xor_probe(struct platform_device *pdev) +{ + int ret = 0; + int irq; + struct mv_xor_device *adev; + struct mv_xor_chan *mv_chan; + struct dma_device *dma_dev; + struct mv_xor_platform_data *plat_data = pdev->dev.platform_data; + + + adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + + dma_dev = &adev->common; + + /* allocate coherent memory for hardware descriptors + * note: writecombine gives slightly better performance, but + * requires that we explicitly flush the writes + */ + adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, + plat_data->pool_size, + &adev->dma_desc_pool, + GFP_KERNEL); + if (!adev->dma_desc_pool_virt) + return -ENOMEM; + + adev->id = plat_data->hw_id; + + /* discover transaction capabilites from the platform data */ + dma_dev->cap_mask = plat_data->cap_mask; + adev->pdev = pdev; + platform_set_drvdata(pdev, adev); + + adev->shared = platform_get_drvdata(plat_data->shared); + + INIT_LIST_HEAD(&dma_dev->channels); + + /* set base routines */ + dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources; + dma_dev->device_free_chan_resources = mv_xor_free_chan_resources; + dma_dev->device_tx_status = mv_xor_status; + dma_dev->device_issue_pending = mv_xor_issue_pending; + dma_dev->dev = &pdev->dev; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset; + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->max_xor = 8; + dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; + } + + mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL); + if (!mv_chan) { + ret = -ENOMEM; + goto err_free_dma; + } + mv_chan->device = adev; + mv_chan->idx = plat_data->hw_id; + mv_chan->mmr_base = adev->shared->xor_base; + + if (!mv_chan->mmr_base) { + ret = -ENOMEM; + goto err_free_dma; + } + tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long) + mv_chan); + + /* clear errors before enabling interrupts */ + mv_xor_device_clear_err_status(mv_chan); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto err_free_dma; + } + ret = devm_request_irq(&pdev->dev, irq, + mv_xor_interrupt_handler, + 0, dev_name(&pdev->dev), mv_chan); + if (ret) + goto err_free_dma; + + mv_chan_unmask_interrupts(mv_chan); + + mv_set_mode(mv_chan, DMA_MEMCPY); + + spin_lock_init(&mv_chan->lock); + INIT_LIST_HEAD(&mv_chan->chain); + INIT_LIST_HEAD(&mv_chan->completed_slots); + INIT_LIST_HEAD(&mv_chan->all_slots); + mv_chan->common.device = dma_dev; + dma_cookie_init(&mv_chan->common); + + list_add_tail(&mv_chan->common.device_node, &dma_dev->channels); + + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { + ret = mv_xor_memcpy_self_test(adev); + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); + if (ret) + goto err_free_dma; + } + + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + ret = mv_xor_xor_self_test(adev); + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); + if (ret) + goto err_free_dma; + } + + dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: " + "( %s%s%s%s)\n", + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); + + dma_async_device_register(dma_dev); + goto out; + + err_free_dma: + dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + out: + return ret; +} + +static void +mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp, + const struct mbus_dram_target_info *dram) +{ + void __iomem *base = msp->xor_base; + u32 win_enable = 0; + int i; + + for (i = 0; i < 8; i++) { + writel(0, base + WINDOW_BASE(i)); + writel(0, base + WINDOW_SIZE(i)); + if (i < 4) + writel(0, base + WINDOW_REMAP_HIGH(i)); + } + + for (i = 0; i < dram->num_cs; i++) { + const struct mbus_dram_window *cs = dram->cs + i; + + writel((cs->base & 0xffff0000) | + (cs->mbus_attr << 8) | + dram->mbus_dram_target_id, base + WINDOW_BASE(i)); + writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i)); + + win_enable |= (1 << i); + win_enable |= 3 << (16 + (2 * i)); + } + + writel(win_enable, base + WINDOW_BAR_ENABLE(0)); + writel(win_enable, base + WINDOW_BAR_ENABLE(1)); +} + +static struct platform_driver mv_xor_driver = { + .probe = mv_xor_probe, + .remove = __devexit_p(mv_xor_remove), + .driver = { + .owner = THIS_MODULE, + .name = MV_XOR_NAME, + }, +}; + +static int mv_xor_shared_probe(struct platform_device *pdev) +{ + const struct mbus_dram_target_info *dram; + struct mv_xor_shared_private *msp; + struct resource *res; + + dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n"); + + msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL); + if (!msp) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + msp->xor_base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!msp->xor_base) + return -EBUSY; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + return -ENODEV; + + msp->xor_high_base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!msp->xor_high_base) + return -EBUSY; + + platform_set_drvdata(pdev, msp); + + /* + * (Re-)program MBUS remapping windows if we are asked to. + */ + dram = mv_mbus_dram_info(); + if (dram) + mv_xor_conf_mbus_windows(msp, dram); + + return 0; +} + +static int mv_xor_shared_remove(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver mv_xor_shared_driver = { + .probe = mv_xor_shared_probe, + .remove = mv_xor_shared_remove, + .driver = { + .owner = THIS_MODULE, + .name = MV_XOR_SHARED_NAME, + }, +}; + + +static int __init mv_xor_init(void) +{ + int rc; + + rc = platform_driver_register(&mv_xor_shared_driver); + if (!rc) { + rc = platform_driver_register(&mv_xor_driver); + if (rc) + platform_driver_unregister(&mv_xor_shared_driver); + } + return rc; +} +module_init(mv_xor_init); + +/* it's currently unsafe to unload this module */ +#if 0 +static void __exit mv_xor_exit(void) +{ + platform_driver_unregister(&mv_xor_driver); + platform_driver_unregister(&mv_xor_shared_driver); + return; +} + +module_exit(mv_xor_exit); +#endif + +MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>"); +MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h new file mode 100644 index 00000000..654876b7 --- /dev/null +++ b/drivers/dma/mv_xor.h @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2007, 2008, Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MV_XOR_H +#define MV_XOR_H + +#include <linux/types.h> +#include <linux/io.h> +#include <linux/dmaengine.h> +#include <linux/interrupt.h> + +#define USE_TIMER +#define MV_XOR_SLOT_SIZE 64 +#define MV_XOR_THRESHOLD 1 + +#define XOR_OPERATION_MODE_XOR 0 +#define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_MEMSET 4 + +#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4)) +#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4)) +#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4)) +#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4)) +#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4)) +#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0) +#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4) + +#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4)) +#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4)) +#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30) +#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40) +#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50) +#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60) +#define XOR_INTR_MASK_VALUE 0x3F5 + +#define WINDOW_BASE(w) (0x250 + ((w) << 2)) +#define WINDOW_SIZE(w) (0x270 + ((w) << 2)) +#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2)) +#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2)) + +struct mv_xor_shared_private { + void __iomem *xor_base; + void __iomem *xor_high_base; +}; + + +/** + * struct mv_xor_device - internal representation of a XOR device + * @pdev: Platform device + * @id: HW XOR Device selector + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) + * @common: embedded struct dma_device + */ +struct mv_xor_device { + struct platform_device *pdev; + int id; + dma_addr_t dma_desc_pool; + void *dma_desc_pool_virt; + struct dma_device common; + struct mv_xor_shared_private *shared; +}; + +/** + * struct mv_xor_chan - internal representation of a XOR channel + * @pending: allows batching of hardware operations + * @lock: serializes enqueue/dequeue operations to the descriptors pool + * @mmr_base: memory mapped register base + * @idx: the index of the xor channel + * @chain: device chain view of the descriptors + * @completed_slots: slots completed by HW but still need to be acked + * @device: parent device + * @common: common dmaengine channel object members + * @last_used: place holder for allocation to continue from where it left off + * @all_slots: complete domain of slots usable by the channel + * @slots_allocated: records the actual size of the descriptor slot pool + * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs + */ +struct mv_xor_chan { + int pending; + spinlock_t lock; /* protects the descriptor slot pool */ + void __iomem *mmr_base; + unsigned int idx; + enum dma_transaction_type current_type; + struct list_head chain; + struct list_head completed_slots; + struct mv_xor_device *device; + struct dma_chan common; + struct mv_xor_desc_slot *last_used; + struct list_head all_slots; + int slots_allocated; + struct tasklet_struct irq_tasklet; +#ifdef USE_TIMER + unsigned long cleanup_time; + u32 current_on_last_cleanup; +#endif +}; + +/** + * struct mv_xor_desc_slot - software descriptor + * @slot_node: node on the mv_xor_chan.all_slots list + * @chain_node: node on the mv_xor_chan.chain list + * @completed_node: node on the mv_xor_chan.completed_slots list + * @hw_desc: virtual address of the hardware descriptor chain + * @phys: hardware address of the hardware descriptor chain + * @group_head: first operation in a transaction + * @slot_cnt: total slots used in an transaction (group of operations) + * @slots_per_op: number of slots per operation + * @idx: pool index + * @unmap_src_cnt: number of xor sources + * @unmap_len: transaction bytecount + * @tx_list: list of slots that make up a multi-descriptor transaction + * @async_tx: support for the async_tx api + * @xor_check_result: result of zero sum + * @crc32_result: result crc calculation + */ +struct mv_xor_desc_slot { + struct list_head slot_node; + struct list_head chain_node; + struct list_head completed_node; + enum dma_transaction_type type; + void *hw_desc; + struct mv_xor_desc_slot *group_head; + u16 slot_cnt; + u16 slots_per_op; + u16 idx; + u16 unmap_src_cnt; + u32 value; + size_t unmap_len; + struct list_head tx_list; + struct dma_async_tx_descriptor async_tx; + union { + u32 *xor_check_result; + u32 *crc32_result; + }; +#ifdef USE_TIMER + unsigned long arrival_time; + struct timer_list timeout; +#endif +}; + +/* This structure describes XOR descriptor size 64bytes */ +struct mv_xor_desc { + u32 status; /* descriptor execution status */ + u32 crc32_result; /* result of CRC-32 calculation */ + u32 desc_command; /* type of operation to be carried out */ + u32 phy_next_desc; /* next descriptor address pointer */ + u32 byte_count; /* size of src/dst blocks in bytes */ + u32 phy_dest_addr; /* destination block address */ + u32 phy_src_addr[8]; /* source block addresses */ + u32 reserved0; + u32 reserved1; +}; + +#define to_mv_sw_desc(addr_hw_desc) \ + container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc) + +#define mv_hw_desc_slot_idx(hw_desc, idx) \ + ((void *)(((unsigned long)hw_desc) + ((idx) << 5))) + +#define MV_XOR_MIN_BYTE_COUNT (128) +#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1) +#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT + + +#endif diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c new file mode 100644 index 00000000..655d4ce6 --- /dev/null +++ b/drivers/dma/mxs-dma.c @@ -0,0 +1,715 @@ +/* + * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved. + * + * Refer to drivers/dma/imx-sdma.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/clk.h> +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/semaphore.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/platform_device.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/fsl/mxs-dma.h> + +#include <asm/irq.h> +#include <mach/mxs.h> +#include <mach/common.h> + +#include "dmaengine.h" + +/* + * NOTE: The term "PIO" throughout the mxs-dma implementation means + * PIO mode of mxs apbh-dma and apbx-dma. With this working mode, + * dma can program the controller registers of peripheral devices. + */ + +#define MXS_DMA_APBH 0 +#define MXS_DMA_APBX 1 +#define dma_is_apbh() (mxs_dma->dev_id == MXS_DMA_APBH) + +#define APBH_VERSION_LATEST 3 +#define apbh_is_old() (mxs_dma->version < APBH_VERSION_LATEST) + +#define HW_APBHX_CTRL0 0x000 +#define BM_APBH_CTRL0_APB_BURST8_EN (1 << 29) +#define BM_APBH_CTRL0_APB_BURST_EN (1 << 28) +#define BP_APBH_CTRL0_RESET_CHANNEL 16 +#define HW_APBHX_CTRL1 0x010 +#define HW_APBHX_CTRL2 0x020 +#define HW_APBHX_CHANNEL_CTRL 0x030 +#define BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL 16 +#define HW_APBH_VERSION (cpu_is_mx23() ? 0x3f0 : 0x800) +#define HW_APBX_VERSION 0x800 +#define BP_APBHX_VERSION_MAJOR 24 +#define HW_APBHX_CHn_NXTCMDAR(n) \ + (((dma_is_apbh() && apbh_is_old()) ? 0x050 : 0x110) + (n) * 0x70) +#define HW_APBHX_CHn_SEMA(n) \ + (((dma_is_apbh() && apbh_is_old()) ? 0x080 : 0x140) + (n) * 0x70) + +/* + * ccw bits definitions + * + * COMMAND: 0..1 (2) + * CHAIN: 2 (1) + * IRQ: 3 (1) + * NAND_LOCK: 4 (1) - not implemented + * NAND_WAIT4READY: 5 (1) - not implemented + * DEC_SEM: 6 (1) + * WAIT4END: 7 (1) + * HALT_ON_TERMINATE: 8 (1) + * TERMINATE_FLUSH: 9 (1) + * RESERVED: 10..11 (2) + * PIO_NUM: 12..15 (4) + */ +#define BP_CCW_COMMAND 0 +#define BM_CCW_COMMAND (3 << 0) +#define CCW_CHAIN (1 << 2) +#define CCW_IRQ (1 << 3) +#define CCW_DEC_SEM (1 << 6) +#define CCW_WAIT4END (1 << 7) +#define CCW_HALT_ON_TERM (1 << 8) +#define CCW_TERM_FLUSH (1 << 9) +#define BP_CCW_PIO_NUM 12 +#define BM_CCW_PIO_NUM (0xf << 12) + +#define BF_CCW(value, field) (((value) << BP_CCW_##field) & BM_CCW_##field) + +#define MXS_DMA_CMD_NO_XFER 0 +#define MXS_DMA_CMD_WRITE 1 +#define MXS_DMA_CMD_READ 2 +#define MXS_DMA_CMD_DMA_SENSE 3 /* not implemented */ + +struct mxs_dma_ccw { + u32 next; + u16 bits; + u16 xfer_bytes; +#define MAX_XFER_BYTES 0xff00 + u32 bufaddr; +#define MXS_PIO_WORDS 16 + u32 pio_words[MXS_PIO_WORDS]; +}; + +#define NUM_CCW (int)(PAGE_SIZE / sizeof(struct mxs_dma_ccw)) + +struct mxs_dma_chan { + struct mxs_dma_engine *mxs_dma; + struct dma_chan chan; + struct dma_async_tx_descriptor desc; + struct tasklet_struct tasklet; + int chan_irq; + struct mxs_dma_ccw *ccw; + dma_addr_t ccw_phys; + int desc_count; + enum dma_status status; + unsigned int flags; +#define MXS_DMA_SG_LOOP (1 << 0) +}; + +#define MXS_DMA_CHANNELS 16 +#define MXS_DMA_CHANNELS_MASK 0xffff + +struct mxs_dma_engine { + int dev_id; + unsigned int version; + void __iomem *base; + struct clk *clk; + struct dma_device dma_device; + struct device_dma_parameters dma_parms; + struct mxs_dma_chan mxs_chans[MXS_DMA_CHANNELS]; +}; + +static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + if (dma_is_apbh() && apbh_is_old()) + writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL), + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + else + writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL), + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_SET_ADDR); +} + +static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* set cmd_addr up */ + writel(mxs_chan->ccw_phys, + mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(chan_id)); + + /* write 1 to SEMA to kick off the channel */ + writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(chan_id)); +} + +static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan) +{ + mxs_chan->status = DMA_SUCCESS; +} + +static void mxs_dma_pause_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* freeze the channel */ + if (dma_is_apbh() && apbh_is_old()) + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + else + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_SET_ADDR); + + mxs_chan->status = DMA_PAUSED; +} + +static void mxs_dma_resume_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* unfreeze the channel */ + if (dma_is_apbh() && apbh_is_old()) + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_CLR_ADDR); + else + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_CLR_ADDR); + + mxs_chan->status = DMA_IN_PROGRESS; +} + +static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct mxs_dma_chan, chan); +} + +static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + return dma_cookie_assign(tx); +} + +static void mxs_dma_tasklet(unsigned long data) +{ + struct mxs_dma_chan *mxs_chan = (struct mxs_dma_chan *) data; + + if (mxs_chan->desc.callback) + mxs_chan->desc.callback(mxs_chan->desc.callback_param); +} + +static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id) +{ + struct mxs_dma_engine *mxs_dma = dev_id; + u32 stat1, stat2; + + /* completion status */ + stat1 = readl(mxs_dma->base + HW_APBHX_CTRL1); + stat1 &= MXS_DMA_CHANNELS_MASK; + writel(stat1, mxs_dma->base + HW_APBHX_CTRL1 + MXS_CLR_ADDR); + + /* error status */ + stat2 = readl(mxs_dma->base + HW_APBHX_CTRL2); + writel(stat2, mxs_dma->base + HW_APBHX_CTRL2 + MXS_CLR_ADDR); + + /* + * When both completion and error of termination bits set at the + * same time, we do not take it as an error. IOW, it only becomes + * an error we need to handle here in case of either it's (1) a bus + * error or (2) a termination error with no completion. + */ + stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */ + (~(stat2 >> MXS_DMA_CHANNELS) & stat2 & ~stat1); /* (2) */ + + /* combine error and completion status for checking */ + stat1 = (stat2 << MXS_DMA_CHANNELS) | stat1; + while (stat1) { + int channel = fls(stat1) - 1; + struct mxs_dma_chan *mxs_chan = + &mxs_dma->mxs_chans[channel % MXS_DMA_CHANNELS]; + + if (channel >= MXS_DMA_CHANNELS) { + dev_dbg(mxs_dma->dma_device.dev, + "%s: error in channel %d\n", __func__, + channel - MXS_DMA_CHANNELS); + mxs_chan->status = DMA_ERROR; + mxs_dma_reset_chan(mxs_chan); + } else { + if (mxs_chan->flags & MXS_DMA_SG_LOOP) + mxs_chan->status = DMA_IN_PROGRESS; + else + mxs_chan->status = DMA_SUCCESS; + } + + stat1 &= ~(1 << channel); + + if (mxs_chan->status == DMA_SUCCESS) + dma_cookie_complete(&mxs_chan->desc); + + /* schedule tasklet on this channel */ + tasklet_schedule(&mxs_chan->tasklet); + } + + return IRQ_HANDLED; +} + +static int mxs_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_data *data = chan->private; + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int ret; + + if (!data) + return -EINVAL; + + mxs_chan->chan_irq = data->chan_irq; + + mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev, PAGE_SIZE, + &mxs_chan->ccw_phys, GFP_KERNEL); + if (!mxs_chan->ccw) { + ret = -ENOMEM; + goto err_alloc; + } + + memset(mxs_chan->ccw, 0, PAGE_SIZE); + + if (mxs_chan->chan_irq != NO_IRQ) { + ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, + 0, "mxs-dma", mxs_dma); + if (ret) + goto err_irq; + } + + ret = clk_prepare_enable(mxs_dma->clk); + if (ret) + goto err_clk; + + mxs_dma_reset_chan(mxs_chan); + + dma_async_tx_descriptor_init(&mxs_chan->desc, chan); + mxs_chan->desc.tx_submit = mxs_dma_tx_submit; + + /* the descriptor is ready */ + async_tx_ack(&mxs_chan->desc); + + return 0; + +err_clk: + free_irq(mxs_chan->chan_irq, mxs_dma); +err_irq: + dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE, + mxs_chan->ccw, mxs_chan->ccw_phys); +err_alloc: + return ret; +} + +static void mxs_dma_free_chan_resources(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + + mxs_dma_disable_chan(mxs_chan); + + free_irq(mxs_chan->chan_irq, mxs_dma); + + dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE, + mxs_chan->ccw, mxs_chan->ccw_phys); + + clk_disable_unprepare(mxs_dma->clk); +} + +/* + * How to use the flags for ->device_prep_slave_sg() : + * [1] If there is only one DMA command in the DMA chain, the code should be: + * ...... + * ->device_prep_slave_sg(DMA_CTRL_ACK); + * ...... + * [2] If there are two DMA commands in the DMA chain, the code should be + * ...... + * ->device_prep_slave_sg(0); + * ...... + * ->device_prep_slave_sg(DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + * ...... + * [3] If there are more than two DMA commands in the DMA chain, the code + * should be: + * ...... + * ->device_prep_slave_sg(0); // First + * ...... + * ->device_prep_slave_sg(DMA_PREP_INTERRUPT [| DMA_CTRL_ACK]); + * ...... + * ->device_prep_slave_sg(DMA_PREP_INTERRUPT | DMA_CTRL_ACK); // Last + * ...... + */ +static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + struct mxs_dma_ccw *ccw; + struct scatterlist *sg; + int i, j; + u32 *pio; + bool append = flags & DMA_PREP_INTERRUPT; + int idx = append ? mxs_chan->desc_count : 0; + + if (mxs_chan->status == DMA_IN_PROGRESS && !append) + return NULL; + + if (sg_len + (append ? idx : 0) > NUM_CCW) { + dev_err(mxs_dma->dma_device.dev, + "maximum number of sg exceeded: %d > %d\n", + sg_len, NUM_CCW); + goto err_out; + } + + mxs_chan->status = DMA_IN_PROGRESS; + mxs_chan->flags = 0; + + /* + * If the sg is prepared with append flag set, the sg + * will be appended to the last prepared sg. + */ + if (append) { + BUG_ON(idx < 1); + ccw = &mxs_chan->ccw[idx - 1]; + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx; + ccw->bits |= CCW_CHAIN; + ccw->bits &= ~CCW_IRQ; + ccw->bits &= ~CCW_DEC_SEM; + } else { + idx = 0; + } + + if (direction == DMA_TRANS_NONE) { + ccw = &mxs_chan->ccw[idx++]; + pio = (u32 *) sgl; + + for (j = 0; j < sg_len;) + ccw->pio_words[j++] = *pio++; + + ccw->bits = 0; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_DEC_SEM; + if (flags & DMA_CTRL_ACK) + ccw->bits |= CCW_WAIT4END; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= BF_CCW(sg_len, PIO_NUM); + ccw->bits |= BF_CCW(MXS_DMA_CMD_NO_XFER, COMMAND); + } else { + for_each_sg(sgl, sg, sg_len, i) { + if (sg->length > MAX_XFER_BYTES) { + dev_err(mxs_dma->dma_device.dev, "maximum bytes for sg entry exceeded: %d > %d\n", + sg->length, MAX_XFER_BYTES); + goto err_out; + } + + ccw = &mxs_chan->ccw[idx++]; + + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx; + ccw->bufaddr = sg->dma_address; + ccw->xfer_bytes = sg->length; + + ccw->bits = 0; + ccw->bits |= CCW_CHAIN; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ? + MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, + COMMAND); + + if (i + 1 == sg_len) { + ccw->bits &= ~CCW_CHAIN; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_DEC_SEM; + if (flags & DMA_CTRL_ACK) + ccw->bits |= CCW_WAIT4END; + } + } + } + mxs_chan->desc_count = idx; + + return &mxs_chan->desc; + +err_out: + mxs_chan->status = DMA_ERROR; + return NULL; +} + +static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + void *context) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int num_periods = buf_len / period_len; + int i = 0, buf = 0; + + if (mxs_chan->status == DMA_IN_PROGRESS) + return NULL; + + mxs_chan->status = DMA_IN_PROGRESS; + mxs_chan->flags |= MXS_DMA_SG_LOOP; + + if (num_periods > NUM_CCW) { + dev_err(mxs_dma->dma_device.dev, + "maximum number of sg exceeded: %d > %d\n", + num_periods, NUM_CCW); + goto err_out; + } + + if (period_len > MAX_XFER_BYTES) { + dev_err(mxs_dma->dma_device.dev, + "maximum period size exceeded: %d > %d\n", + period_len, MAX_XFER_BYTES); + goto err_out; + } + + while (buf < buf_len) { + struct mxs_dma_ccw *ccw = &mxs_chan->ccw[i]; + + if (i + 1 == num_periods) + ccw->next = mxs_chan->ccw_phys; + else + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * (i + 1); + + ccw->bufaddr = dma_addr; + ccw->xfer_bytes = period_len; + + ccw->bits = 0; + ccw->bits |= CCW_CHAIN; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ? + MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND); + + dma_addr += period_len; + buf += period_len; + + i++; + } + mxs_chan->desc_count = i; + + return &mxs_chan->desc; + +err_out: + mxs_chan->status = DMA_ERROR; + return NULL; +} + +static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + int ret = 0; + + switch (cmd) { + case DMA_TERMINATE_ALL: + mxs_dma_reset_chan(mxs_chan); + mxs_dma_disable_chan(mxs_chan); + break; + case DMA_PAUSE: + mxs_dma_pause_chan(mxs_chan); + break; + case DMA_RESUME: + mxs_dma_resume_chan(mxs_chan); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + +static enum dma_status mxs_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + dma_cookie_t last_used; + + last_used = chan->cookie; + dma_set_tx_state(txstate, chan->completed_cookie, last_used, 0); + + return mxs_chan->status; +} + +static void mxs_dma_issue_pending(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + + mxs_dma_enable_chan(mxs_chan); +} + +static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma) +{ + int ret; + + ret = clk_prepare_enable(mxs_dma->clk); + if (ret) + return ret; + + ret = mxs_reset_block(mxs_dma->base); + if (ret) + goto err_out; + + /* only major version matters */ + mxs_dma->version = readl(mxs_dma->base + + ((mxs_dma->dev_id == MXS_DMA_APBX) ? + HW_APBX_VERSION : HW_APBH_VERSION)) >> + BP_APBHX_VERSION_MAJOR; + + /* enable apbh burst */ + if (dma_is_apbh()) { + writel(BM_APBH_CTRL0_APB_BURST_EN, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + writel(BM_APBH_CTRL0_APB_BURST8_EN, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + } + + /* enable irq for all the channels */ + writel(MXS_DMA_CHANNELS_MASK << MXS_DMA_CHANNELS, + mxs_dma->base + HW_APBHX_CTRL1 + MXS_SET_ADDR); + +err_out: + clk_disable_unprepare(mxs_dma->clk); + return ret; +} + +static int __init mxs_dma_probe(struct platform_device *pdev) +{ + const struct platform_device_id *id_entry = + platform_get_device_id(pdev); + struct mxs_dma_engine *mxs_dma; + struct resource *iores; + int ret, i; + + mxs_dma = kzalloc(sizeof(*mxs_dma), GFP_KERNEL); + if (!mxs_dma) + return -ENOMEM; + + mxs_dma->dev_id = id_entry->driver_data; + + iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + if (!request_mem_region(iores->start, resource_size(iores), + pdev->name)) { + ret = -EBUSY; + goto err_request_region; + } + + mxs_dma->base = ioremap(iores->start, resource_size(iores)); + if (!mxs_dma->base) { + ret = -ENOMEM; + goto err_ioremap; + } + + mxs_dma->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(mxs_dma->clk)) { + ret = PTR_ERR(mxs_dma->clk); + goto err_clk; + } + + dma_cap_set(DMA_SLAVE, mxs_dma->dma_device.cap_mask); + dma_cap_set(DMA_CYCLIC, mxs_dma->dma_device.cap_mask); + + INIT_LIST_HEAD(&mxs_dma->dma_device.channels); + + /* Initialize channel parameters */ + for (i = 0; i < MXS_DMA_CHANNELS; i++) { + struct mxs_dma_chan *mxs_chan = &mxs_dma->mxs_chans[i]; + + mxs_chan->mxs_dma = mxs_dma; + mxs_chan->chan.device = &mxs_dma->dma_device; + dma_cookie_init(&mxs_chan->chan); + + tasklet_init(&mxs_chan->tasklet, mxs_dma_tasklet, + (unsigned long) mxs_chan); + + + /* Add the channel to mxs_chan list */ + list_add_tail(&mxs_chan->chan.device_node, + &mxs_dma->dma_device.channels); + } + + ret = mxs_dma_init(mxs_dma); + if (ret) + goto err_init; + + mxs_dma->dma_device.dev = &pdev->dev; + + /* mxs_dma gets 65535 bytes maximum sg size */ + mxs_dma->dma_device.dev->dma_parms = &mxs_dma->dma_parms; + dma_set_max_seg_size(mxs_dma->dma_device.dev, MAX_XFER_BYTES); + + mxs_dma->dma_device.device_alloc_chan_resources = mxs_dma_alloc_chan_resources; + mxs_dma->dma_device.device_free_chan_resources = mxs_dma_free_chan_resources; + mxs_dma->dma_device.device_tx_status = mxs_dma_tx_status; + mxs_dma->dma_device.device_prep_slave_sg = mxs_dma_prep_slave_sg; + mxs_dma->dma_device.device_prep_dma_cyclic = mxs_dma_prep_dma_cyclic; + mxs_dma->dma_device.device_control = mxs_dma_control; + mxs_dma->dma_device.device_issue_pending = mxs_dma_issue_pending; + + ret = dma_async_device_register(&mxs_dma->dma_device); + if (ret) { + dev_err(mxs_dma->dma_device.dev, "unable to register\n"); + goto err_init; + } + + dev_info(mxs_dma->dma_device.dev, "initialized\n"); + + return 0; + +err_init: + clk_put(mxs_dma->clk); +err_clk: + iounmap(mxs_dma->base); +err_ioremap: + release_mem_region(iores->start, resource_size(iores)); +err_request_region: + kfree(mxs_dma); + return ret; +} + +static struct platform_device_id mxs_dma_type[] = { + { + .name = "mxs-dma-apbh", + .driver_data = MXS_DMA_APBH, + }, { + .name = "mxs-dma-apbx", + .driver_data = MXS_DMA_APBX, + }, { + /* end of list */ + } +}; + +static struct platform_driver mxs_dma_driver = { + .driver = { + .name = "mxs-dma", + }, + .id_table = mxs_dma_type, +}; + +static int __init mxs_dma_module_init(void) +{ + return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe); +} +subsys_initcall(mxs_dma_module_init); diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c new file mode 100644 index 00000000..65c0495a --- /dev/null +++ b/drivers/dma/pch_dma.c @@ -0,0 +1,1048 @@ +/* + * Topcliff PCH DMA controller driver + * Copyright (c) 2010 Intel Corporation + * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pch_dma.h> + +#include "dmaengine.h" + +#define DRV_NAME "pch-dma" + +#define DMA_CTL0_DISABLE 0x0 +#define DMA_CTL0_SG 0x1 +#define DMA_CTL0_ONESHOT 0x2 +#define DMA_CTL0_MODE_MASK_BITS 0x3 +#define DMA_CTL0_DIR_SHIFT_BITS 2 +#define DMA_CTL0_BITS_PER_CH 4 + +#define DMA_CTL2_START_SHIFT_BITS 8 +#define DMA_CTL2_IRQ_ENABLE_MASK ((1UL << DMA_CTL2_START_SHIFT_BITS) - 1) + +#define DMA_STATUS_IDLE 0x0 +#define DMA_STATUS_DESC_READ 0x1 +#define DMA_STATUS_WAIT 0x2 +#define DMA_STATUS_ACCESS 0x3 +#define DMA_STATUS_BITS_PER_CH 2 +#define DMA_STATUS_MASK_BITS 0x3 +#define DMA_STATUS_SHIFT_BITS 16 +#define DMA_STATUS_IRQ(x) (0x1 << (x)) +#define DMA_STATUS0_ERR(x) (0x1 << ((x) + 8)) +#define DMA_STATUS2_ERR(x) (0x1 << (x)) + +#define DMA_DESC_WIDTH_SHIFT_BITS 12 +#define DMA_DESC_WIDTH_1_BYTE (0x3 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_WIDTH_2_BYTES (0x2 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_WIDTH_4_BYTES (0x0 << DMA_DESC_WIDTH_SHIFT_BITS) +#define DMA_DESC_MAX_COUNT_1_BYTE 0x3FF +#define DMA_DESC_MAX_COUNT_2_BYTES 0x3FF +#define DMA_DESC_MAX_COUNT_4_BYTES 0x7FF +#define DMA_DESC_END_WITHOUT_IRQ 0x0 +#define DMA_DESC_END_WITH_IRQ 0x1 +#define DMA_DESC_FOLLOW_WITHOUT_IRQ 0x2 +#define DMA_DESC_FOLLOW_WITH_IRQ 0x3 + +#define MAX_CHAN_NR 12 + +#define DMA_MASK_CTL0_MODE 0x33333333 +#define DMA_MASK_CTL2_MODE 0x00003333 + +static unsigned int init_nr_desc_per_channel = 64; +module_param(init_nr_desc_per_channel, uint, 0644); +MODULE_PARM_DESC(init_nr_desc_per_channel, + "initial descriptors per channel (default: 64)"); + +struct pch_dma_desc_regs { + u32 dev_addr; + u32 mem_addr; + u32 size; + u32 next; +}; + +struct pch_dma_regs { + u32 dma_ctl0; + u32 dma_ctl1; + u32 dma_ctl2; + u32 dma_ctl3; + u32 dma_sts0; + u32 dma_sts1; + u32 dma_sts2; + u32 reserved3; + struct pch_dma_desc_regs desc[MAX_CHAN_NR]; +}; + +struct pch_dma_desc { + struct pch_dma_desc_regs regs; + struct dma_async_tx_descriptor txd; + struct list_head desc_node; + struct list_head tx_list; +}; + +struct pch_dma_chan { + struct dma_chan chan; + void __iomem *membase; + enum dma_transfer_direction dir; + struct tasklet_struct tasklet; + unsigned long err_status; + + spinlock_t lock; + + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int descs_allocated; +}; + +#define PDC_DEV_ADDR 0x00 +#define PDC_MEM_ADDR 0x04 +#define PDC_SIZE 0x08 +#define PDC_NEXT 0x0C + +#define channel_readl(pdc, name) \ + readl((pdc)->membase + PDC_##name) +#define channel_writel(pdc, name, val) \ + writel((val), (pdc)->membase + PDC_##name) + +struct pch_dma { + struct dma_device dma; + void __iomem *membase; + struct pci_pool *pool; + struct pch_dma_regs regs; + struct pch_dma_desc_regs ch_regs[MAX_CHAN_NR]; + struct pch_dma_chan channels[MAX_CHAN_NR]; +}; + +#define PCH_DMA_CTL0 0x00 +#define PCH_DMA_CTL1 0x04 +#define PCH_DMA_CTL2 0x08 +#define PCH_DMA_CTL3 0x0C +#define PCH_DMA_STS0 0x10 +#define PCH_DMA_STS1 0x14 +#define PCH_DMA_STS2 0x18 + +#define dma_readl(pd, name) \ + readl((pd)->membase + PCH_DMA_##name) +#define dma_writel(pd, name, val) \ + writel((val), (pd)->membase + PCH_DMA_##name) + +static inline +struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct pch_dma_desc, txd); +} + +static inline struct pch_dma_chan *to_pd_chan(struct dma_chan *chan) +{ + return container_of(chan, struct pch_dma_chan, chan); +} + +static inline struct pch_dma *to_pd(struct dma_device *ddev) +{ + return container_of(ddev, struct pch_dma, dma); +} + +static inline struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct device *chan2parent(struct dma_chan *chan) +{ + return chan->dev->device.parent; +} + +static inline +struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan) +{ + return list_first_entry(&pd_chan->active_list, + struct pch_dma_desc, desc_node); +} + +static inline +struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan) +{ + return list_first_entry(&pd_chan->queue, + struct pch_dma_desc, desc_node); +} + +static void pdc_enable_irq(struct dma_chan *chan, int enable) +{ + struct pch_dma *pd = to_pd(chan->device); + u32 val; + int pos; + + if (chan->chan_id < 8) + pos = chan->chan_id; + else + pos = chan->chan_id + 8; + + val = dma_readl(pd, CTL2); + + if (enable) + val |= 0x1 << pos; + else + val &= ~(0x1 << pos); + + dma_writel(pd, CTL2, val); + + dev_dbg(chan2dev(chan), "pdc_enable_irq: chan %d -> %x\n", + chan->chan_id, val); +} + +static void pdc_set_dir(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma *pd = to_pd(chan->device); + u32 val; + u32 mask_mode; + u32 mask_ctl; + + if (chan->chan_id < 8) { + val = dma_readl(pd, CTL0); + + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id); + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val &= mask_mode; + if (pd_chan->dir == DMA_MEM_TO_DEV) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS)); + + val |= mask_ctl; + dma_writel(pd, CTL0, val); + } else { + int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ + val = dma_readl(pd, CTL3); + + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch); + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + val &= mask_mode; + if (pd_chan->dir == DMA_MEM_TO_DEV) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch + + DMA_CTL0_DIR_SHIFT_BITS)); + val |= mask_ctl; + dma_writel(pd, CTL3, val); + } + + dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n", + chan->chan_id, val); +} + +static void pdc_set_mode(struct dma_chan *chan, u32 mode) +{ + struct pch_dma *pd = to_pd(chan->device); + u32 val; + u32 mask_ctl; + u32 mask_dir; + + if (chan->chan_id < 8) { + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\ + DMA_CTL0_DIR_SHIFT_BITS); + val = dma_readl(pd, CTL0); + val &= mask_dir; + val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); + val |= mask_ctl; + dma_writel(pd, CTL0, val); + } else { + int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\ + DMA_CTL0_DIR_SHIFT_BITS); + val = dma_readl(pd, CTL3); + val &= mask_dir; + val |= mode << (DMA_CTL0_BITS_PER_CH * ch); + val |= mask_ctl; + dma_writel(pd, CTL3, val); + } + + dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", + chan->chan_id, val); +} + +static u32 pdc_get_status0(struct pch_dma_chan *pd_chan) +{ + struct pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + val = dma_readl(pd, STS0); + return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + + DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id)); +} + +static u32 pdc_get_status2(struct pch_dma_chan *pd_chan) +{ + struct pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + val = dma_readl(pd, STS2); + return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + + DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8))); +} + +static bool pdc_is_idle(struct pch_dma_chan *pd_chan) +{ + u32 sts; + + if (pd_chan->chan.chan_id < 8) + sts = pdc_get_status0(pd_chan); + else + sts = pdc_get_status2(pd_chan); + + + if (sts == DMA_STATUS_IDLE) + return true; + else + return false; +} + +static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc) +{ + if (!pdc_is_idle(pd_chan)) { + dev_err(chan2dev(&pd_chan->chan), + "BUG: Attempt to start non-idle channel\n"); + return; + } + + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> dev_addr: %x\n", + pd_chan->chan.chan_id, desc->regs.dev_addr); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> mem_addr: %x\n", + pd_chan->chan.chan_id, desc->regs.mem_addr); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> size: %x\n", + pd_chan->chan.chan_id, desc->regs.size); + dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> next: %x\n", + pd_chan->chan.chan_id, desc->regs.next); + + if (list_empty(&desc->tx_list)) { + channel_writel(pd_chan, DEV_ADDR, desc->regs.dev_addr); + channel_writel(pd_chan, MEM_ADDR, desc->regs.mem_addr); + channel_writel(pd_chan, SIZE, desc->regs.size); + channel_writel(pd_chan, NEXT, desc->regs.next); + pdc_set_mode(&pd_chan->chan, DMA_CTL0_ONESHOT); + } else { + channel_writel(pd_chan, NEXT, desc->txd.phys); + pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG); + } +} + +static void pdc_chain_complete(struct pch_dma_chan *pd_chan, + struct pch_dma_desc *desc) +{ + struct dma_async_tx_descriptor *txd = &desc->txd; + dma_async_tx_callback callback = txd->callback; + void *param = txd->callback_param; + + list_splice_init(&desc->tx_list, &pd_chan->free_list); + list_move(&desc->desc_node, &pd_chan->free_list); + + if (callback) + callback(param); +} + +static void pdc_complete_all(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *desc, *_d; + LIST_HEAD(list); + + BUG_ON(!pdc_is_idle(pd_chan)); + + if (!list_empty(&pd_chan->queue)) + pdc_dostart(pd_chan, pdc_first_queued(pd_chan)); + + list_splice_init(&pd_chan->active_list, &list); + list_splice_init(&pd_chan->queue, &pd_chan->active_list); + + list_for_each_entry_safe(desc, _d, &list, desc_node) + pdc_chain_complete(pd_chan, desc); +} + +static void pdc_handle_error(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *bad_desc; + + bad_desc = pdc_first_active(pd_chan); + list_del(&bad_desc->desc_node); + + list_splice_init(&pd_chan->queue, pd_chan->active_list.prev); + + if (!list_empty(&pd_chan->active_list)) + pdc_dostart(pd_chan, pdc_first_active(pd_chan)); + + dev_crit(chan2dev(&pd_chan->chan), "Bad descriptor submitted\n"); + dev_crit(chan2dev(&pd_chan->chan), "descriptor cookie: %d\n", + bad_desc->txd.cookie); + + pdc_chain_complete(pd_chan, bad_desc); +} + +static void pdc_advance_work(struct pch_dma_chan *pd_chan) +{ + if (list_empty(&pd_chan->active_list) || + list_is_singular(&pd_chan->active_list)) { + pdc_complete_all(pd_chan); + } else { + pdc_chain_complete(pd_chan, pdc_first_active(pd_chan)); + pdc_dostart(pd_chan, pdc_first_active(pd_chan)); + } +} + +static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct pch_dma_desc *desc = to_pd_desc(txd); + struct pch_dma_chan *pd_chan = to_pd_chan(txd->chan); + dma_cookie_t cookie; + + spin_lock(&pd_chan->lock); + cookie = dma_cookie_assign(txd); + + if (list_empty(&pd_chan->active_list)) { + list_add_tail(&desc->desc_node, &pd_chan->active_list); + pdc_dostart(pd_chan, desc); + } else { + list_add_tail(&desc->desc_node, &pd_chan->queue); + } + + spin_unlock(&pd_chan->lock); + return 0; +} + +static struct pch_dma_desc *pdc_alloc_desc(struct dma_chan *chan, gfp_t flags) +{ + struct pch_dma_desc *desc = NULL; + struct pch_dma *pd = to_pd(chan->device); + dma_addr_t addr; + + desc = pci_pool_alloc(pd->pool, flags, &addr); + if (desc) { + memset(desc, 0, sizeof(struct pch_dma_desc)); + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = pd_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = addr; + } + + return desc; +} + +static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) +{ + struct pch_dma_desc *desc, *_d; + struct pch_dma_desc *ret = NULL; + int i = 0; + + spin_lock(&pd_chan->lock); + list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) { + i++; + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(chan2dev(&pd_chan->chan), "desc %p not ACKed\n", desc); + } + spin_unlock(&pd_chan->lock); + dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i); + + if (!ret) { + ret = pdc_alloc_desc(&pd_chan->chan, GFP_NOIO); + if (ret) { + spin_lock(&pd_chan->lock); + pd_chan->descs_allocated++; + spin_unlock(&pd_chan->lock); + } else { + dev_err(chan2dev(&pd_chan->chan), + "failed to alloc desc\n"); + } + } + + return ret; +} + +static void pdc_desc_put(struct pch_dma_chan *pd_chan, + struct pch_dma_desc *desc) +{ + if (desc) { + spin_lock(&pd_chan->lock); + list_splice_init(&desc->tx_list, &pd_chan->free_list); + list_add(&desc->desc_node, &pd_chan->free_list); + spin_unlock(&pd_chan->lock); + } +} + +static int pd_alloc_chan_resources(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_desc *desc; + LIST_HEAD(tmp_list); + int i; + + if (!pdc_is_idle(pd_chan)) { + dev_dbg(chan2dev(chan), "DMA channel not idle ?\n"); + return -EIO; + } + + if (!list_empty(&pd_chan->free_list)) + return pd_chan->descs_allocated; + + for (i = 0; i < init_nr_desc_per_channel; i++) { + desc = pdc_alloc_desc(chan, GFP_KERNEL); + + if (!desc) { + dev_warn(chan2dev(chan), + "Only allocated %d initial descriptors\n", i); + break; + } + + list_add_tail(&desc->desc_node, &tmp_list); + } + + spin_lock_irq(&pd_chan->lock); + list_splice(&tmp_list, &pd_chan->free_list); + pd_chan->descs_allocated = i; + dma_cookie_init(chan); + spin_unlock_irq(&pd_chan->lock); + + pdc_enable_irq(chan, 1); + + return pd_chan->descs_allocated; +} + +static void pd_free_chan_resources(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma *pd = to_pd(chan->device); + struct pch_dma_desc *desc, *_d; + LIST_HEAD(tmp_list); + + BUG_ON(!pdc_is_idle(pd_chan)); + BUG_ON(!list_empty(&pd_chan->active_list)); + BUG_ON(!list_empty(&pd_chan->queue)); + + spin_lock_irq(&pd_chan->lock); + list_splice_init(&pd_chan->free_list, &tmp_list); + pd_chan->descs_allocated = 0; + spin_unlock_irq(&pd_chan->lock); + + list_for_each_entry_safe(desc, _d, &tmp_list, desc_node) + pci_pool_free(pd->pool, desc, desc->txd.phys); + + pdc_enable_irq(chan, 0); +} + +static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + enum dma_status ret; + + spin_lock_irq(&pd_chan->lock); + ret = dma_cookie_status(chan, cookie, txstate); + spin_unlock_irq(&pd_chan->lock); + + return ret; +} + +static void pd_issue_pending(struct dma_chan *chan) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + + if (pdc_is_idle(pd_chan)) { + spin_lock(&pd_chan->lock); + pdc_advance_work(pd_chan); + spin_unlock(&pd_chan->lock); + } +} + +static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_slave *pd_slave = chan->private; + struct pch_dma_desc *first = NULL; + struct pch_dma_desc *prev = NULL; + struct pch_dma_desc *desc = NULL; + struct scatterlist *sg; + dma_addr_t reg; + int i; + + if (unlikely(!sg_len)) { + dev_info(chan2dev(chan), "prep_slave_sg: length is zero!\n"); + return NULL; + } + + if (direction == DMA_DEV_TO_MEM) + reg = pd_slave->rx_reg; + else if (direction == DMA_MEM_TO_DEV) + reg = pd_slave->tx_reg; + else + return NULL; + + pd_chan->dir = direction; + pdc_set_dir(chan); + + for_each_sg(sgl, sg, sg_len, i) { + desc = pdc_desc_get(pd_chan); + + if (!desc) + goto err_desc_get; + + desc->regs.dev_addr = reg; + desc->regs.mem_addr = sg_phys(sg); + desc->regs.size = sg_dma_len(sg); + desc->regs.next = DMA_DESC_FOLLOW_WITHOUT_IRQ; + + switch (pd_slave->width) { + case PCH_DMA_WIDTH_1_BYTE: + if (desc->regs.size > DMA_DESC_MAX_COUNT_1_BYTE) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_1_BYTE; + break; + case PCH_DMA_WIDTH_2_BYTES: + if (desc->regs.size > DMA_DESC_MAX_COUNT_2_BYTES) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_2_BYTES; + break; + case PCH_DMA_WIDTH_4_BYTES: + if (desc->regs.size > DMA_DESC_MAX_COUNT_4_BYTES) + goto err_desc_get; + desc->regs.size |= DMA_DESC_WIDTH_4_BYTES; + break; + default: + goto err_desc_get; + } + + if (!first) { + first = desc; + } else { + prev->regs.next |= desc->txd.phys; + list_add_tail(&desc->desc_node, &first->tx_list); + } + + prev = desc; + } + + if (flags & DMA_PREP_INTERRUPT) + desc->regs.next = DMA_DESC_END_WITH_IRQ; + else + desc->regs.next = DMA_DESC_END_WITHOUT_IRQ; + + first->txd.cookie = -EBUSY; + desc->txd.flags = flags; + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), "failed to get desc or wrong parameters\n"); + pdc_desc_put(pd_chan, first); + return NULL; +} + +static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct pch_dma_chan *pd_chan = to_pd_chan(chan); + struct pch_dma_desc *desc, *_d; + LIST_HEAD(list); + + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + spin_lock_irq(&pd_chan->lock); + + pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE); + + list_splice_init(&pd_chan->active_list, &list); + list_splice_init(&pd_chan->queue, &list); + + list_for_each_entry_safe(desc, _d, &list, desc_node) + pdc_chain_complete(pd_chan, desc); + + spin_unlock_irq(&pd_chan->lock); + + return 0; +} + +static void pdc_tasklet(unsigned long data) +{ + struct pch_dma_chan *pd_chan = (struct pch_dma_chan *)data; + unsigned long flags; + + if (!pdc_is_idle(pd_chan)) { + dev_err(chan2dev(&pd_chan->chan), + "BUG: handle non-idle channel in tasklet\n"); + return; + } + + spin_lock_irqsave(&pd_chan->lock, flags); + if (test_and_clear_bit(0, &pd_chan->err_status)) + pdc_handle_error(pd_chan); + else + pdc_advance_work(pd_chan); + spin_unlock_irqrestore(&pd_chan->lock, flags); +} + +static irqreturn_t pd_irq(int irq, void *devid) +{ + struct pch_dma *pd = (struct pch_dma *)devid; + struct pch_dma_chan *pd_chan; + u32 sts0; + u32 sts2; + int i; + int ret0 = IRQ_NONE; + int ret2 = IRQ_NONE; + + sts0 = dma_readl(pd, STS0); + sts2 = dma_readl(pd, STS2); + + dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0); + + for (i = 0; i < pd->dma.chancnt; i++) { + pd_chan = &pd->channels[i]; + + if (i < 8) { + if (sts0 & DMA_STATUS_IRQ(i)) { + if (sts0 & DMA_STATUS0_ERR(i)) + set_bit(0, &pd_chan->err_status); + + tasklet_schedule(&pd_chan->tasklet); + ret0 = IRQ_HANDLED; + } + } else { + if (sts2 & DMA_STATUS_IRQ(i - 8)) { + if (sts2 & DMA_STATUS2_ERR(i)) + set_bit(0, &pd_chan->err_status); + + tasklet_schedule(&pd_chan->tasklet); + ret2 = IRQ_HANDLED; + } + } + } + + /* clear interrupt bits in status register */ + if (ret0) + dma_writel(pd, STS0, sts0); + if (ret2) + dma_writel(pd, STS2, sts2); + + return ret0 | ret2; +} + +#ifdef CONFIG_PM +static void pch_dma_save_regs(struct pch_dma *pd) +{ + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + int i = 0; + + pd->regs.dma_ctl0 = dma_readl(pd, CTL0); + pd->regs.dma_ctl1 = dma_readl(pd, CTL1); + pd->regs.dma_ctl2 = dma_readl(pd, CTL2); + pd->regs.dma_ctl3 = dma_readl(pd, CTL3); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { + pd_chan = to_pd_chan(chan); + + pd->ch_regs[i].dev_addr = channel_readl(pd_chan, DEV_ADDR); + pd->ch_regs[i].mem_addr = channel_readl(pd_chan, MEM_ADDR); + pd->ch_regs[i].size = channel_readl(pd_chan, SIZE); + pd->ch_regs[i].next = channel_readl(pd_chan, NEXT); + + i++; + } +} + +static void pch_dma_restore_regs(struct pch_dma *pd) +{ + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + int i = 0; + + dma_writel(pd, CTL0, pd->regs.dma_ctl0); + dma_writel(pd, CTL1, pd->regs.dma_ctl1); + dma_writel(pd, CTL2, pd->regs.dma_ctl2); + dma_writel(pd, CTL3, pd->regs.dma_ctl3); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { + pd_chan = to_pd_chan(chan); + + channel_writel(pd_chan, DEV_ADDR, pd->ch_regs[i].dev_addr); + channel_writel(pd_chan, MEM_ADDR, pd->ch_regs[i].mem_addr); + channel_writel(pd_chan, SIZE, pd->ch_regs[i].size); + channel_writel(pd_chan, NEXT, pd->ch_regs[i].next); + + i++; + } +} + +static int pch_dma_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct pch_dma *pd = pci_get_drvdata(pdev); + + if (pd) + pch_dma_save_regs(pd); + + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int pch_dma_resume(struct pci_dev *pdev) +{ + struct pch_dma *pd = pci_get_drvdata(pdev); + int err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + + err = pci_enable_device(pdev); + if (err) { + dev_dbg(&pdev->dev, "failed to enable device\n"); + return err; + } + + if (pd) + pch_dma_restore_regs(pd); + + return 0; +} +#endif + +static int __devinit pch_dma_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct pch_dma *pd; + struct pch_dma_regs *regs; + unsigned int nr_channels; + int err; + int i; + + nr_channels = id->driver_data; + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + pci_set_drvdata(pdev, pd); + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device\n"); + goto err_free_mem; + } + + if (!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Cannot find proper base address\n"); + goto err_disable_pdev; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "Cannot obtain PCI resources\n"); + goto err_disable_pdev; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Cannot set proper DMA config\n"); + goto err_free_res; + } + + regs = pd->membase = pci_iomap(pdev, 1, 0); + if (!pd->membase) { + dev_err(&pdev->dev, "Cannot map MMIO registers\n"); + err = -ENOMEM; + goto err_free_res; + } + + pci_set_master(pdev); + + err = request_irq(pdev->irq, pd_irq, IRQF_SHARED, DRV_NAME, pd); + if (err) { + dev_err(&pdev->dev, "Failed to request IRQ\n"); + goto err_iounmap; + } + + pd->pool = pci_pool_create("pch_dma_desc_pool", pdev, + sizeof(struct pch_dma_desc), 4, 0); + if (!pd->pool) { + dev_err(&pdev->dev, "Failed to alloc DMA descriptors\n"); + err = -ENOMEM; + goto err_free_irq; + } + + pd->dma.dev = &pdev->dev; + + INIT_LIST_HEAD(&pd->dma.channels); + + for (i = 0; i < nr_channels; i++) { + struct pch_dma_chan *pd_chan = &pd->channels[i]; + + pd_chan->chan.device = &pd->dma; + dma_cookie_init(&pd_chan->chan); + + pd_chan->membase = ®s->desc[i]; + + spin_lock_init(&pd_chan->lock); + + INIT_LIST_HEAD(&pd_chan->active_list); + INIT_LIST_HEAD(&pd_chan->queue); + INIT_LIST_HEAD(&pd_chan->free_list); + + tasklet_init(&pd_chan->tasklet, pdc_tasklet, + (unsigned long)pd_chan); + list_add_tail(&pd_chan->chan.device_node, &pd->dma.channels); + } + + dma_cap_zero(pd->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, pd->dma.cap_mask); + dma_cap_set(DMA_SLAVE, pd->dma.cap_mask); + + pd->dma.device_alloc_chan_resources = pd_alloc_chan_resources; + pd->dma.device_free_chan_resources = pd_free_chan_resources; + pd->dma.device_tx_status = pd_tx_status; + pd->dma.device_issue_pending = pd_issue_pending; + pd->dma.device_prep_slave_sg = pd_prep_slave_sg; + pd->dma.device_control = pd_device_control; + + err = dma_async_device_register(&pd->dma); + if (err) { + dev_err(&pdev->dev, "Failed to register DMA device\n"); + goto err_free_pool; + } + + return 0; + +err_free_pool: + pci_pool_destroy(pd->pool); +err_free_irq: + free_irq(pdev->irq, pd); +err_iounmap: + pci_iounmap(pdev, pd->membase); +err_free_res: + pci_release_regions(pdev); +err_disable_pdev: + pci_disable_device(pdev); +err_free_mem: + return err; +} + +static void __devexit pch_dma_remove(struct pci_dev *pdev) +{ + struct pch_dma *pd = pci_get_drvdata(pdev); + struct pch_dma_chan *pd_chan; + struct dma_chan *chan, *_c; + + if (pd) { + dma_async_device_unregister(&pd->dma); + + list_for_each_entry_safe(chan, _c, &pd->dma.channels, + device_node) { + pd_chan = to_pd_chan(chan); + + tasklet_disable(&pd_chan->tasklet); + tasklet_kill(&pd_chan->tasklet); + } + + pci_pool_destroy(pd->pool); + free_irq(pdev->irq, pd); + pci_iounmap(pdev, pd->membase); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(pd); + } +} + +/* PCI Device ID of DMA device */ +#define PCI_VENDOR_ID_ROHM 0x10DB +#define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH 0x8810 +#define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH 0x8815 +#define PCI_DEVICE_ID_ML7213_DMA1_8CH 0x8026 +#define PCI_DEVICE_ID_ML7213_DMA2_8CH 0x802B +#define PCI_DEVICE_ID_ML7213_DMA3_4CH 0x8034 +#define PCI_DEVICE_ID_ML7213_DMA4_12CH 0x8032 +#define PCI_DEVICE_ID_ML7223_DMA1_4CH 0x800B +#define PCI_DEVICE_ID_ML7223_DMA2_4CH 0x800E +#define PCI_DEVICE_ID_ML7223_DMA3_4CH 0x8017 +#define PCI_DEVICE_ID_ML7223_DMA4_4CH 0x803B +#define PCI_DEVICE_ID_ML7831_DMA1_8CH 0x8810 +#define PCI_DEVICE_ID_ML7831_DMA2_4CH 0x8815 + +DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA1_4CH), 4}, /* UART */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA1_8CH), 8}, /* UART */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA2_4CH), 4}, /* SPI */ + { 0, }, +}; + +static struct pci_driver pch_dma_driver = { + .name = DRV_NAME, + .id_table = pch_dma_id_table, + .probe = pch_dma_probe, + .remove = __devexit_p(pch_dma_remove), +#ifdef CONFIG_PM + .suspend = pch_dma_suspend, + .resume = pch_dma_resume, +#endif +}; + +static int __init pch_dma_init(void) +{ + return pci_register_driver(&pch_dma_driver); +} + +static void __exit pch_dma_exit(void) +{ + pci_unregister_driver(&pch_dma_driver); +} + +module_init(pch_dma_init); +module_exit(pch_dma_exit); + +MODULE_DESCRIPTION("Intel EG20T PCH / LAPIS Semicon ML7213/ML7223/ML7831 IOH " + "DMA controller driver"); +MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c new file mode 100644 index 00000000..8c44f17a --- /dev/null +++ b/drivers/dma/pl330.c @@ -0,0 +1,3119 @@ +/* + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Copyright (C) 2010 Samsung Electronics Co. Ltd. + * Jaswinder Singh <jassi.brar@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/interrupt.h> +#include <linux/amba/bus.h> +#include <linux/amba/pl330.h> +#include <linux/pm_runtime.h> +#include <linux/scatterlist.h> +#include <linux/of.h> + +#include "dmaengine.h" +#define PL330_MAX_CHAN 8 +#define PL330_MAX_IRQS 32 +#define PL330_MAX_PERI 32 + +enum pl330_srccachectrl { + SCCTRL0, /* Noncacheable and nonbufferable */ + SCCTRL1, /* Bufferable only */ + SCCTRL2, /* Cacheable, but do not allocate */ + SCCTRL3, /* Cacheable and bufferable, but do not allocate */ + SINVALID1, + SINVALID2, + SCCTRL6, /* Cacheable write-through, allocate on reads only */ + SCCTRL7, /* Cacheable write-back, allocate on reads only */ +}; + +enum pl330_dstcachectrl { + DCCTRL0, /* Noncacheable and nonbufferable */ + DCCTRL1, /* Bufferable only */ + DCCTRL2, /* Cacheable, but do not allocate */ + DCCTRL3, /* Cacheable and bufferable, but do not allocate */ + DINVALID1, /* AWCACHE = 0x1000 */ + DINVALID2, + DCCTRL6, /* Cacheable write-through, allocate on writes only */ + DCCTRL7, /* Cacheable write-back, allocate on writes only */ +}; + +enum pl330_byteswap { + SWAP_NO, + SWAP_2, + SWAP_4, + SWAP_8, + SWAP_16, +}; + +enum pl330_reqtype { + MEMTOMEM, + MEMTODEV, + DEVTOMEM, + DEVTODEV, +}; + +/* Register and Bit field Definitions */ +#define DS 0x0 +#define DS_ST_STOP 0x0 +#define DS_ST_EXEC 0x1 +#define DS_ST_CMISS 0x2 +#define DS_ST_UPDTPC 0x3 +#define DS_ST_WFE 0x4 +#define DS_ST_ATBRR 0x5 +#define DS_ST_QBUSY 0x6 +#define DS_ST_WFP 0x7 +#define DS_ST_KILL 0x8 +#define DS_ST_CMPLT 0x9 +#define DS_ST_FLTCMP 0xe +#define DS_ST_FAULT 0xf + +#define DPC 0x4 +#define INTEN 0x20 +#define ES 0x24 +#define INTSTATUS 0x28 +#define INTCLR 0x2c +#define FSM 0x30 +#define FSC 0x34 +#define FTM 0x38 + +#define _FTC 0x40 +#define FTC(n) (_FTC + (n)*0x4) + +#define _CS 0x100 +#define CS(n) (_CS + (n)*0x8) +#define CS_CNS (1 << 21) + +#define _CPC 0x104 +#define CPC(n) (_CPC + (n)*0x8) + +#define _SA 0x400 +#define SA(n) (_SA + (n)*0x20) + +#define _DA 0x404 +#define DA(n) (_DA + (n)*0x20) + +#define _CC 0x408 +#define CC(n) (_CC + (n)*0x20) + +#define CC_SRCINC (1 << 0) +#define CC_DSTINC (1 << 14) +#define CC_SRCPRI (1 << 8) +#define CC_DSTPRI (1 << 22) +#define CC_SRCNS (1 << 9) +#define CC_DSTNS (1 << 23) +#define CC_SRCIA (1 << 10) +#define CC_DSTIA (1 << 24) +#define CC_SRCBRSTLEN_SHFT 4 +#define CC_DSTBRSTLEN_SHFT 18 +#define CC_SRCBRSTSIZE_SHFT 1 +#define CC_DSTBRSTSIZE_SHFT 15 +#define CC_SRCCCTRL_SHFT 11 +#define CC_SRCCCTRL_MASK 0x7 +#define CC_DSTCCTRL_SHFT 25 +#define CC_DRCCCTRL_MASK 0x7 +#define CC_SWAP_SHFT 28 + +#define _LC0 0x40c +#define LC0(n) (_LC0 + (n)*0x20) + +#define _LC1 0x410 +#define LC1(n) (_LC1 + (n)*0x20) + +#define DBGSTATUS 0xd00 +#define DBG_BUSY (1 << 0) + +#define DBGCMD 0xd04 +#define DBGINST0 0xd08 +#define DBGINST1 0xd0c + +#define CR0 0xe00 +#define CR1 0xe04 +#define CR2 0xe08 +#define CR3 0xe0c +#define CR4 0xe10 +#define CRD 0xe14 + +#define PERIPH_ID 0xfe0 +#define PERIPH_REV_SHIFT 20 +#define PERIPH_REV_MASK 0xf +#define PERIPH_REV_R0P0 0 +#define PERIPH_REV_R1P0 1 +#define PERIPH_REV_R1P1 2 +#define PCELL_ID 0xff0 + +#define CR0_PERIPH_REQ_SET (1 << 0) +#define CR0_BOOT_EN_SET (1 << 1) +#define CR0_BOOT_MAN_NS (1 << 2) +#define CR0_NUM_CHANS_SHIFT 4 +#define CR0_NUM_CHANS_MASK 0x7 +#define CR0_NUM_PERIPH_SHIFT 12 +#define CR0_NUM_PERIPH_MASK 0x1f +#define CR0_NUM_EVENTS_SHIFT 17 +#define CR0_NUM_EVENTS_MASK 0x1f + +#define CR1_ICACHE_LEN_SHIFT 0 +#define CR1_ICACHE_LEN_MASK 0x7 +#define CR1_NUM_ICACHELINES_SHIFT 4 +#define CR1_NUM_ICACHELINES_MASK 0xf + +#define CRD_DATA_WIDTH_SHIFT 0 +#define CRD_DATA_WIDTH_MASK 0x7 +#define CRD_WR_CAP_SHIFT 4 +#define CRD_WR_CAP_MASK 0x7 +#define CRD_WR_Q_DEP_SHIFT 8 +#define CRD_WR_Q_DEP_MASK 0xf +#define CRD_RD_CAP_SHIFT 12 +#define CRD_RD_CAP_MASK 0x7 +#define CRD_RD_Q_DEP_SHIFT 16 +#define CRD_RD_Q_DEP_MASK 0xf +#define CRD_DATA_BUFF_SHIFT 20 +#define CRD_DATA_BUFF_MASK 0x3ff + +#define PART 0x330 +#define DESIGNER 0x41 +#define REVISION 0x0 +#define INTEG_CFG 0x0 +#define PERIPH_ID_VAL ((PART << 0) | (DESIGNER << 12)) + +#define PCELL_ID_VAL 0xb105f00d + +#define PL330_STATE_STOPPED (1 << 0) +#define PL330_STATE_EXECUTING (1 << 1) +#define PL330_STATE_WFE (1 << 2) +#define PL330_STATE_FAULTING (1 << 3) +#define PL330_STATE_COMPLETING (1 << 4) +#define PL330_STATE_WFP (1 << 5) +#define PL330_STATE_KILLING (1 << 6) +#define PL330_STATE_FAULT_COMPLETING (1 << 7) +#define PL330_STATE_CACHEMISS (1 << 8) +#define PL330_STATE_UPDTPC (1 << 9) +#define PL330_STATE_ATBARRIER (1 << 10) +#define PL330_STATE_QUEUEBUSY (1 << 11) +#define PL330_STATE_INVALID (1 << 15) + +#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \ + | PL330_STATE_WFE | PL330_STATE_FAULTING) + +#define CMD_DMAADDH 0x54 +#define CMD_DMAEND 0x00 +#define CMD_DMAFLUSHP 0x35 +#define CMD_DMAGO 0xa0 +#define CMD_DMALD 0x04 +#define CMD_DMALDP 0x25 +#define CMD_DMALP 0x20 +#define CMD_DMALPEND 0x28 +#define CMD_DMAKILL 0x01 +#define CMD_DMAMOV 0xbc +#define CMD_DMANOP 0x18 +#define CMD_DMARMB 0x12 +#define CMD_DMASEV 0x34 +#define CMD_DMAST 0x08 +#define CMD_DMASTP 0x29 +#define CMD_DMASTZ 0x0c +#define CMD_DMAWFE 0x36 +#define CMD_DMAWFP 0x30 +#define CMD_DMAWMB 0x13 + +#define SZ_DMAADDH 3 +#define SZ_DMAEND 1 +#define SZ_DMAFLUSHP 2 +#define SZ_DMALD 1 +#define SZ_DMALDP 2 +#define SZ_DMALP 2 +#define SZ_DMALPEND 2 +#define SZ_DMAKILL 1 +#define SZ_DMAMOV 6 +#define SZ_DMANOP 1 +#define SZ_DMARMB 1 +#define SZ_DMASEV 2 +#define SZ_DMAST 1 +#define SZ_DMASTP 2 +#define SZ_DMASTZ 1 +#define SZ_DMAWFE 2 +#define SZ_DMAWFP 2 +#define SZ_DMAWMB 1 +#define SZ_DMAGO 6 + +#define BRST_LEN(ccr) ((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1) +#define BRST_SIZE(ccr) (1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7)) + +#define BYTE_TO_BURST(b, ccr) ((b) / BRST_SIZE(ccr) / BRST_LEN(ccr)) +#define BURST_TO_BYTE(c, ccr) ((c) * BRST_SIZE(ccr) * BRST_LEN(ccr)) + +/* + * With 256 bytes, we can do more than 2.5MB and 5MB xfers per req + * at 1byte/burst for P<->M and M<->M respectively. + * For typical scenario, at 1word/burst, 10MB and 20MB xfers per req + * should be enough for P<->M and M<->M respectively. + */ +#define MCODE_BUFF_PER_REQ 256 + +/* If the _pl330_req is available to the client */ +#define IS_FREE(req) (*((u8 *)((req)->mc_cpu)) == CMD_DMAEND) + +/* Use this _only_ to wait on transient states */ +#define UNTIL(t, s) while (!(_state(t) & (s))) cpu_relax(); + +#ifdef PL330_DEBUG_MCGEN +static unsigned cmd_line; +#define PL330_DBGCMD_DUMP(off, x...) do { \ + printk("%x:", cmd_line); \ + printk(x); \ + cmd_line += off; \ + } while (0) +#define PL330_DBGMC_START(addr) (cmd_line = addr) +#else +#define PL330_DBGCMD_DUMP(off, x...) do {} while (0) +#define PL330_DBGMC_START(addr) do {} while (0) +#endif + +/* The number of default descriptors */ + +#define NR_DEFAULT_DESC 16 + +/* Populated by the PL330 core driver for DMA API driver's info */ +struct pl330_config { + u32 periph_id; + u32 pcell_id; +#define DMAC_MODE_NS (1 << 0) + unsigned int mode; + unsigned int data_bus_width:10; /* In number of bits */ + unsigned int data_buf_dep:10; + unsigned int num_chan:4; + unsigned int num_peri:6; + u32 peri_ns; + unsigned int num_events:6; + u32 irq_ns; +}; + +/* Handle to the DMAC provided to the PL330 core */ +struct pl330_info { + /* Owning device */ + struct device *dev; + /* Size of MicroCode buffers for each channel. */ + unsigned mcbufsz; + /* ioremap'ed address of PL330 registers. */ + void __iomem *base; + /* Client can freely use it. */ + void *client_data; + /* PL330 core data, Client must not touch it. */ + void *pl330_data; + /* Populated by the PL330 core driver during pl330_add */ + struct pl330_config pcfg; + /* + * If the DMAC has some reset mechanism, then the + * client may want to provide pointer to the method. + */ + void (*dmac_reset)(struct pl330_info *pi); +}; + +/** + * Request Configuration. + * The PL330 core does not modify this and uses the last + * working configuration if the request doesn't provide any. + * + * The Client may want to provide this info only for the + * first request and a request with new settings. + */ +struct pl330_reqcfg { + /* Address Incrementing */ + unsigned dst_inc:1; + unsigned src_inc:1; + + /* + * For now, the SRC & DST protection levels + * and burst size/length are assumed same. + */ + bool nonsecure; + bool privileged; + bool insnaccess; + unsigned brst_len:5; + unsigned brst_size:3; /* in power of 2 */ + + enum pl330_dstcachectrl dcctl; + enum pl330_srccachectrl scctl; + enum pl330_byteswap swap; + struct pl330_config *pcfg; +}; + +/* + * One cycle of DMAC operation. + * There may be more than one xfer in a request. + */ +struct pl330_xfer { + u32 src_addr; + u32 dst_addr; + /* Size to xfer */ + u32 bytes; + /* + * Pointer to next xfer in the list. + * The last xfer in the req must point to NULL. + */ + struct pl330_xfer *next; +}; + +/* The xfer callbacks are made with one of these arguments. */ +enum pl330_op_err { + /* The all xfers in the request were success. */ + PL330_ERR_NONE, + /* If req aborted due to global error. */ + PL330_ERR_ABORT, + /* If req failed due to problem with Channel. */ + PL330_ERR_FAIL, +}; + +/* A request defining Scatter-Gather List ending with NULL xfer. */ +struct pl330_req { + enum pl330_reqtype rqtype; + /* Index of peripheral for the xfer. */ + unsigned peri:5; + /* Unique token for this xfer, set by the client. */ + void *token; + /* Callback to be called after xfer. */ + void (*xfer_cb)(void *token, enum pl330_op_err err); + /* If NULL, req will be done at last set parameters. */ + struct pl330_reqcfg *cfg; + /* Pointer to first xfer in the request. */ + struct pl330_xfer *x; +}; + +/* + * To know the status of the channel and DMAC, the client + * provides a pointer to this structure. The PL330 core + * fills it with current information. + */ +struct pl330_chanstatus { + /* + * If the DMAC engine halted due to some error, + * the client should remove-add DMAC. + */ + bool dmac_halted; + /* + * If channel is halted due to some error, + * the client should ABORT/FLUSH and START the channel. + */ + bool faulting; + /* Location of last load */ + u32 src_addr; + /* Location of last store */ + u32 dst_addr; + /* + * Pointer to the currently active req, NULL if channel is + * inactive, even though the requests may be present. + */ + struct pl330_req *top_req; + /* Pointer to req waiting second in the queue if any. */ + struct pl330_req *wait_req; +}; + +enum pl330_chan_op { + /* Start the channel */ + PL330_OP_START, + /* Abort the active xfer */ + PL330_OP_ABORT, + /* Stop xfer and flush queue */ + PL330_OP_FLUSH, +}; + +struct _xfer_spec { + u32 ccr; + struct pl330_req *r; + struct pl330_xfer *x; +}; + +enum dmamov_dst { + SAR = 0, + CCR, + DAR, +}; + +enum pl330_dst { + SRC = 0, + DST, +}; + +enum pl330_cond { + SINGLE, + BURST, + ALWAYS, +}; + +struct _pl330_req { + u32 mc_bus; + void *mc_cpu; + /* Number of bytes taken to setup MC for the req */ + u32 mc_len; + struct pl330_req *r; + /* Hook to attach to DMAC's list of reqs with due callback */ + struct list_head rqd; +}; + +/* ToBeDone for tasklet */ +struct _pl330_tbd { + bool reset_dmac; + bool reset_mngr; + u8 reset_chan; +}; + +/* A DMAC Thread */ +struct pl330_thread { + u8 id; + int ev; + /* If the channel is not yet acquired by any client */ + bool free; + /* Parent DMAC */ + struct pl330_dmac *dmac; + /* Only two at a time */ + struct _pl330_req req[2]; + /* Index of the last enqueued request */ + unsigned lstenq; + /* Index of the last submitted request or -1 if the DMA is stopped */ + int req_running; +}; + +enum pl330_dmac_state { + UNINIT, + INIT, + DYING, +}; + +/* A DMAC */ +struct pl330_dmac { + spinlock_t lock; + /* Holds list of reqs with due callbacks */ + struct list_head req_done; + /* Pointer to platform specific stuff */ + struct pl330_info *pinfo; + /* Maximum possible events/irqs */ + int events[32]; + /* BUS address of MicroCode buffer */ + u32 mcode_bus; + /* CPU address of MicroCode buffer */ + void *mcode_cpu; + /* List of all Channel threads */ + struct pl330_thread *channels; + /* Pointer to the MANAGER thread */ + struct pl330_thread *manager; + /* To handle bad news in interrupt */ + struct tasklet_struct tasks; + struct _pl330_tbd dmac_tbd; + /* State of DMAC operation */ + enum pl330_dmac_state state; +}; + +enum desc_status { + /* In the DMAC pool */ + FREE, + /* + * Allocted to some channel during prep_xxx + * Also may be sitting on the work_list. + */ + PREP, + /* + * Sitting on the work_list and already submitted + * to the PL330 core. Not more than two descriptors + * of a channel can be BUSY at any time. + */ + BUSY, + /* + * Sitting on the channel work_list but xfer done + * by PL330 core + */ + DONE, +}; + +struct dma_pl330_chan { + /* Schedule desc completion */ + struct tasklet_struct task; + + /* DMA-Engine Channel */ + struct dma_chan chan; + + /* List of to be xfered descriptors */ + struct list_head work_list; + + /* Pointer to the DMAC that manages this channel, + * NULL if the channel is available to be acquired. + * As the parent, this DMAC also provides descriptors + * to the channel. + */ + struct dma_pl330_dmac *dmac; + + /* To protect channel manipulation */ + spinlock_t lock; + + /* Token of a hardware channel thread of PL330 DMAC + * NULL if the channel is available to be acquired. + */ + void *pl330_chid; + + /* For D-to-M and M-to-D channels */ + int burst_sz; /* the peripheral fifo width */ + int burst_len; /* the number of burst */ + dma_addr_t fifo_addr; + + /* for cyclic capability */ + bool cyclic; +}; + +struct dma_pl330_dmac { + struct pl330_info pif; + + /* DMA-Engine Device */ + struct dma_device ddma; + + /* Pool of descriptors available for the DMAC's channels */ + struct list_head desc_pool; + /* To protect desc_pool manipulation */ + spinlock_t pool_lock; + + /* Peripheral channels connected to this DMAC */ + struct dma_pl330_chan *peripherals; /* keep at end */ + + struct clk *clk; +}; + +struct dma_pl330_desc { + /* To attach to a queue as child */ + struct list_head node; + + /* Descriptor for the DMA Engine API */ + struct dma_async_tx_descriptor txd; + + /* Xfer for PL330 core */ + struct pl330_xfer px; + + struct pl330_reqcfg rqcfg; + struct pl330_req req; + + enum desc_status status; + + /* The channel which currently holds this desc */ + struct dma_pl330_chan *pchan; +}; + +static inline void _callback(struct pl330_req *r, enum pl330_op_err err) +{ + if (r && r->xfer_cb) + r->xfer_cb(r->token, err); +} + +static inline bool _queue_empty(struct pl330_thread *thrd) +{ + return (IS_FREE(&thrd->req[0]) && IS_FREE(&thrd->req[1])) + ? true : false; +} + +static inline bool _queue_full(struct pl330_thread *thrd) +{ + return (IS_FREE(&thrd->req[0]) || IS_FREE(&thrd->req[1])) + ? false : true; +} + +static inline bool is_manager(struct pl330_thread *thrd) +{ + struct pl330_dmac *pl330 = thrd->dmac; + + /* MANAGER is indexed at the end */ + if (thrd->id == pl330->pinfo->pcfg.num_chan) + return true; + else + return false; +} + +/* If manager of the thread is in Non-Secure mode */ +static inline bool _manager_ns(struct pl330_thread *thrd) +{ + struct pl330_dmac *pl330 = thrd->dmac; + + return (pl330->pinfo->pcfg.mode & DMAC_MODE_NS) ? true : false; +} + +static inline u32 get_id(struct pl330_info *pi, u32 off) +{ + void __iomem *regs = pi->base; + u32 id = 0; + + id |= (readb(regs + off + 0x0) << 0); + id |= (readb(regs + off + 0x4) << 8); + id |= (readb(regs + off + 0x8) << 16); + id |= (readb(regs + off + 0xc) << 24); + + return id; +} + +static inline u32 get_revision(u32 periph_id) +{ + return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK; +} + +static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[], + enum pl330_dst da, u16 val) +{ + if (dry_run) + return SZ_DMAADDH; + + buf[0] = CMD_DMAADDH; + buf[0] |= (da << 1); + *((u16 *)&buf[1]) = val; + + PL330_DBGCMD_DUMP(SZ_DMAADDH, "\tDMAADDH %s %u\n", + da == 1 ? "DA" : "SA", val); + + return SZ_DMAADDH; +} + +static inline u32 _emit_END(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMAEND; + + buf[0] = CMD_DMAEND; + + PL330_DBGCMD_DUMP(SZ_DMAEND, "\tDMAEND\n"); + + return SZ_DMAEND; +} + +static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri) +{ + if (dry_run) + return SZ_DMAFLUSHP; + + buf[0] = CMD_DMAFLUSHP; + + peri &= 0x1f; + peri <<= 3; + buf[1] = peri; + + PL330_DBGCMD_DUMP(SZ_DMAFLUSHP, "\tDMAFLUSHP %u\n", peri >> 3); + + return SZ_DMAFLUSHP; +} + +static inline u32 _emit_LD(unsigned dry_run, u8 buf[], enum pl330_cond cond) +{ + if (dry_run) + return SZ_DMALD; + + buf[0] = CMD_DMALD; + + if (cond == SINGLE) + buf[0] |= (0 << 1) | (1 << 0); + else if (cond == BURST) + buf[0] |= (1 << 1) | (1 << 0); + + PL330_DBGCMD_DUMP(SZ_DMALD, "\tDMALD%c\n", + cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A')); + + return SZ_DMALD; +} + +static inline u32 _emit_LDP(unsigned dry_run, u8 buf[], + enum pl330_cond cond, u8 peri) +{ + if (dry_run) + return SZ_DMALDP; + + buf[0] = CMD_DMALDP; + + if (cond == BURST) + buf[0] |= (1 << 1); + + peri &= 0x1f; + peri <<= 3; + buf[1] = peri; + + PL330_DBGCMD_DUMP(SZ_DMALDP, "\tDMALDP%c %u\n", + cond == SINGLE ? 'S' : 'B', peri >> 3); + + return SZ_DMALDP; +} + +static inline u32 _emit_LP(unsigned dry_run, u8 buf[], + unsigned loop, u8 cnt) +{ + if (dry_run) + return SZ_DMALP; + + buf[0] = CMD_DMALP; + + if (loop) + buf[0] |= (1 << 1); + + cnt--; /* DMAC increments by 1 internally */ + buf[1] = cnt; + + PL330_DBGCMD_DUMP(SZ_DMALP, "\tDMALP_%c %u\n", loop ? '1' : '0', cnt); + + return SZ_DMALP; +} + +struct _arg_LPEND { + enum pl330_cond cond; + bool forever; + unsigned loop; + u8 bjump; +}; + +static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[], + const struct _arg_LPEND *arg) +{ + enum pl330_cond cond = arg->cond; + bool forever = arg->forever; + unsigned loop = arg->loop; + u8 bjump = arg->bjump; + + if (dry_run) + return SZ_DMALPEND; + + buf[0] = CMD_DMALPEND; + + if (loop) + buf[0] |= (1 << 2); + + if (!forever) + buf[0] |= (1 << 4); + + if (cond == SINGLE) + buf[0] |= (0 << 1) | (1 << 0); + else if (cond == BURST) + buf[0] |= (1 << 1) | (1 << 0); + + buf[1] = bjump; + + PL330_DBGCMD_DUMP(SZ_DMALPEND, "\tDMALP%s%c_%c bjmpto_%x\n", + forever ? "FE" : "END", + cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'), + loop ? '1' : '0', + bjump); + + return SZ_DMALPEND; +} + +static inline u32 _emit_KILL(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMAKILL; + + buf[0] = CMD_DMAKILL; + + return SZ_DMAKILL; +} + +static inline u32 _emit_MOV(unsigned dry_run, u8 buf[], + enum dmamov_dst dst, u32 val) +{ + if (dry_run) + return SZ_DMAMOV; + + buf[0] = CMD_DMAMOV; + buf[1] = dst; + *((u32 *)&buf[2]) = val; + + PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n", + dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val); + + return SZ_DMAMOV; +} + +static inline u32 _emit_NOP(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMANOP; + + buf[0] = CMD_DMANOP; + + PL330_DBGCMD_DUMP(SZ_DMANOP, "\tDMANOP\n"); + + return SZ_DMANOP; +} + +static inline u32 _emit_RMB(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMARMB; + + buf[0] = CMD_DMARMB; + + PL330_DBGCMD_DUMP(SZ_DMARMB, "\tDMARMB\n"); + + return SZ_DMARMB; +} + +static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev) +{ + if (dry_run) + return SZ_DMASEV; + + buf[0] = CMD_DMASEV; + + ev &= 0x1f; + ev <<= 3; + buf[1] = ev; + + PL330_DBGCMD_DUMP(SZ_DMASEV, "\tDMASEV %u\n", ev >> 3); + + return SZ_DMASEV; +} + +static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond) +{ + if (dry_run) + return SZ_DMAST; + + buf[0] = CMD_DMAST; + + if (cond == SINGLE) + buf[0] |= (0 << 1) | (1 << 0); + else if (cond == BURST) + buf[0] |= (1 << 1) | (1 << 0); + + PL330_DBGCMD_DUMP(SZ_DMAST, "\tDMAST%c\n", + cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A')); + + return SZ_DMAST; +} + +static inline u32 _emit_STP(unsigned dry_run, u8 buf[], + enum pl330_cond cond, u8 peri) +{ + if (dry_run) + return SZ_DMASTP; + + buf[0] = CMD_DMASTP; + + if (cond == BURST) + buf[0] |= (1 << 1); + + peri &= 0x1f; + peri <<= 3; + buf[1] = peri; + + PL330_DBGCMD_DUMP(SZ_DMASTP, "\tDMASTP%c %u\n", + cond == SINGLE ? 'S' : 'B', peri >> 3); + + return SZ_DMASTP; +} + +static inline u32 _emit_STZ(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMASTZ; + + buf[0] = CMD_DMASTZ; + + PL330_DBGCMD_DUMP(SZ_DMASTZ, "\tDMASTZ\n"); + + return SZ_DMASTZ; +} + +static inline u32 _emit_WFE(unsigned dry_run, u8 buf[], u8 ev, + unsigned invalidate) +{ + if (dry_run) + return SZ_DMAWFE; + + buf[0] = CMD_DMAWFE; + + ev &= 0x1f; + ev <<= 3; + buf[1] = ev; + + if (invalidate) + buf[1] |= (1 << 1); + + PL330_DBGCMD_DUMP(SZ_DMAWFE, "\tDMAWFE %u%s\n", + ev >> 3, invalidate ? ", I" : ""); + + return SZ_DMAWFE; +} + +static inline u32 _emit_WFP(unsigned dry_run, u8 buf[], + enum pl330_cond cond, u8 peri) +{ + if (dry_run) + return SZ_DMAWFP; + + buf[0] = CMD_DMAWFP; + + if (cond == SINGLE) + buf[0] |= (0 << 1) | (0 << 0); + else if (cond == BURST) + buf[0] |= (1 << 1) | (0 << 0); + else + buf[0] |= (0 << 1) | (1 << 0); + + peri &= 0x1f; + peri <<= 3; + buf[1] = peri; + + PL330_DBGCMD_DUMP(SZ_DMAWFP, "\tDMAWFP%c %u\n", + cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'P'), peri >> 3); + + return SZ_DMAWFP; +} + +static inline u32 _emit_WMB(unsigned dry_run, u8 buf[]) +{ + if (dry_run) + return SZ_DMAWMB; + + buf[0] = CMD_DMAWMB; + + PL330_DBGCMD_DUMP(SZ_DMAWMB, "\tDMAWMB\n"); + + return SZ_DMAWMB; +} + +struct _arg_GO { + u8 chan; + u32 addr; + unsigned ns; +}; + +static inline u32 _emit_GO(unsigned dry_run, u8 buf[], + const struct _arg_GO *arg) +{ + u8 chan = arg->chan; + u32 addr = arg->addr; + unsigned ns = arg->ns; + + if (dry_run) + return SZ_DMAGO; + + buf[0] = CMD_DMAGO; + buf[0] |= (ns << 1); + + buf[1] = chan & 0x7; + + *((u32 *)&buf[2]) = addr; + + return SZ_DMAGO; +} + +#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t) + +/* Returns Time-Out */ +static bool _until_dmac_idle(struct pl330_thread *thrd) +{ + void __iomem *regs = thrd->dmac->pinfo->base; + unsigned long loops = msecs_to_loops(5); + + do { + /* Until Manager is Idle */ + if (!(readl(regs + DBGSTATUS) & DBG_BUSY)) + break; + + cpu_relax(); + } while (--loops); + + if (!loops) + return true; + + return false; +} + +static inline void _execute_DBGINSN(struct pl330_thread *thrd, + u8 insn[], bool as_manager) +{ + void __iomem *regs = thrd->dmac->pinfo->base; + u32 val; + + val = (insn[0] << 16) | (insn[1] << 24); + if (!as_manager) { + val |= (1 << 0); + val |= (thrd->id << 8); /* Channel Number */ + } + writel(val, regs + DBGINST0); + + val = *((u32 *)&insn[2]); + writel(val, regs + DBGINST1); + + /* If timed out due to halted state-machine */ + if (_until_dmac_idle(thrd)) { + dev_err(thrd->dmac->pinfo->dev, "DMAC halted!\n"); + return; + } + + /* Get going */ + writel(0, regs + DBGCMD); +} + +/* + * Mark a _pl330_req as free. + * We do it by writing DMAEND as the first instruction + * because no valid request is going to have DMAEND as + * its first instruction to execute. + */ +static void mark_free(struct pl330_thread *thrd, int idx) +{ + struct _pl330_req *req = &thrd->req[idx]; + + _emit_END(0, req->mc_cpu); + req->mc_len = 0; + + thrd->req_running = -1; +} + +static inline u32 _state(struct pl330_thread *thrd) +{ + void __iomem *regs = thrd->dmac->pinfo->base; + u32 val; + + if (is_manager(thrd)) + val = readl(regs + DS) & 0xf; + else + val = readl(regs + CS(thrd->id)) & 0xf; + + switch (val) { + case DS_ST_STOP: + return PL330_STATE_STOPPED; + case DS_ST_EXEC: + return PL330_STATE_EXECUTING; + case DS_ST_CMISS: + return PL330_STATE_CACHEMISS; + case DS_ST_UPDTPC: + return PL330_STATE_UPDTPC; + case DS_ST_WFE: + return PL330_STATE_WFE; + case DS_ST_FAULT: + return PL330_STATE_FAULTING; + case DS_ST_ATBRR: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_ATBARRIER; + case DS_ST_QBUSY: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_QUEUEBUSY; + case DS_ST_WFP: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_WFP; + case DS_ST_KILL: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_KILLING; + case DS_ST_CMPLT: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_COMPLETING; + case DS_ST_FLTCMP: + if (is_manager(thrd)) + return PL330_STATE_INVALID; + else + return PL330_STATE_FAULT_COMPLETING; + default: + return PL330_STATE_INVALID; + } +} + +static void _stop(struct pl330_thread *thrd) +{ + void __iomem *regs = thrd->dmac->pinfo->base; + u8 insn[6] = {0, 0, 0, 0, 0, 0}; + + if (_state(thrd) == PL330_STATE_FAULT_COMPLETING) + UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING); + + /* Return if nothing needs to be done */ + if (_state(thrd) == PL330_STATE_COMPLETING + || _state(thrd) == PL330_STATE_KILLING + || _state(thrd) == PL330_STATE_STOPPED) + return; + + _emit_KILL(0, insn); + + /* Stop generating interrupts for SEV */ + writel(readl(regs + INTEN) & ~(1 << thrd->ev), regs + INTEN); + + _execute_DBGINSN(thrd, insn, is_manager(thrd)); +} + +/* Start doing req 'idx' of thread 'thrd' */ +static bool _trigger(struct pl330_thread *thrd) +{ + void __iomem *regs = thrd->dmac->pinfo->base; + struct _pl330_req *req; + struct pl330_req *r; + struct _arg_GO go; + unsigned ns; + u8 insn[6] = {0, 0, 0, 0, 0, 0}; + int idx; + + /* Return if already ACTIVE */ + if (_state(thrd) != PL330_STATE_STOPPED) + return true; + + idx = 1 - thrd->lstenq; + if (!IS_FREE(&thrd->req[idx])) + req = &thrd->req[idx]; + else { + idx = thrd->lstenq; + if (!IS_FREE(&thrd->req[idx])) + req = &thrd->req[idx]; + else + req = NULL; + } + + /* Return if no request */ + if (!req || !req->r) + return true; + + r = req->r; + + if (r->cfg) + ns = r->cfg->nonsecure ? 1 : 0; + else if (readl(regs + CS(thrd->id)) & CS_CNS) + ns = 1; + else + ns = 0; + + /* See 'Abort Sources' point-4 at Page 2-25 */ + if (_manager_ns(thrd) && !ns) + dev_info(thrd->dmac->pinfo->dev, "%s:%d Recipe for ABORT!\n", + __func__, __LINE__); + + go.chan = thrd->id; + go.addr = req->mc_bus; + go.ns = ns; + _emit_GO(0, insn, &go); + + /* Set to generate interrupts for SEV */ + writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN); + + /* Only manager can execute GO */ + _execute_DBGINSN(thrd, insn, true); + + thrd->req_running = idx; + + return true; +} + +static bool _start(struct pl330_thread *thrd) +{ + switch (_state(thrd)) { + case PL330_STATE_FAULT_COMPLETING: + UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING); + + if (_state(thrd) == PL330_STATE_KILLING) + UNTIL(thrd, PL330_STATE_STOPPED) + + case PL330_STATE_FAULTING: + _stop(thrd); + + case PL330_STATE_KILLING: + case PL330_STATE_COMPLETING: + UNTIL(thrd, PL330_STATE_STOPPED) + + case PL330_STATE_STOPPED: + return _trigger(thrd); + + case PL330_STATE_WFP: + case PL330_STATE_QUEUEBUSY: + case PL330_STATE_ATBARRIER: + case PL330_STATE_UPDTPC: + case PL330_STATE_CACHEMISS: + case PL330_STATE_EXECUTING: + return true; + + case PL330_STATE_WFE: /* For RESUME, nothing yet */ + default: + return false; + } +} + +static inline int _ldst_memtomem(unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs, int cyc) +{ + int off = 0; + struct pl330_config *pcfg = pxs->r->cfg->pcfg; + + /* check lock-up free version */ + if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) { + while (cyc--) { + off += _emit_LD(dry_run, &buf[off], ALWAYS); + off += _emit_ST(dry_run, &buf[off], ALWAYS); + } + } else { + while (cyc--) { + off += _emit_LD(dry_run, &buf[off], ALWAYS); + off += _emit_RMB(dry_run, &buf[off]); + off += _emit_ST(dry_run, &buf[off], ALWAYS); + off += _emit_WMB(dry_run, &buf[off]); + } + } + + return off; +} + +static inline int _ldst_devtomem(unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs, int cyc) +{ + int off = 0; + + while (cyc--) { + off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri); + off += _emit_LDP(dry_run, &buf[off], SINGLE, pxs->r->peri); + off += _emit_ST(dry_run, &buf[off], ALWAYS); + off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri); + } + + return off; +} + +static inline int _ldst_memtodev(unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs, int cyc) +{ + int off = 0; + + while (cyc--) { + off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri); + off += _emit_LD(dry_run, &buf[off], ALWAYS); + off += _emit_STP(dry_run, &buf[off], SINGLE, pxs->r->peri); + off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri); + } + + return off; +} + +static int _bursts(unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs, int cyc) +{ + int off = 0; + + switch (pxs->r->rqtype) { + case MEMTODEV: + off += _ldst_memtodev(dry_run, &buf[off], pxs, cyc); + break; + case DEVTOMEM: + off += _ldst_devtomem(dry_run, &buf[off], pxs, cyc); + break; + case MEMTOMEM: + off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc); + break; + default: + off += 0x40000000; /* Scare off the Client */ + break; + } + + return off; +} + +/* Returns bytes consumed and updates bursts */ +static inline int _loop(unsigned dry_run, u8 buf[], + unsigned long *bursts, const struct _xfer_spec *pxs) +{ + int cyc, cycmax, szlp, szlpend, szbrst, off; + unsigned lcnt0, lcnt1, ljmp0, ljmp1; + struct _arg_LPEND lpend; + + /* Max iterations possible in DMALP is 256 */ + if (*bursts >= 256*256) { + lcnt1 = 256; + lcnt0 = 256; + cyc = *bursts / lcnt1 / lcnt0; + } else if (*bursts > 256) { + lcnt1 = 256; + lcnt0 = *bursts / lcnt1; + cyc = 1; + } else { + lcnt1 = *bursts; + lcnt0 = 0; + cyc = 1; + } + + szlp = _emit_LP(1, buf, 0, 0); + szbrst = _bursts(1, buf, pxs, 1); + + lpend.cond = ALWAYS; + lpend.forever = false; + lpend.loop = 0; + lpend.bjump = 0; + szlpend = _emit_LPEND(1, buf, &lpend); + + if (lcnt0) { + szlp *= 2; + szlpend *= 2; + } + + /* + * Max bursts that we can unroll due to limit on the + * size of backward jump that can be encoded in DMALPEND + * which is 8-bits and hence 255 + */ + cycmax = (255 - (szlp + szlpend)) / szbrst; + + cyc = (cycmax < cyc) ? cycmax : cyc; + + off = 0; + + if (lcnt0) { + off += _emit_LP(dry_run, &buf[off], 0, lcnt0); + ljmp0 = off; + } + + off += _emit_LP(dry_run, &buf[off], 1, lcnt1); + ljmp1 = off; + + off += _bursts(dry_run, &buf[off], pxs, cyc); + + lpend.cond = ALWAYS; + lpend.forever = false; + lpend.loop = 1; + lpend.bjump = off - ljmp1; + off += _emit_LPEND(dry_run, &buf[off], &lpend); + + if (lcnt0) { + lpend.cond = ALWAYS; + lpend.forever = false; + lpend.loop = 0; + lpend.bjump = off - ljmp0; + off += _emit_LPEND(dry_run, &buf[off], &lpend); + } + + *bursts = lcnt1 * cyc; + if (lcnt0) + *bursts *= lcnt0; + + return off; +} + +static inline int _setup_loops(unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs) +{ + struct pl330_xfer *x = pxs->x; + u32 ccr = pxs->ccr; + unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr); + int off = 0; + + while (bursts) { + c = bursts; + off += _loop(dry_run, &buf[off], &c, pxs); + bursts -= c; + } + + return off; +} + +static inline int _setup_xfer(unsigned dry_run, u8 buf[], + const struct _xfer_spec *pxs) +{ + struct pl330_xfer *x = pxs->x; + int off = 0; + + /* DMAMOV SAR, x->src_addr */ + off += _emit_MOV(dry_run, &buf[off], SAR, x->src_addr); + /* DMAMOV DAR, x->dst_addr */ + off += _emit_MOV(dry_run, &buf[off], DAR, x->dst_addr); + + /* Setup Loop(s) */ + off += _setup_loops(dry_run, &buf[off], pxs); + + return off; +} + +/* + * A req is a sequence of one or more xfer units. + * Returns the number of bytes taken to setup the MC for the req. + */ +static int _setup_req(unsigned dry_run, struct pl330_thread *thrd, + unsigned index, struct _xfer_spec *pxs) +{ + struct _pl330_req *req = &thrd->req[index]; + struct pl330_xfer *x; + u8 *buf = req->mc_cpu; + int off = 0; + + PL330_DBGMC_START(req->mc_bus); + + /* DMAMOV CCR, ccr */ + off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr); + + x = pxs->r->x; + do { + /* Error if xfer length is not aligned at burst size */ + if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr))) + return -EINVAL; + + pxs->x = x; + off += _setup_xfer(dry_run, &buf[off], pxs); + + x = x->next; + } while (x); + + /* DMASEV peripheral/event */ + off += _emit_SEV(dry_run, &buf[off], thrd->ev); + /* DMAEND */ + off += _emit_END(dry_run, &buf[off]); + + return off; +} + +static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc) +{ + u32 ccr = 0; + + if (rqc->src_inc) + ccr |= CC_SRCINC; + + if (rqc->dst_inc) + ccr |= CC_DSTINC; + + /* We set same protection levels for Src and DST for now */ + if (rqc->privileged) + ccr |= CC_SRCPRI | CC_DSTPRI; + if (rqc->nonsecure) + ccr |= CC_SRCNS | CC_DSTNS; + if (rqc->insnaccess) + ccr |= CC_SRCIA | CC_DSTIA; + + ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT); + ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT); + + ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT); + ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT); + + ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT); + ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT); + + ccr |= (rqc->swap << CC_SWAP_SHFT); + + return ccr; +} + +static inline bool _is_valid(u32 ccr) +{ + enum pl330_dstcachectrl dcctl; + enum pl330_srccachectrl scctl; + + dcctl = (ccr >> CC_DSTCCTRL_SHFT) & CC_DRCCCTRL_MASK; + scctl = (ccr >> CC_SRCCCTRL_SHFT) & CC_SRCCCTRL_MASK; + + if (dcctl == DINVALID1 || dcctl == DINVALID2 + || scctl == SINVALID1 || scctl == SINVALID2) + return false; + else + return true; +} + +/* + * Submit a list of xfers after which the client wants notification. + * Client is not notified after each xfer unit, just once after all + * xfer units are done or some error occurs. + */ +static int pl330_submit_req(void *ch_id, struct pl330_req *r) +{ + struct pl330_thread *thrd = ch_id; + struct pl330_dmac *pl330; + struct pl330_info *pi; + struct _xfer_spec xs; + unsigned long flags; + void __iomem *regs; + unsigned idx; + u32 ccr; + int ret = 0; + + /* No Req or Unacquired Channel or DMAC */ + if (!r || !thrd || thrd->free) + return -EINVAL; + + pl330 = thrd->dmac; + pi = pl330->pinfo; + regs = pi->base; + + if (pl330->state == DYING + || pl330->dmac_tbd.reset_chan & (1 << thrd->id)) { + dev_info(thrd->dmac->pinfo->dev, "%s:%d\n", + __func__, __LINE__); + return -EAGAIN; + } + + /* If request for non-existing peripheral */ + if (r->rqtype != MEMTOMEM && r->peri >= pi->pcfg.num_peri) { + dev_info(thrd->dmac->pinfo->dev, + "%s:%d Invalid peripheral(%u)!\n", + __func__, __LINE__, r->peri); + return -EINVAL; + } + + spin_lock_irqsave(&pl330->lock, flags); + + if (_queue_full(thrd)) { + ret = -EAGAIN; + goto xfer_exit; + } + + /* Prefer Secure Channel */ + if (!_manager_ns(thrd)) + r->cfg->nonsecure = 0; + else + r->cfg->nonsecure = 1; + + /* Use last settings, if not provided */ + if (r->cfg) + ccr = _prepare_ccr(r->cfg); + else + ccr = readl(regs + CC(thrd->id)); + + /* If this req doesn't have valid xfer settings */ + if (!_is_valid(ccr)) { + ret = -EINVAL; + dev_info(thrd->dmac->pinfo->dev, "%s:%d Invalid CCR(%x)!\n", + __func__, __LINE__, ccr); + goto xfer_exit; + } + + idx = IS_FREE(&thrd->req[0]) ? 0 : 1; + + xs.ccr = ccr; + xs.r = r; + + /* First dry run to check if req is acceptable */ + ret = _setup_req(1, thrd, idx, &xs); + if (ret < 0) + goto xfer_exit; + + if (ret > pi->mcbufsz / 2) { + dev_info(thrd->dmac->pinfo->dev, + "%s:%d Trying increasing mcbufsz\n", + __func__, __LINE__); + ret = -ENOMEM; + goto xfer_exit; + } + + /* Hook the request */ + thrd->lstenq = idx; + thrd->req[idx].mc_len = _setup_req(0, thrd, idx, &xs); + thrd->req[idx].r = r; + + ret = 0; + +xfer_exit: + spin_unlock_irqrestore(&pl330->lock, flags); + + return ret; +} + +static void pl330_dotask(unsigned long data) +{ + struct pl330_dmac *pl330 = (struct pl330_dmac *) data; + struct pl330_info *pi = pl330->pinfo; + unsigned long flags; + int i; + + spin_lock_irqsave(&pl330->lock, flags); + + /* The DMAC itself gone nuts */ + if (pl330->dmac_tbd.reset_dmac) { + pl330->state = DYING; + /* Reset the manager too */ + pl330->dmac_tbd.reset_mngr = true; + /* Clear the reset flag */ + pl330->dmac_tbd.reset_dmac = false; + } + + if (pl330->dmac_tbd.reset_mngr) { + _stop(pl330->manager); + /* Reset all channels */ + pl330->dmac_tbd.reset_chan = (1 << pi->pcfg.num_chan) - 1; + /* Clear the reset flag */ + pl330->dmac_tbd.reset_mngr = false; + } + + for (i = 0; i < pi->pcfg.num_chan; i++) { + + if (pl330->dmac_tbd.reset_chan & (1 << i)) { + struct pl330_thread *thrd = &pl330->channels[i]; + void __iomem *regs = pi->base; + enum pl330_op_err err; + + _stop(thrd); + + if (readl(regs + FSC) & (1 << thrd->id)) + err = PL330_ERR_FAIL; + else + err = PL330_ERR_ABORT; + + spin_unlock_irqrestore(&pl330->lock, flags); + + _callback(thrd->req[1 - thrd->lstenq].r, err); + _callback(thrd->req[thrd->lstenq].r, err); + + spin_lock_irqsave(&pl330->lock, flags); + + thrd->req[0].r = NULL; + thrd->req[1].r = NULL; + mark_free(thrd, 0); + mark_free(thrd, 1); + + /* Clear the reset flag */ + pl330->dmac_tbd.reset_chan &= ~(1 << i); + } + } + + spin_unlock_irqrestore(&pl330->lock, flags); + + return; +} + +/* Returns 1 if state was updated, 0 otherwise */ +static int pl330_update(const struct pl330_info *pi) +{ + struct _pl330_req *rqdone; + struct pl330_dmac *pl330; + unsigned long flags; + void __iomem *regs; + u32 val; + int id, ev, ret = 0; + + if (!pi || !pi->pl330_data) + return 0; + + regs = pi->base; + pl330 = pi->pl330_data; + + spin_lock_irqsave(&pl330->lock, flags); + + val = readl(regs + FSM) & 0x1; + if (val) + pl330->dmac_tbd.reset_mngr = true; + else + pl330->dmac_tbd.reset_mngr = false; + + val = readl(regs + FSC) & ((1 << pi->pcfg.num_chan) - 1); + pl330->dmac_tbd.reset_chan |= val; + if (val) { + int i = 0; + while (i < pi->pcfg.num_chan) { + if (val & (1 << i)) { + dev_info(pi->dev, + "Reset Channel-%d\t CS-%x FTC-%x\n", + i, readl(regs + CS(i)), + readl(regs + FTC(i))); + _stop(&pl330->channels[i]); + } + i++; + } + } + + /* Check which event happened i.e, thread notified */ + val = readl(regs + ES); + if (pi->pcfg.num_events < 32 + && val & ~((1 << pi->pcfg.num_events) - 1)) { + pl330->dmac_tbd.reset_dmac = true; + dev_err(pi->dev, "%s:%d Unexpected!\n", __func__, __LINE__); + ret = 1; + goto updt_exit; + } + + for (ev = 0; ev < pi->pcfg.num_events; ev++) { + if (val & (1 << ev)) { /* Event occurred */ + struct pl330_thread *thrd; + u32 inten = readl(regs + INTEN); + int active; + + /* Clear the event */ + if (inten & (1 << ev)) + writel(1 << ev, regs + INTCLR); + + ret = 1; + + id = pl330->events[ev]; + + thrd = &pl330->channels[id]; + + active = thrd->req_running; + if (active == -1) /* Aborted */ + continue; + + rqdone = &thrd->req[active]; + mark_free(thrd, active); + + /* Get going again ASAP */ + _start(thrd); + + /* For now, just make a list of callbacks to be done */ + list_add_tail(&rqdone->rqd, &pl330->req_done); + } + } + + /* Now that we are in no hurry, do the callbacks */ + while (!list_empty(&pl330->req_done)) { + struct pl330_req *r; + + rqdone = container_of(pl330->req_done.next, + struct _pl330_req, rqd); + + list_del_init(&rqdone->rqd); + + /* Detach the req */ + r = rqdone->r; + rqdone->r = NULL; + + spin_unlock_irqrestore(&pl330->lock, flags); + _callback(r, PL330_ERR_NONE); + spin_lock_irqsave(&pl330->lock, flags); + } + +updt_exit: + spin_unlock_irqrestore(&pl330->lock, flags); + + if (pl330->dmac_tbd.reset_dmac + || pl330->dmac_tbd.reset_mngr + || pl330->dmac_tbd.reset_chan) { + ret = 1; + tasklet_schedule(&pl330->tasks); + } + + return ret; +} + +static int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op) +{ + struct pl330_thread *thrd = ch_id; + struct pl330_dmac *pl330; + unsigned long flags; + int ret = 0, active; + + if (!thrd || thrd->free || thrd->dmac->state == DYING) + return -EINVAL; + + pl330 = thrd->dmac; + active = thrd->req_running; + + spin_lock_irqsave(&pl330->lock, flags); + + switch (op) { + case PL330_OP_FLUSH: + /* Make sure the channel is stopped */ + _stop(thrd); + + thrd->req[0].r = NULL; + thrd->req[1].r = NULL; + mark_free(thrd, 0); + mark_free(thrd, 1); + break; + + case PL330_OP_ABORT: + /* Make sure the channel is stopped */ + _stop(thrd); + + /* ABORT is only for the active req */ + if (active == -1) + break; + + thrd->req[active].r = NULL; + mark_free(thrd, active); + + /* Start the next */ + case PL330_OP_START: + if ((active == -1) && !_start(thrd)) + ret = -EIO; + break; + + default: + ret = -EINVAL; + } + + spin_unlock_irqrestore(&pl330->lock, flags); + return ret; +} + +/* Reserve an event */ +static inline int _alloc_event(struct pl330_thread *thrd) +{ + struct pl330_dmac *pl330 = thrd->dmac; + struct pl330_info *pi = pl330->pinfo; + int ev; + + for (ev = 0; ev < pi->pcfg.num_events; ev++) + if (pl330->events[ev] == -1) { + pl330->events[ev] = thrd->id; + return ev; + } + + return -1; +} + +static bool _chan_ns(const struct pl330_info *pi, int i) +{ + return pi->pcfg.irq_ns & (1 << i); +} + +/* Upon success, returns IdentityToken for the + * allocated channel, NULL otherwise. + */ +static void *pl330_request_channel(const struct pl330_info *pi) +{ + struct pl330_thread *thrd = NULL; + struct pl330_dmac *pl330; + unsigned long flags; + int chans, i; + + if (!pi || !pi->pl330_data) + return NULL; + + pl330 = pi->pl330_data; + + if (pl330->state == DYING) + return NULL; + + chans = pi->pcfg.num_chan; + + spin_lock_irqsave(&pl330->lock, flags); + + for (i = 0; i < chans; i++) { + thrd = &pl330->channels[i]; + if ((thrd->free) && (!_manager_ns(thrd) || + _chan_ns(pi, i))) { + thrd->ev = _alloc_event(thrd); + if (thrd->ev >= 0) { + thrd->free = false; + thrd->lstenq = 1; + thrd->req[0].r = NULL; + mark_free(thrd, 0); + thrd->req[1].r = NULL; + mark_free(thrd, 1); + break; + } + } + thrd = NULL; + } + + spin_unlock_irqrestore(&pl330->lock, flags); + + return thrd; +} + +/* Release an event */ +static inline void _free_event(struct pl330_thread *thrd, int ev) +{ + struct pl330_dmac *pl330 = thrd->dmac; + struct pl330_info *pi = pl330->pinfo; + + /* If the event is valid and was held by the thread */ + if (ev >= 0 && ev < pi->pcfg.num_events + && pl330->events[ev] == thrd->id) + pl330->events[ev] = -1; +} + +static void pl330_release_channel(void *ch_id) +{ + struct pl330_thread *thrd = ch_id; + struct pl330_dmac *pl330; + unsigned long flags; + + if (!thrd || thrd->free) + return; + + _stop(thrd); + + _callback(thrd->req[1 - thrd->lstenq].r, PL330_ERR_ABORT); + _callback(thrd->req[thrd->lstenq].r, PL330_ERR_ABORT); + + pl330 = thrd->dmac; + + spin_lock_irqsave(&pl330->lock, flags); + _free_event(thrd, thrd->ev); + thrd->free = true; + spin_unlock_irqrestore(&pl330->lock, flags); +} + +/* Initialize the structure for PL330 configuration, that can be used + * by the client driver the make best use of the DMAC + */ +static void read_dmac_config(struct pl330_info *pi) +{ + void __iomem *regs = pi->base; + u32 val; + + val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT; + val &= CRD_DATA_WIDTH_MASK; + pi->pcfg.data_bus_width = 8 * (1 << val); + + val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT; + val &= CRD_DATA_BUFF_MASK; + pi->pcfg.data_buf_dep = val + 1; + + val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT; + val &= CR0_NUM_CHANS_MASK; + val += 1; + pi->pcfg.num_chan = val; + + val = readl(regs + CR0); + if (val & CR0_PERIPH_REQ_SET) { + val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK; + val += 1; + pi->pcfg.num_peri = val; + pi->pcfg.peri_ns = readl(regs + CR4); + } else { + pi->pcfg.num_peri = 0; + } + + val = readl(regs + CR0); + if (val & CR0_BOOT_MAN_NS) + pi->pcfg.mode |= DMAC_MODE_NS; + else + pi->pcfg.mode &= ~DMAC_MODE_NS; + + val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT; + val &= CR0_NUM_EVENTS_MASK; + val += 1; + pi->pcfg.num_events = val; + + pi->pcfg.irq_ns = readl(regs + CR3); + + pi->pcfg.periph_id = get_id(pi, PERIPH_ID); + pi->pcfg.pcell_id = get_id(pi, PCELL_ID); +} + +static inline void _reset_thread(struct pl330_thread *thrd) +{ + struct pl330_dmac *pl330 = thrd->dmac; + struct pl330_info *pi = pl330->pinfo; + + thrd->req[0].mc_cpu = pl330->mcode_cpu + + (thrd->id * pi->mcbufsz); + thrd->req[0].mc_bus = pl330->mcode_bus + + (thrd->id * pi->mcbufsz); + thrd->req[0].r = NULL; + mark_free(thrd, 0); + + thrd->req[1].mc_cpu = thrd->req[0].mc_cpu + + pi->mcbufsz / 2; + thrd->req[1].mc_bus = thrd->req[0].mc_bus + + pi->mcbufsz / 2; + thrd->req[1].r = NULL; + mark_free(thrd, 1); +} + +static int dmac_alloc_threads(struct pl330_dmac *pl330) +{ + struct pl330_info *pi = pl330->pinfo; + int chans = pi->pcfg.num_chan; + struct pl330_thread *thrd; + int i; + + /* Allocate 1 Manager and 'chans' Channel threads */ + pl330->channels = kzalloc((1 + chans) * sizeof(*thrd), + GFP_KERNEL); + if (!pl330->channels) + return -ENOMEM; + + /* Init Channel threads */ + for (i = 0; i < chans; i++) { + thrd = &pl330->channels[i]; + thrd->id = i; + thrd->dmac = pl330; + _reset_thread(thrd); + thrd->free = true; + } + + /* MANAGER is indexed at the end */ + thrd = &pl330->channels[chans]; + thrd->id = chans; + thrd->dmac = pl330; + thrd->free = false; + pl330->manager = thrd; + + return 0; +} + +static int dmac_alloc_resources(struct pl330_dmac *pl330) +{ + struct pl330_info *pi = pl330->pinfo; + int chans = pi->pcfg.num_chan; + int ret; + + /* + * Alloc MicroCode buffer for 'chans' Channel threads. + * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN) + */ + pl330->mcode_cpu = dma_alloc_coherent(pi->dev, + chans * pi->mcbufsz, + &pl330->mcode_bus, GFP_KERNEL); + if (!pl330->mcode_cpu) { + dev_err(pi->dev, "%s:%d Can't allocate memory!\n", + __func__, __LINE__); + return -ENOMEM; + } + + ret = dmac_alloc_threads(pl330); + if (ret) { + dev_err(pi->dev, "%s:%d Can't to create channels for DMAC!\n", + __func__, __LINE__); + dma_free_coherent(pi->dev, + chans * pi->mcbufsz, + pl330->mcode_cpu, pl330->mcode_bus); + return ret; + } + + return 0; +} + +static int pl330_add(struct pl330_info *pi) +{ + struct pl330_dmac *pl330; + void __iomem *regs; + int i, ret; + + if (!pi || !pi->dev) + return -EINVAL; + + /* If already added */ + if (pi->pl330_data) + return -EINVAL; + + /* + * If the SoC can perform reset on the DMAC, then do it + * before reading its configuration. + */ + if (pi->dmac_reset) + pi->dmac_reset(pi); + + regs = pi->base; + + /* Check if we can handle this DMAC */ + if ((get_id(pi, PERIPH_ID) & 0xfffff) != PERIPH_ID_VAL + || get_id(pi, PCELL_ID) != PCELL_ID_VAL) { + dev_err(pi->dev, "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n", + get_id(pi, PERIPH_ID), get_id(pi, PCELL_ID)); + return -EINVAL; + } + + /* Read the configuration of the DMAC */ + read_dmac_config(pi); + + if (pi->pcfg.num_events == 0) { + dev_err(pi->dev, "%s:%d Can't work without events!\n", + __func__, __LINE__); + return -EINVAL; + } + + pl330 = kzalloc(sizeof(*pl330), GFP_KERNEL); + if (!pl330) { + dev_err(pi->dev, "%s:%d Can't allocate memory!\n", + __func__, __LINE__); + return -ENOMEM; + } + + /* Assign the info structure and private data */ + pl330->pinfo = pi; + pi->pl330_data = pl330; + + spin_lock_init(&pl330->lock); + + INIT_LIST_HEAD(&pl330->req_done); + + /* Use default MC buffer size if not provided */ + if (!pi->mcbufsz) + pi->mcbufsz = MCODE_BUFF_PER_REQ * 2; + + /* Mark all events as free */ + for (i = 0; i < pi->pcfg.num_events; i++) + pl330->events[i] = -1; + + /* Allocate resources needed by the DMAC */ + ret = dmac_alloc_resources(pl330); + if (ret) { + dev_err(pi->dev, "Unable to create channels for DMAC\n"); + kfree(pl330); + return ret; + } + + tasklet_init(&pl330->tasks, pl330_dotask, (unsigned long) pl330); + + pl330->state = INIT; + + return 0; +} + +static int dmac_free_threads(struct pl330_dmac *pl330) +{ + struct pl330_info *pi = pl330->pinfo; + int chans = pi->pcfg.num_chan; + struct pl330_thread *thrd; + int i; + + /* Release Channel threads */ + for (i = 0; i < chans; i++) { + thrd = &pl330->channels[i]; + pl330_release_channel((void *)thrd); + } + + /* Free memory */ + kfree(pl330->channels); + + return 0; +} + +static void dmac_free_resources(struct pl330_dmac *pl330) +{ + struct pl330_info *pi = pl330->pinfo; + int chans = pi->pcfg.num_chan; + + dmac_free_threads(pl330); + + dma_free_coherent(pi->dev, chans * pi->mcbufsz, + pl330->mcode_cpu, pl330->mcode_bus); +} + +static void pl330_del(struct pl330_info *pi) +{ + struct pl330_dmac *pl330; + + if (!pi || !pi->pl330_data) + return; + + pl330 = pi->pl330_data; + + pl330->state = UNINIT; + + tasklet_kill(&pl330->tasks); + + /* Free DMAC resources */ + dmac_free_resources(pl330); + + kfree(pl330); + pi->pl330_data = NULL; +} + +/* forward declaration */ +static struct amba_driver pl330_driver; + +static inline struct dma_pl330_chan * +to_pchan(struct dma_chan *ch) +{ + if (!ch) + return NULL; + + return container_of(ch, struct dma_pl330_chan, chan); +} + +static inline struct dma_pl330_desc * +to_desc(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct dma_pl330_desc, txd); +} + +static inline void free_desc_list(struct list_head *list) +{ + struct dma_pl330_dmac *pdmac; + struct dma_pl330_desc *desc; + struct dma_pl330_chan *pch = NULL; + unsigned long flags; + + /* Finish off the work list */ + list_for_each_entry(desc, list, node) { + dma_async_tx_callback callback; + void *param; + + /* All desc in a list belong to same channel */ + pch = desc->pchan; + callback = desc->txd.callback; + param = desc->txd.callback_param; + + if (callback) + callback(param); + + desc->pchan = NULL; + } + + /* pch will be unset if list was empty */ + if (!pch) + return; + + pdmac = pch->dmac; + + spin_lock_irqsave(&pdmac->pool_lock, flags); + list_splice_tail_init(list, &pdmac->desc_pool); + spin_unlock_irqrestore(&pdmac->pool_lock, flags); +} + +static inline void handle_cyclic_desc_list(struct list_head *list) +{ + struct dma_pl330_desc *desc; + struct dma_pl330_chan *pch = NULL; + unsigned long flags; + + list_for_each_entry(desc, list, node) { + dma_async_tx_callback callback; + + /* Change status to reload it */ + desc->status = PREP; + pch = desc->pchan; + callback = desc->txd.callback; + if (callback) + callback(desc->txd.callback_param); + } + + /* pch will be unset if list was empty */ + if (!pch) + return; + + spin_lock_irqsave(&pch->lock, flags); + list_splice_tail_init(list, &pch->work_list); + spin_unlock_irqrestore(&pch->lock, flags); +} + +static inline void fill_queue(struct dma_pl330_chan *pch) +{ + struct dma_pl330_desc *desc; + int ret; + + list_for_each_entry(desc, &pch->work_list, node) { + + /* If already submitted */ + if (desc->status == BUSY) + break; + + ret = pl330_submit_req(pch->pl330_chid, + &desc->req); + if (!ret) { + desc->status = BUSY; + break; + } else if (ret == -EAGAIN) { + /* QFull or DMAC Dying */ + break; + } else { + /* Unacceptable request */ + desc->status = DONE; + dev_err(pch->dmac->pif.dev, "%s:%d Bad Desc(%d)\n", + __func__, __LINE__, desc->txd.cookie); + tasklet_schedule(&pch->task); + } + } +} + +static void pl330_tasklet(unsigned long data) +{ + struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data; + struct dma_pl330_desc *desc, *_dt; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&pch->lock, flags); + + /* Pick up ripe tomatoes */ + list_for_each_entry_safe(desc, _dt, &pch->work_list, node) + if (desc->status == DONE) { + if (!pch->cyclic) + dma_cookie_complete(&desc->txd); + list_move_tail(&desc->node, &list); + } + + /* Try to submit a req imm. next to the last completed cookie */ + fill_queue(pch); + + /* Make sure the PL330 Channel thread is active */ + pl330_chan_ctrl(pch->pl330_chid, PL330_OP_START); + + spin_unlock_irqrestore(&pch->lock, flags); + + if (pch->cyclic) + handle_cyclic_desc_list(&list); + else + free_desc_list(&list); +} + +static void dma_pl330_rqcb(void *token, enum pl330_op_err err) +{ + struct dma_pl330_desc *desc = token; + struct dma_pl330_chan *pch = desc->pchan; + unsigned long flags; + + /* If desc aborted */ + if (!pch) + return; + + spin_lock_irqsave(&pch->lock, flags); + + desc->status = DONE; + + spin_unlock_irqrestore(&pch->lock, flags); + + tasklet_schedule(&pch->task); +} + +bool pl330_filter(struct dma_chan *chan, void *param) +{ + u8 *peri_id; + + if (chan->device->dev->driver != &pl330_driver.drv) + return false; + +#ifdef CONFIG_OF + if (chan->device->dev->of_node) { + const __be32 *prop_value; + phandle phandle; + struct device_node *node; + + prop_value = ((struct property *)param)->value; + phandle = be32_to_cpup(prop_value++); + node = of_find_node_by_phandle(phandle); + return ((chan->private == node) && + (chan->chan_id == be32_to_cpup(prop_value))); + } +#endif + + peri_id = chan->private; + return *peri_id == (unsigned)param; +} +EXPORT_SYMBOL(pl330_filter); + +static int pl330_alloc_chan_resources(struct dma_chan *chan) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + struct dma_pl330_dmac *pdmac = pch->dmac; + unsigned long flags; + + spin_lock_irqsave(&pch->lock, flags); + + dma_cookie_init(chan); + pch->cyclic = false; + + pch->pl330_chid = pl330_request_channel(&pdmac->pif); + if (!pch->pl330_chid) { + spin_unlock_irqrestore(&pch->lock, flags); + return 0; + } + + tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch); + + spin_unlock_irqrestore(&pch->lock, flags); + + return 1; +} + +static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + struct dma_pl330_desc *desc, *_dt; + unsigned long flags; + struct dma_pl330_dmac *pdmac = pch->dmac; + struct dma_slave_config *slave_config; + LIST_HEAD(list); + + switch (cmd) { + case DMA_TERMINATE_ALL: + spin_lock_irqsave(&pch->lock, flags); + + /* FLUSH the PL330 Channel thread */ + pl330_chan_ctrl(pch->pl330_chid, PL330_OP_FLUSH); + + /* Mark all desc done */ + list_for_each_entry_safe(desc, _dt, &pch->work_list , node) { + desc->status = DONE; + list_move_tail(&desc->node, &list); + } + + list_splice_tail_init(&list, &pdmac->desc_pool); + spin_unlock_irqrestore(&pch->lock, flags); + break; + case DMA_SLAVE_CONFIG: + slave_config = (struct dma_slave_config *)arg; + + if (slave_config->direction == DMA_MEM_TO_DEV) { + if (slave_config->dst_addr) + pch->fifo_addr = slave_config->dst_addr; + if (slave_config->dst_addr_width) + pch->burst_sz = __ffs(slave_config->dst_addr_width); + if (slave_config->dst_maxburst) + pch->burst_len = slave_config->dst_maxburst; + } else if (slave_config->direction == DMA_DEV_TO_MEM) { + if (slave_config->src_addr) + pch->fifo_addr = slave_config->src_addr; + if (slave_config->src_addr_width) + pch->burst_sz = __ffs(slave_config->src_addr_width); + if (slave_config->src_maxburst) + pch->burst_len = slave_config->src_maxburst; + } + break; + default: + dev_err(pch->dmac->pif.dev, "Not supported command.\n"); + return -ENXIO; + } + + return 0; +} + +static void pl330_free_chan_resources(struct dma_chan *chan) +{ + struct dma_pl330_chan *pch = to_pchan(chan); + unsigned long flags; + + spin_lock_irqsave(&pch->lock, flags); + + tasklet_kill(&pch->task); + + pl330_release_channel(pch->pl330_chid); + pch->pl330_chid = NULL; + + if (pch->cyclic) + list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool); + + spin_unlock_irqrestore(&pch->lock, flags); +} + +static enum dma_status +pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(chan, cookie, txstate); +} + +static void pl330_issue_pending(struct dma_chan *chan) +{ + pl330_tasklet((unsigned long) to_pchan(chan)); +} + +/* + * We returned the last one of the circular list of descriptor(s) + * from prep_xxx, so the argument to submit corresponds to the last + * descriptor of the list. + */ +static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dma_pl330_desc *desc, *last = to_desc(tx); + struct dma_pl330_chan *pch = to_pchan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&pch->lock, flags); + + /* Assign cookies to all nodes */ + while (!list_empty(&last->node)) { + desc = list_entry(last->node.next, struct dma_pl330_desc, node); + + dma_cookie_assign(&desc->txd); + + list_move_tail(&desc->node, &pch->work_list); + } + + cookie = dma_cookie_assign(&last->txd); + list_add_tail(&last->node, &pch->work_list); + spin_unlock_irqrestore(&pch->lock, flags); + + return cookie; +} + +static inline void _init_desc(struct dma_pl330_desc *desc) +{ + desc->pchan = NULL; + desc->req.x = &desc->px; + desc->req.token = desc; + desc->rqcfg.swap = SWAP_NO; + desc->rqcfg.privileged = 0; + desc->rqcfg.insnaccess = 0; + desc->rqcfg.scctl = SCCTRL0; + desc->rqcfg.dcctl = DCCTRL0; + desc->req.cfg = &desc->rqcfg; + desc->req.xfer_cb = dma_pl330_rqcb; + desc->txd.tx_submit = pl330_tx_submit; + + INIT_LIST_HEAD(&desc->node); +} + +/* Returns the number of descriptors added to the DMAC pool */ +int add_desc(struct dma_pl330_dmac *pdmac, gfp_t flg, int count) +{ + struct dma_pl330_desc *desc; + unsigned long flags; + int i; + + if (!pdmac) + return 0; + + desc = kmalloc(count * sizeof(*desc), flg); + if (!desc) + return 0; + + spin_lock_irqsave(&pdmac->pool_lock, flags); + + for (i = 0; i < count; i++) { + _init_desc(&desc[i]); + list_add_tail(&desc[i].node, &pdmac->desc_pool); + } + + spin_unlock_irqrestore(&pdmac->pool_lock, flags); + + return count; +} + +static struct dma_pl330_desc * +pluck_desc(struct dma_pl330_dmac *pdmac) +{ + struct dma_pl330_desc *desc = NULL; + unsigned long flags; + + if (!pdmac) + return NULL; + + spin_lock_irqsave(&pdmac->pool_lock, flags); + + if (!list_empty(&pdmac->desc_pool)) { + desc = list_entry(pdmac->desc_pool.next, + struct dma_pl330_desc, node); + + list_del_init(&desc->node); + + desc->status = PREP; + desc->txd.callback = NULL; + } + + spin_unlock_irqrestore(&pdmac->pool_lock, flags); + + return desc; +} + +static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) +{ + struct dma_pl330_dmac *pdmac = pch->dmac; + u8 *peri_id = pch->chan.private; + struct dma_pl330_desc *desc; + + /* Pluck one desc from the pool of DMAC */ + desc = pluck_desc(pdmac); + + /* If the DMAC pool is empty, alloc new */ + if (!desc) { + if (!add_desc(pdmac, GFP_ATOMIC, 1)) + return NULL; + + /* Try again */ + desc = pluck_desc(pdmac); + if (!desc) { + dev_err(pch->dmac->pif.dev, + "%s:%d ALERT!\n", __func__, __LINE__); + return NULL; + } + } + + /* Initialize the descriptor */ + desc->pchan = pch; + desc->txd.cookie = 0; + async_tx_ack(&desc->txd); + + desc->req.peri = peri_id ? pch->chan.chan_id : 0; + desc->rqcfg.pcfg = &pch->dmac->pif.pcfg; + + dma_async_tx_descriptor_init(&desc->txd, &pch->chan); + + return desc; +} + +static inline void fill_px(struct pl330_xfer *px, + dma_addr_t dst, dma_addr_t src, size_t len) +{ + px->next = NULL; + px->bytes = len; + px->dst_addr = dst; + px->src_addr = src; +} + +static struct dma_pl330_desc * +__pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst, + dma_addr_t src, size_t len) +{ + struct dma_pl330_desc *desc = pl330_get_desc(pch); + + if (!desc) { + dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n", + __func__, __LINE__); + return NULL; + } + + /* + * Ideally we should lookout for reqs bigger than + * those that can be programmed with 256 bytes of + * MC buffer, but considering a req size is seldom + * going to be word-unaligned and more than 200MB, + * we take it easy. + * Also, should the limit is reached we'd rather + * have the platform increase MC buffer size than + * complicating this API driver. + */ + fill_px(&desc->px, dst, src, len); + + return desc; +} + +/* Call after fixing burst size */ +static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len) +{ + struct dma_pl330_chan *pch = desc->pchan; + struct pl330_info *pi = &pch->dmac->pif; + int burst_len; + + burst_len = pi->pcfg.data_bus_width / 8; + burst_len *= pi->pcfg.data_buf_dep; + burst_len >>= desc->rqcfg.brst_size; + + /* src/dst_burst_len can't be more than 16 */ + if (burst_len > 16) + burst_len = 16; + + while (burst_len > 1) { + if (!(len % (burst_len << desc->rqcfg.brst_size))) + break; + burst_len--; + } + + return burst_len; +} + +static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t len, + size_t period_len, enum dma_transfer_direction direction, + void *context) +{ + struct dma_pl330_desc *desc; + struct dma_pl330_chan *pch = to_pchan(chan); + dma_addr_t dst; + dma_addr_t src; + + desc = pl330_get_desc(pch); + if (!desc) { + dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n", + __func__, __LINE__); + return NULL; + } + + switch (direction) { + case DMA_MEM_TO_DEV: + desc->rqcfg.src_inc = 1; + desc->rqcfg.dst_inc = 0; + desc->req.rqtype = MEMTODEV; + src = dma_addr; + dst = pch->fifo_addr; + break; + case DMA_DEV_TO_MEM: + desc->rqcfg.src_inc = 0; + desc->rqcfg.dst_inc = 1; + desc->req.rqtype = DEVTOMEM; + src = pch->fifo_addr; + dst = dma_addr; + break; + default: + dev_err(pch->dmac->pif.dev, "%s:%d Invalid dma direction\n", + __func__, __LINE__); + return NULL; + } + + desc->rqcfg.brst_size = pch->burst_sz; + desc->rqcfg.brst_len = 1; + + pch->cyclic = true; + + fill_px(&desc->px, dst, src, period_len); + + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct dma_pl330_desc *desc; + struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_info *pi; + int burst; + + if (unlikely(!pch || !len)) + return NULL; + + pi = &pch->dmac->pif; + + desc = __pl330_prep_dma_memcpy(pch, dst, src, len); + if (!desc) + return NULL; + + desc->rqcfg.src_inc = 1; + desc->rqcfg.dst_inc = 1; + desc->req.rqtype = MEMTOMEM; + + /* Select max possible burst size */ + burst = pi->pcfg.data_bus_width / 8; + + while (burst > 1) { + if (!(len % burst)) + break; + burst /= 2; + } + + desc->rqcfg.brst_size = 0; + while (burst != (1 << desc->rqcfg.brst_size)) + desc->rqcfg.brst_size++; + + desc->rqcfg.brst_len = get_burst_len(desc, len); + + desc->txd.flags = flags; + + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flg, void *context) +{ + struct dma_pl330_desc *first, *desc = NULL; + struct dma_pl330_chan *pch = to_pchan(chan); + struct scatterlist *sg; + unsigned long flags; + int i; + dma_addr_t addr; + + if (unlikely(!pch || !sgl || !sg_len)) + return NULL; + + addr = pch->fifo_addr; + + first = NULL; + + for_each_sg(sgl, sg, sg_len, i) { + + desc = pl330_get_desc(pch); + if (!desc) { + struct dma_pl330_dmac *pdmac = pch->dmac; + + dev_err(pch->dmac->pif.dev, + "%s:%d Unable to fetch desc\n", + __func__, __LINE__); + if (!first) + return NULL; + + spin_lock_irqsave(&pdmac->pool_lock, flags); + + while (!list_empty(&first->node)) { + desc = list_entry(first->node.next, + struct dma_pl330_desc, node); + list_move_tail(&desc->node, &pdmac->desc_pool); + } + + list_move_tail(&first->node, &pdmac->desc_pool); + + spin_unlock_irqrestore(&pdmac->pool_lock, flags); + + return NULL; + } + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->node); + + if (direction == DMA_MEM_TO_DEV) { + desc->rqcfg.src_inc = 1; + desc->rqcfg.dst_inc = 0; + desc->req.rqtype = MEMTODEV; + fill_px(&desc->px, + addr, sg_dma_address(sg), sg_dma_len(sg)); + } else { + desc->rqcfg.src_inc = 0; + desc->rqcfg.dst_inc = 1; + desc->req.rqtype = DEVTOMEM; + fill_px(&desc->px, + sg_dma_address(sg), addr, sg_dma_len(sg)); + } + + desc->rqcfg.brst_size = pch->burst_sz; + desc->rqcfg.brst_len = 1; + } + + /* Return the last desc in the chain */ + desc->txd.flags = flg; + return &desc->txd; +} + +static irqreturn_t pl330_irq_handler(int irq, void *data) +{ + if (pl330_update(data)) + return IRQ_HANDLED; + else + return IRQ_NONE; +} + +static int __devinit +pl330_probe(struct amba_device *adev, const struct amba_id *id) +{ + struct dma_pl330_platdata *pdat; + struct dma_pl330_dmac *pdmac; + struct dma_pl330_chan *pch; + struct pl330_info *pi; + struct dma_device *pd; + struct resource *res; + int i, ret, irq; + int num_chan; + + pdat = adev->dev.platform_data; + + /* Allocate a new DMAC and its Channels */ + pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL); + if (!pdmac) { + dev_err(&adev->dev, "unable to allocate mem\n"); + return -ENOMEM; + } + + pi = &pdmac->pif; + pi->dev = &adev->dev; + pi->pl330_data = NULL; + pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0; + + res = &adev->res; + request_mem_region(res->start, resource_size(res), "dma-pl330"); + + pi->base = ioremap(res->start, resource_size(res)); + if (!pi->base) { + ret = -ENXIO; + goto probe_err1; + } + + pdmac->clk = clk_get(&adev->dev, "dma"); + if (IS_ERR(pdmac->clk)) { + dev_err(&adev->dev, "Cannot get operation clock.\n"); + ret = -EINVAL; + goto probe_err2; + } + + amba_set_drvdata(adev, pdmac); + +#ifndef CONFIG_PM_RUNTIME + /* enable dma clk */ + clk_enable(pdmac->clk); +#endif + + irq = adev->irq[0]; + ret = request_irq(irq, pl330_irq_handler, 0, + dev_name(&adev->dev), pi); + if (ret) + goto probe_err3; + + ret = pl330_add(pi); + if (ret) + goto probe_err4; + + INIT_LIST_HEAD(&pdmac->desc_pool); + spin_lock_init(&pdmac->pool_lock); + + /* Create a descriptor pool of default size */ + if (!add_desc(pdmac, GFP_KERNEL, NR_DEFAULT_DESC)) + dev_warn(&adev->dev, "unable to allocate desc\n"); + + pd = &pdmac->ddma; + INIT_LIST_HEAD(&pd->channels); + + /* Initialize channel parameters */ + if (pdat) + num_chan = max_t(int, pdat->nr_valid_peri, pi->pcfg.num_chan); + else + num_chan = max_t(int, pi->pcfg.num_peri, pi->pcfg.num_chan); + + pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); + + for (i = 0; i < num_chan; i++) { + pch = &pdmac->peripherals[i]; + if (!adev->dev.of_node) + pch->chan.private = pdat ? &pdat->peri_id[i] : NULL; + else + pch->chan.private = adev->dev.of_node; + + INIT_LIST_HEAD(&pch->work_list); + spin_lock_init(&pch->lock); + pch->pl330_chid = NULL; + pch->chan.device = pd; + pch->dmac = pdmac; + + /* Add the channel to the DMAC list */ + list_add_tail(&pch->chan.device_node, &pd->channels); + } + + pd->dev = &adev->dev; + if (pdat) { + pd->cap_mask = pdat->cap_mask; + } else { + dma_cap_set(DMA_MEMCPY, pd->cap_mask); + if (pi->pcfg.num_peri) { + dma_cap_set(DMA_SLAVE, pd->cap_mask); + dma_cap_set(DMA_CYCLIC, pd->cap_mask); + } + } + + pd->device_alloc_chan_resources = pl330_alloc_chan_resources; + pd->device_free_chan_resources = pl330_free_chan_resources; + pd->device_prep_dma_memcpy = pl330_prep_dma_memcpy; + pd->device_prep_dma_cyclic = pl330_prep_dma_cyclic; + pd->device_tx_status = pl330_tx_status; + pd->device_prep_slave_sg = pl330_prep_slave_sg; + pd->device_control = pl330_control; + pd->device_issue_pending = pl330_issue_pending; + + ret = dma_async_device_register(pd); + if (ret) { + dev_err(&adev->dev, "unable to register DMAC\n"); + goto probe_err5; + } + + dev_info(&adev->dev, + "Loaded driver for PL330 DMAC-%d\n", adev->periphid); + dev_info(&adev->dev, + "\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n", + pi->pcfg.data_buf_dep, + pi->pcfg.data_bus_width / 8, pi->pcfg.num_chan, + pi->pcfg.num_peri, pi->pcfg.num_events); + + return 0; + +probe_err5: + pl330_del(pi); +probe_err4: + free_irq(irq, pi); +probe_err3: +#ifndef CONFIG_PM_RUNTIME + clk_disable(pdmac->clk); +#endif + clk_put(pdmac->clk); +probe_err2: + iounmap(pi->base); +probe_err1: + release_mem_region(res->start, resource_size(res)); + kfree(pdmac); + + return ret; +} + +static int __devexit pl330_remove(struct amba_device *adev) +{ + struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev); + struct dma_pl330_chan *pch, *_p; + struct pl330_info *pi; + struct resource *res; + int irq; + + if (!pdmac) + return 0; + + amba_set_drvdata(adev, NULL); + + /* Idle the DMAC */ + list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels, + chan.device_node) { + + /* Remove the channel */ + list_del(&pch->chan.device_node); + + /* Flush the channel */ + pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0); + pl330_free_chan_resources(&pch->chan); + } + + pi = &pdmac->pif; + + pl330_del(pi); + + irq = adev->irq[0]; + free_irq(irq, pi); + + iounmap(pi->base); + + res = &adev->res; + release_mem_region(res->start, resource_size(res)); + +#ifndef CONFIG_PM_RUNTIME + clk_disable(pdmac->clk); +#endif + + kfree(pdmac); + + return 0; +} + +static struct amba_id pl330_ids[] = { + { + .id = 0x00041330, + .mask = 0x000fffff, + }, + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl330_ids); + +#ifdef CONFIG_PM_RUNTIME +static int pl330_runtime_suspend(struct device *dev) +{ + struct dma_pl330_dmac *pdmac = dev_get_drvdata(dev); + + if (!pdmac) { + dev_err(dev, "failed to get dmac\n"); + return -ENODEV; + } + + clk_disable(pdmac->clk); + + return 0; +} + +static int pl330_runtime_resume(struct device *dev) +{ + struct dma_pl330_dmac *pdmac = dev_get_drvdata(dev); + + if (!pdmac) { + dev_err(dev, "failed to get dmac\n"); + return -ENODEV; + } + + clk_enable(pdmac->clk); + + return 0; +} +#else +#define pl330_runtime_suspend NULL +#define pl330_runtime_resume NULL +#endif /* CONFIG_PM_RUNTIME */ + +static const struct dev_pm_ops pl330_pm_ops = { + .runtime_suspend = pl330_runtime_suspend, + .runtime_resume = pl330_runtime_resume, +}; + +static struct amba_driver pl330_driver = { + .drv = { + .owner = THIS_MODULE, + .name = "dma-pl330", + .pm = &pl330_pm_ops, + }, + .id_table = pl330_ids, + .probe = pl330_probe, + .remove = pl330_remove, +}; + +module_amba_driver(pl330_driver); + +MODULE_AUTHOR("Jaswinder Singh <jassi.brar@samsung.com>"); +MODULE_DESCRIPTION("API Driver for PL330 DMAC"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/ppc4xx/Makefile b/drivers/dma/ppc4xx/Makefile new file mode 100644 index 00000000..b3d259b3 --- /dev/null +++ b/drivers/dma/ppc4xx/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += adma.o diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c new file mode 100644 index 00000000..ced98826 --- /dev/null +++ b/drivers/dma/ppc4xx/adma.c @@ -0,0 +1,4992 @@ +/* + * Copyright (C) 2006-2009 DENX Software Engineering. + * + * Author: Yuri Tikhonov <yur@emcraft.com> + * + * Further porting to arch/powerpc by + * Anatolij Gustschin <agust@denx.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* + * This driver supports the asynchrounous DMA copy and RAID engines available + * on the AMCC PPC440SPe Processors. + * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) + * ADMA driver written by D.Williams. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/async_tx.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/proc_fs.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <asm/dcr.h> +#include <asm/dcr-regs.h> +#include "adma.h" +#include "../dmaengine.h" + +enum ppc_adma_init_code { + PPC_ADMA_INIT_OK = 0, + PPC_ADMA_INIT_MEMRES, + PPC_ADMA_INIT_MEMREG, + PPC_ADMA_INIT_ALLOC, + PPC_ADMA_INIT_COHERENT, + PPC_ADMA_INIT_CHANNEL, + PPC_ADMA_INIT_IRQ1, + PPC_ADMA_INIT_IRQ2, + PPC_ADMA_INIT_REGISTER +}; + +static char *ppc_adma_errors[] = { + [PPC_ADMA_INIT_OK] = "ok", + [PPC_ADMA_INIT_MEMRES] = "failed to get memory resource", + [PPC_ADMA_INIT_MEMREG] = "failed to request memory region", + [PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev " + "structure", + [PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for " + "hardware descriptors", + [PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel", + [PPC_ADMA_INIT_IRQ1] = "failed to request first irq", + [PPC_ADMA_INIT_IRQ2] = "failed to request second irq", + [PPC_ADMA_INIT_REGISTER] = "failed to register dma async device", +}; + +static enum ppc_adma_init_code +ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM]; + +struct ppc_dma_chan_ref { + struct dma_chan *chan; + struct list_head node; +}; + +/* The list of channels exported by ppc440spe ADMA */ +struct list_head +ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list); + +/* This flag is set when want to refetch the xor chain in the interrupt + * handler + */ +static u32 do_xor_refetch; + +/* Pointer to DMA0, DMA1 CP/CS FIFO */ +static void *ppc440spe_dma_fifo_buf; + +/* Pointers to last submitted to DMA0, DMA1 CDBs */ +static struct ppc440spe_adma_desc_slot *chan_last_sub[3]; +static struct ppc440spe_adma_desc_slot *chan_first_cdb[3]; + +/* Pointer to last linked and submitted xor CB */ +static struct ppc440spe_adma_desc_slot *xor_last_linked; +static struct ppc440spe_adma_desc_slot *xor_last_submit; + +/* This array is used in data-check operations for storing a pattern */ +static char ppc440spe_qword[16]; + +static atomic_t ppc440spe_adma_err_irq_ref; +static dcr_host_t ppc440spe_mq_dcr_host; +static unsigned int ppc440spe_mq_dcr_len; + +/* Since RXOR operations use the common register (MQ0_CF2H) for setting-up + * the block size in transactions, then we do not allow to activate more than + * only one RXOR transactions simultaneously. So use this var to store + * the information about is RXOR currently active (PPC440SPE_RXOR_RUN bit is + * set) or not (PPC440SPE_RXOR_RUN is clear). + */ +static unsigned long ppc440spe_rxor_state; + +/* These are used in enable & check routines + */ +static u32 ppc440spe_r6_enabled; +static struct ppc440spe_adma_chan *ppc440spe_r6_tchan; +static struct completion ppc440spe_r6_test_comp; + +static int ppc440spe_adma_dma2rxor_prep_src( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_rxor *cursor, int index, + int src_cnt, u32 addr); +static void ppc440spe_adma_dma2rxor_set_src( + struct ppc440spe_adma_desc_slot *desc, + int index, dma_addr_t addr); +static void ppc440spe_adma_dma2rxor_set_mult( + struct ppc440spe_adma_desc_slot *desc, + int index, u8 mult); + +#ifdef ADMA_LL_DEBUG +#define ADMA_LL_DBG(x) ({ if (1) x; 0; }) +#else +#define ADMA_LL_DBG(x) ({ if (0) x; 0; }) +#endif + +static void print_cb(struct ppc440spe_adma_chan *chan, void *block) +{ + struct dma_cdb *cdb; + struct xor_cb *cb; + int i; + + switch (chan->device->id) { + case 0: + case 1: + cdb = block; + + pr_debug("CDB at %p [%d]:\n" + "\t attr 0x%02x opc 0x%02x cnt 0x%08x\n" + "\t sg1u 0x%08x sg1l 0x%08x\n" + "\t sg2u 0x%08x sg2l 0x%08x\n" + "\t sg3u 0x%08x sg3l 0x%08x\n", + cdb, chan->device->id, + cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt), + le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l), + le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l), + le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l) + ); + break; + case 2: + cb = block; + + pr_debug("CB at %p [%d]:\n" + "\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n" + "\t cbtah 0x%08x cbtal 0x%08x\n" + "\t cblah 0x%08x cblal 0x%08x\n", + cb, chan->device->id, + cb->cbc, cb->cbbc, cb->cbs, + cb->cbtah, cb->cbtal, + cb->cblah, cb->cblal); + for (i = 0; i < 16; i++) { + if (i && !cb->ops[i].h && !cb->ops[i].l) + continue; + pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n", + i, cb->ops[i].h, cb->ops[i].l); + } + break; + } +} + +static void print_cb_list(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *iter) +{ + for (; iter; iter = iter->hw_next) + print_cb(chan, iter->hw_desc); +} + +static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt) +{ + int i; + + pr_debug("\n%s(%d):\nsrc: ", __func__, id); + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx ", src[i]); + pr_debug("dst:\n\t0x%016llx\n", dst); +} + +static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt) +{ + int i; + + pr_debug("\n%s(%d):\nsrc: ", __func__, id); + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx ", src[i]); + pr_debug("dst: "); + for (i = 0; i < 2; i++) + pr_debug("\t0x%016llx ", dst[i]); +} + +static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src, + unsigned int src_cnt, + const unsigned char *scf) +{ + int i; + + pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id); + if (scf) { + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]); + } else { + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx(no) ", src[i]); + } + + pr_debug("dst: "); + for (i = 0; i < 2; i++) + pr_debug("\t0x%016llx ", src[src_cnt + i]); +} + +/****************************************************************************** + * Command (Descriptor) Blocks low-level routines + ******************************************************************************/ +/** + * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT + * pseudo operation + */ +static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + struct xor_cb *p; + + switch (chan->device->id) { + case PPC440SPE_XOR_ID: + p = desc->hw_desc; + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + /* NOP with Command Block Complete Enable */ + p->cbc = XOR_CBCR_CBCE_BIT; + break; + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); + /* NOP with interrupt */ + set_bit(PPC440SPE_DESC_INT, &desc->flags); + break; + default: + printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id, + __func__); + break; + } +} + +/** + * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR + * pseudo operation + */ +static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc) +{ + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = 0; + desc->dst_cnt = 1; +} + +/** + * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation + */ +static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc, + int src_cnt, unsigned long flags) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = src_cnt; + desc->dst_cnt = 1; + + hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt; + if (flags & DMA_PREP_INTERRUPT) + /* Enable interrupt on completion */ + hw_desc->cbc |= XOR_CBCR_CBCE_BIT; +} + +/** + * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ + * operation in DMA2 controller + */ +static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt, unsigned long flags) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; + memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags)); + desc->descs_per_op = 0; + + hw_desc->cbc = XOR_CBCR_TGT_BIT; + if (flags & DMA_PREP_INTERRUPT) + /* Enable interrupt on completion */ + hw_desc->cbc |= XOR_CBCR_CBCE_BIT; +} + +#define DMA_CTRL_FLAGS_LAST DMA_PREP_FENCE +#define DMA_PREP_ZERO_P (DMA_CTRL_FLAGS_LAST << 1) +#define DMA_PREP_ZERO_Q (DMA_PREP_ZERO_P << 1) + +/** + * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation + * with DMA0/1 + */ +static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt, unsigned long flags, + unsigned long op) +{ + struct dma_cdb *hw_desc; + struct ppc440spe_adma_desc_slot *iter; + u8 dopc; + + /* Common initialization of a PQ descriptors chain */ + set_bits(op, &desc->flags); + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; + + /* WXOR MULTICAST if both P and Q are being computed + * MV_SG1_SG2 if Q only + */ + dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ? + DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2; + + list_for_each_entry(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + + if (likely(!list_is_last(&iter->chain_node, + &desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + } else { + /* this is the last descriptor. + * this slot will be pasted from ADMA level + * each time it wants to configure parameters + * of the transaction (src, dst, ...) + */ + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + } + } + + /* Set OPS depending on WXOR/RXOR type of operation */ + if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) { + /* This is a WXOR only chain: + * - first descriptors are for zeroing destinations + * if PPC440SPE_ZERO_P/Q set; + * - descriptors remained are for GF-XOR operations. + */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + + if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) { + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) { + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + list_for_each_entry_from(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + hw_desc->opc = dopc; + } + } else { + /* This is either RXOR-only or mixed RXOR/WXOR */ + + /* The first 1 or 2 slots in chain are always RXOR, + * if need to calculate P & Q, then there are two + * RXOR slots; if only P or only Q, then there is one + */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + + if (desc->dst_cnt == DMA_DEST_MAX_NUM) { + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + } + + /* The remaining descs (if any) are WXORs */ + if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) { + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + list_for_each_entry_from(iter, &desc->group_list, + chain_node) { + hw_desc = iter->hw_desc; + hw_desc->opc = dopc; + } + } + } +} + +/** + * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor + * for PQ_ZERO_SUM operation + */ +static void ppc440spe_desc_init_dma01pqzero_sum( + struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt) +{ + struct dma_cdb *hw_desc; + struct ppc440spe_adma_desc_slot *iter; + int i = 0; + u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST : + DMA_CDB_OPC_MV_SG1_SG2; + /* + * Initialize starting from 2nd or 3rd descriptor dependent + * on dst_cnt. First one or two slots are for cloning P + * and/or Q to chan->pdest and/or chan->qdest as we have + * to preserve original P/Q. + */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, chain_node); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + + if (dst_cnt > 1) { + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + } + /* initialize each source descriptor in chain */ + list_for_each_entry_from(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->src_cnt = 0; + iter->dst_cnt = 0; + + /* This is a ZERO_SUM operation: + * - <src_cnt> descriptors starting from 2nd or 3rd + * descriptor are for GF-XOR operations; + * - remaining <dst_cnt> descriptors are for checking the result + */ + if (i++ < src_cnt) + /* MV_SG1_SG2 if only Q is being verified + * MULTICAST if both P and Q are being verified + */ + hw_desc->opc = dopc; + else + /* DMA_CDB_OPC_DCHECK128 operation */ + hw_desc->opc = DMA_CDB_OPC_DCHECK128; + + if (likely(!list_is_last(&iter->chain_node, + &desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } else { + /* this is the last descriptor. + * this slot will be pasted from ADMA level + * each time it wants to configure parameters + * of the transaction (src, dst, ...) + */ + iter->hw_next = NULL; + /* always enable interrupt generation since we get + * the status of pqzero from the handler + */ + set_bit(PPC440SPE_DESC_INT, &iter->flags); + } + } + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; +} + +/** + * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation + */ +static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc, + unsigned long flags) +{ + struct dma_cdb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); + desc->hw_next = NULL; + desc->src_cnt = 1; + desc->dst_cnt = 1; + + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &desc->flags); + else + clear_bit(PPC440SPE_DESC_INT, &desc->flags); + + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; +} + +/** + * ppc440spe_desc_init_memset - initialize the descriptor for MEMSET operation + */ +static void ppc440spe_desc_init_memset(struct ppc440spe_adma_desc_slot *desc, + int value, unsigned long flags) +{ + struct dma_cdb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); + desc->hw_next = NULL; + desc->src_cnt = 1; + desc->dst_cnt = 1; + + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &desc->flags); + else + clear_bit(PPC440SPE_DESC_INT, &desc->flags); + + hw_desc->sg1u = hw_desc->sg1l = cpu_to_le32((u32)value); + hw_desc->sg3u = hw_desc->sg3l = cpu_to_le32((u32)value); + hw_desc->opc = DMA_CDB_OPC_DFILL128; +} + +/** + * ppc440spe_desc_set_src_addr - set source address into the descriptor + */ +static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + int src_idx, dma_addr_t addrh, + dma_addr_t addrl) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + phys_addr_t addr64, tmplow, tmphi; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (!addrh) { + addr64 = addrl; + tmphi = (addr64 >> 32); + tmplow = (addr64 & 0xFFFFFFFF); + } else { + tmphi = addrh; + tmplow = addrl; + } + dma_hw_desc = desc->hw_desc; + dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow); + dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + xor_hw_desc->ops[src_idx].l = addrl; + xor_hw_desc->ops[src_idx].h |= addrh; + break; + } +} + +/** + * ppc440spe_desc_set_src_mult - set source address mult into the descriptor + */ +static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, u32 mult_index, + int sg_index, unsigned char mult_value) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + u32 *psgu; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + + switch (sg_index) { + /* for RXOR operations set multiplier + * into source cued address + */ + case DMA_CDB_SG_SRC: + psgu = &dma_hw_desc->sg1u; + break; + /* for WXOR operations set multiplier + * into destination cued address(es) + */ + case DMA_CDB_SG_DST1: + psgu = &dma_hw_desc->sg2u; + break; + case DMA_CDB_SG_DST2: + psgu = &dma_hw_desc->sg3u; + break; + default: + BUG(); + } + + *psgu |= cpu_to_le32(mult_value << mult_index); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + break; + default: + BUG(); + } +} + +/** + * ppc440spe_desc_set_dest_addr - set destination address into the descriptor + */ +static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + dma_addr_t addrh, dma_addr_t addrl, + u32 dst_idx) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + phys_addr_t addr64, tmphi, tmplow; + u32 *psgu, *psgl; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (!addrh) { + addr64 = addrl; + tmphi = (addr64 >> 32); + tmplow = (addr64 & 0xFFFFFFFF); + } else { + tmphi = addrh; + tmplow = addrl; + } + dma_hw_desc = desc->hw_desc; + + psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u; + psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l; + + *psgl = cpu_to_le32((u32)tmplow); + *psgu |= cpu_to_le32((u32)tmphi); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + xor_hw_desc->cbtal = addrl; + xor_hw_desc->cbtah |= addrh; + break; + } +} + +/** + * ppc440spe_desc_set_byte_count - set number of data bytes involved + * into the operation + */ +static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + u32 byte_count) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + dma_hw_desc->cnt = cpu_to_le32(byte_count); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + xor_hw_desc->cbbc = byte_count; + break; + } +} + +/** + * ppc440spe_desc_set_rxor_block_size - set RXOR block size + */ +static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count) +{ + /* assume that byte_count is aligned on the 512-boundary; + * thus write it directly to the register (bits 23:31 are + * reserved there). + */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count); +} + +/** + * ppc440spe_desc_set_dcheck - set CHECK pattern + */ +static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, u8 *qword) +{ + struct dma_cdb *dma_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + iowrite32(qword[0], &dma_hw_desc->sg3l); + iowrite32(qword[4], &dma_hw_desc->sg3u); + iowrite32(qword[8], &dma_hw_desc->sg2l); + iowrite32(qword[12], &dma_hw_desc->sg2u); + break; + default: + BUG(); + } +} + +/** + * ppc440spe_xor_set_link - set link address in xor CB + */ +static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc, + struct ppc440spe_adma_desc_slot *next_desc) +{ + struct xor_cb *xor_hw_desc = prev_desc->hw_desc; + + if (unlikely(!next_desc || !(next_desc->phys))) { + printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n", + __func__, next_desc, + next_desc ? next_desc->phys : 0); + BUG(); + } + + xor_hw_desc->cbs = 0; + xor_hw_desc->cblal = next_desc->phys; + xor_hw_desc->cblah = 0; + xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT; +} + +/** + * ppc440spe_desc_set_link - set the address of descriptor following this + * descriptor in chain + */ +static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *prev_desc, + struct ppc440spe_adma_desc_slot *next_desc) +{ + unsigned long flags; + struct ppc440spe_adma_desc_slot *tail = next_desc; + + if (unlikely(!prev_desc || !next_desc || + (prev_desc->hw_next && prev_desc->hw_next != next_desc))) { + /* If previous next is overwritten something is wrong. + * though we may refetch from append to initiate list + * processing; in this case - it's ok. + */ + printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; " + "prev->hw_next=0x%p\n", __func__, prev_desc, + next_desc, prev_desc ? prev_desc->hw_next : 0); + BUG(); + } + + local_irq_save(flags); + + /* do s/w chaining both for DMA and XOR descriptors */ + prev_desc->hw_next = next_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + break; + case PPC440SPE_XOR_ID: + /* bind descriptor to the chain */ + while (tail->hw_next) + tail = tail->hw_next; + xor_last_linked = tail; + + if (prev_desc == xor_last_submit) + /* do not link to the last submitted CB */ + break; + ppc440spe_xor_set_link(prev_desc, next_desc); + break; + } + + local_irq_restore(flags); +} + +/** + * ppc440spe_desc_get_src_addr - extract the source address from the descriptor + */ +static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, int src_idx) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + /* May have 0, 1, 2, or 3 sources */ + switch (dma_hw_desc->opc) { + case DMA_CDB_OPC_NO_OP: + case DMA_CDB_OPC_DFILL128: + return 0; + case DMA_CDB_OPC_DCHECK128: + if (unlikely(src_idx)) { + printk(KERN_ERR "%s: try to get %d source for" + " DCHECK128\n", __func__, src_idx); + BUG(); + } + return le32_to_cpu(dma_hw_desc->sg1l); + case DMA_CDB_OPC_MULTICAST: + case DMA_CDB_OPC_MV_SG1_SG2: + if (unlikely(src_idx > 2)) { + printk(KERN_ERR "%s: try to get %d source from" + " DMA descr\n", __func__, src_idx); + BUG(); + } + if (src_idx) { + if (le32_to_cpu(dma_hw_desc->sg1u) & + DMA_CUED_XOR_WIN_MSK) { + u8 region; + + if (src_idx == 1) + return le32_to_cpu( + dma_hw_desc->sg1l) + + desc->unmap_len; + + region = (le32_to_cpu( + dma_hw_desc->sg1u)) >> + DMA_CUED_REGION_OFF; + + region &= DMA_CUED_REGION_MSK; + switch (region) { + case DMA_RXOR123: + return le32_to_cpu( + dma_hw_desc->sg1l) + + (desc->unmap_len << 1); + case DMA_RXOR124: + return le32_to_cpu( + dma_hw_desc->sg1l) + + (desc->unmap_len * 3); + case DMA_RXOR125: + return le32_to_cpu( + dma_hw_desc->sg1l) + + (desc->unmap_len << 2); + default: + printk(KERN_ERR + "%s: try to" + " get src3 for region %02x" + "PPC440SPE_DESC_RXOR12?\n", + __func__, region); + BUG(); + } + } else { + printk(KERN_ERR + "%s: try to get %d" + " source for non-cued descr\n", + __func__, src_idx); + BUG(); + } + } + return le32_to_cpu(dma_hw_desc->sg1l); + default: + printk(KERN_ERR "%s: unknown OPC 0x%02x\n", + __func__, dma_hw_desc->opc); + BUG(); + } + return le32_to_cpu(dma_hw_desc->sg1l); + case PPC440SPE_XOR_ID: + /* May have up to 16 sources */ + xor_hw_desc = desc->hw_desc; + return xor_hw_desc->ops[src_idx].l; + } + return 0; +} + +/** + * ppc440spe_desc_get_dest_addr - extract the destination address from the + * descriptor + */ +static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, int idx) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + + if (likely(!idx)) + return le32_to_cpu(dma_hw_desc->sg2l); + return le32_to_cpu(dma_hw_desc->sg3l); + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + return xor_hw_desc->cbtal; + } + return 0; +} + +/** + * ppc440spe_desc_get_src_num - extract the number of source addresses from + * the descriptor + */ +static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + + switch (dma_hw_desc->opc) { + case DMA_CDB_OPC_NO_OP: + case DMA_CDB_OPC_DFILL128: + return 0; + case DMA_CDB_OPC_DCHECK128: + return 1; + case DMA_CDB_OPC_MV_SG1_SG2: + case DMA_CDB_OPC_MULTICAST: + /* + * Only for RXOR operations we have more than + * one source + */ + if (le32_to_cpu(dma_hw_desc->sg1u) & + DMA_CUED_XOR_WIN_MSK) { + /* RXOR op, there are 2 or 3 sources */ + if (((le32_to_cpu(dma_hw_desc->sg1u) >> + DMA_CUED_REGION_OFF) & + DMA_CUED_REGION_MSK) == DMA_RXOR12) { + /* RXOR 1-2 */ + return 2; + } else { + /* RXOR 1-2-3/1-2-4/1-2-5 */ + return 3; + } + } + return 1; + default: + printk(KERN_ERR "%s: unknown OPC 0x%02x\n", + __func__, dma_hw_desc->opc); + BUG(); + } + case PPC440SPE_XOR_ID: + /* up to 16 sources */ + xor_hw_desc = desc->hw_desc; + return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK; + default: + BUG(); + } + return 0; +} + +/** + * ppc440spe_desc_get_dst_num - get the number of destination addresses in + * this descriptor + */ +static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + struct dma_cdb *dma_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* May be 1 or 2 destinations */ + dma_hw_desc = desc->hw_desc; + switch (dma_hw_desc->opc) { + case DMA_CDB_OPC_NO_OP: + case DMA_CDB_OPC_DCHECK128: + return 0; + case DMA_CDB_OPC_MV_SG1_SG2: + case DMA_CDB_OPC_DFILL128: + return 1; + case DMA_CDB_OPC_MULTICAST: + if (desc->dst_cnt == 2) + return 2; + else + return 1; + default: + printk(KERN_ERR "%s: unknown OPC 0x%02x\n", + __func__, dma_hw_desc->opc); + BUG(); + } + case PPC440SPE_XOR_ID: + /* Always only 1 destination */ + return 1; + default: + BUG(); + } + return 0; +} + +/** + * ppc440spe_desc_get_link - get the address of the descriptor that + * follows this one + */ +static inline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + if (!desc->hw_next) + return 0; + + return desc->hw_next->phys; +} + +/** + * ppc440spe_desc_is_aligned - check alignment + */ +static inline int ppc440spe_desc_is_aligned( + struct ppc440spe_adma_desc_slot *desc, int num_slots) +{ + return (desc->idx & (num_slots - 1)) ? 0 : 1; +} + +/** + * ppc440spe_chan_xor_slot_count - get the number of slots necessary for + * XOR operation + */ +static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt, + int *slots_per_op) +{ + int slot_cnt; + + /* each XOR descriptor provides up to 16 source operands */ + slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS; + + if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)) + return slot_cnt; + + printk(KERN_ERR "%s: len %d > max %d !!\n", + __func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); + BUG(); + return slot_cnt; +} + +/** + * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for + * DMA2 PQ operation + */ +static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs, + int src_cnt, size_t len) +{ + signed long long order = 0; + int state = 0; + int addr_count = 0; + int i; + for (i = 1; i < src_cnt; i++) { + dma_addr_t cur_addr = srcs[i]; + dma_addr_t old_addr = srcs[i-1]; + switch (state) { + case 0: + if (cur_addr == old_addr + len) { + /* direct RXOR */ + order = 1; + state = 1; + if (i == src_cnt-1) + addr_count++; + } else if (old_addr == cur_addr + len) { + /* reverse RXOR */ + order = -1; + state = 1; + if (i == src_cnt-1) + addr_count++; + } else { + state = 3; + } + break; + case 1: + if (i == src_cnt-2 || (order == -1 + && cur_addr != old_addr - len)) { + order = 0; + state = 0; + addr_count++; + } else if (cur_addr == old_addr + len*order) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else if (cur_addr == old_addr + 2*len) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else if (cur_addr == old_addr + 3*len) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else { + order = 0; + state = 0; + addr_count++; + } + break; + case 2: + order = 0; + state = 0; + addr_count++; + break; + } + if (state == 3) + break; + } + if (src_cnt <= 1 || (state != 1 && state != 2)) { + pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n", + __func__, src_cnt, state, addr_count, order); + for (i = 0; i < src_cnt; i++) + pr_err("\t[%d] 0x%llx \n", i, srcs[i]); + BUG(); + } + + return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS; +} + + +/****************************************************************************** + * ADMA channel low-level routines + ******************************************************************************/ + +static u32 +ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan); +static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan); + +/** + * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine + */ +static void ppc440spe_adma_device_clear_eot_status( + struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + u8 *p = chan->device->dma_desc_pool_virt; + struct dma_cdb *cdb; + u32 rv, i; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* read FIFO to ack */ + dma_reg = chan->device->dma_reg; + while ((rv = ioread32(&dma_reg->csfpl))) { + i = rv & DMA_CDB_ADDR_MSK; + cdb = (struct dma_cdb *)&p[i - + (u32)chan->device->dma_desc_pool]; + + /* Clear opcode to ack. This is necessary for + * ZeroSum operations only + */ + cdb->opc = 0; + + if (test_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state)) { + /* probably this is a completed RXOR op, + * get pointer to CDB using the fact that + * physical and virtual addresses of CDB + * in pools have the same offsets + */ + if (le32_to_cpu(cdb->sg1u) & + DMA_CUED_XOR_BASE) { + /* this is a RXOR */ + clear_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state); + } + } + + if (rv & DMA_CDB_STATUS_MSK) { + /* ZeroSum check failed + */ + struct ppc440spe_adma_desc_slot *iter; + dma_addr_t phys = rv & ~DMA_CDB_MSK; + + /* + * Update the status of corresponding + * descriptor. + */ + list_for_each_entry(iter, &chan->chain, + chain_node) { + if (iter->phys == phys) + break; + } + /* + * if cannot find the corresponding + * slot it's a bug + */ + BUG_ON(&iter->chain_node == &chan->chain); + + if (iter->xor_check_result) { + if (test_bit(PPC440SPE_DESC_PCHECK, + &iter->flags)) { + *iter->xor_check_result |= + SUM_CHECK_P_RESULT; + } else + if (test_bit(PPC440SPE_DESC_QCHECK, + &iter->flags)) { + *iter->xor_check_result |= + SUM_CHECK_Q_RESULT; + } else + BUG(); + } + } + } + + rv = ioread32(&dma_reg->dsts); + if (rv) { + pr_err("DMA%d err status: 0x%x\n", + chan->device->id, rv); + /* write back to clear */ + iowrite32(rv, &dma_reg->dsts); + } + break; + case PPC440SPE_XOR_ID: + /* reset status bits to ack */ + xor_reg = chan->device->xor_reg; + rv = ioread32be(&xor_reg->sr); + iowrite32be(rv, &xor_reg->sr); + + if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) { + if (rv & XOR_IE_RPTIE_BIT) { + /* Read PLB Timeout Error. + * Try to resubmit the CB + */ + u32 val = ioread32be(&xor_reg->ccbalr); + + iowrite32be(val, &xor_reg->cblalr); + + val = ioread32be(&xor_reg->crsr); + iowrite32be(val | XOR_CRSR_XAE_BIT, + &xor_reg->crsr); + } else + pr_err("XOR ERR 0x%x status\n", rv); + break; + } + + /* if the XORcore is idle, but there are unprocessed CBs + * then refetch the s/w chain here + */ + if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) && + do_xor_refetch) + ppc440spe_chan_append(chan); + break; + } +} + +/** + * ppc440spe_chan_is_busy - get the channel status + */ +static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + int busy = 0; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_reg = chan->device->dma_reg; + /* if command FIFO's head and tail pointers are equal and + * status tail is the same as command, then channel is free + */ + if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) || + ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp)) + busy = 1; + break; + case PPC440SPE_XOR_ID: + /* use the special status bit for the XORcore + */ + xor_reg = chan->device->xor_reg; + busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0; + break; + } + + return busy; +} + +/** + * ppc440spe_chan_set_first_xor_descriptor - init XORcore chain + */ +static void ppc440spe_chan_set_first_xor_descriptor( + struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *next_desc) +{ + struct xor_regs *xor_reg = chan->device->xor_reg; + + if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) + printk(KERN_INFO "%s: Warn: XORcore is running " + "when try to set the first CDB!\n", + __func__); + + xor_last_submit = xor_last_linked = next_desc; + + iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr); + + iowrite32be(next_desc->phys, &xor_reg->cblalr); + iowrite32be(0, &xor_reg->cblahr); + iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT, + &xor_reg->cbcr); + + chan->hw_chain_inited = 1; +} + +/** + * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO. + * called with irqs disabled + */ +static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *desc) +{ + u32 pcdb; + struct dma_regs *dma_reg = chan->device->dma_reg; + + pcdb = desc->phys; + if (!test_bit(PPC440SPE_DESC_INT, &desc->flags)) + pcdb |= DMA_CDB_NO_INT; + + chan_last_sub[chan->device->id] = desc; + + ADMA_LL_DBG(print_cb(chan, desc->hw_desc)); + + iowrite32(pcdb, &dma_reg->cpfpl); +} + +/** + * ppc440spe_chan_append - update the h/w chain in the channel + */ +static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan) +{ + struct xor_regs *xor_reg; + struct ppc440spe_adma_desc_slot *iter; + struct xor_cb *xcb; + u32 cur_desc; + unsigned long flags; + + local_irq_save(flags); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + cur_desc = ppc440spe_chan_get_current_descriptor(chan); + + if (likely(cur_desc)) { + iter = chan_last_sub[chan->device->id]; + BUG_ON(!iter); + } else { + /* first peer */ + iter = chan_first_cdb[chan->device->id]; + BUG_ON(!iter); + ppc440spe_dma_put_desc(chan, iter); + chan->hw_chain_inited = 1; + } + + /* is there something new to append */ + if (!iter->hw_next) + break; + + /* flush descriptors from the s/w queue to fifo */ + list_for_each_entry_continue(iter, &chan->chain, chain_node) { + ppc440spe_dma_put_desc(chan, iter); + if (!iter->hw_next) + break; + } + break; + case PPC440SPE_XOR_ID: + /* update h/w links and refetch */ + if (!xor_last_submit->hw_next) + break; + + xor_reg = chan->device->xor_reg; + /* the last linked CDB has to generate an interrupt + * that we'd be able to append the next lists to h/w + * regardless of the XOR engine state at the moment of + * appending of these next lists + */ + xcb = xor_last_linked->hw_desc; + xcb->cbc |= XOR_CBCR_CBCE_BIT; + + if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) { + /* XORcore is idle. Refetch now */ + do_xor_refetch = 0; + ppc440spe_xor_set_link(xor_last_submit, + xor_last_submit->hw_next); + + ADMA_LL_DBG(print_cb_list(chan, + xor_last_submit->hw_next)); + + xor_last_submit = xor_last_linked; + iowrite32be(ioread32be(&xor_reg->crsr) | + XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT, + &xor_reg->crsr); + } else { + /* XORcore is running. Refetch later in the handler */ + do_xor_refetch = 1; + } + + break; + } + + local_irq_restore(flags); +} + +/** + * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor + */ +static u32 +ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + + if (unlikely(!chan->hw_chain_inited)) + /* h/w descriptor chain is not initialized yet */ + return 0; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_reg = chan->device->dma_reg; + return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK); + case PPC440SPE_XOR_ID: + xor_reg = chan->device->xor_reg; + return ioread32be(&xor_reg->ccbalr); + } + return 0; +} + +/** + * ppc440spe_chan_run - enable the channel + */ +static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan) +{ + struct xor_regs *xor_reg; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* DMAs are always enabled, do nothing */ + break; + case PPC440SPE_XOR_ID: + /* drain write buffer */ + xor_reg = chan->device->xor_reg; + + /* fetch descriptor pointed to in <link> */ + iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT, + &xor_reg->crsr); + break; + } +} + +/****************************************************************************** + * ADMA device level + ******************************************************************************/ + +static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan); +static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan); + +static dma_cookie_t +ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx); + +static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); +static void +ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); + +static void +ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t *paddr, unsigned long flags); +static void +ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); +static void +ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx, + unsigned char mult, int index, int dst_pos); +static void +ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t paddr, dma_addr_t qaddr); + +static struct page *ppc440spe_rxor_srcs[32]; + +/** + * ppc440spe_can_rxor - check if the operands may be processed with RXOR + */ +static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len) +{ + int i, order = 0, state = 0; + int idx = 0; + + if (unlikely(!(src_cnt > 1))) + return 0; + + BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs)); + + /* Skip holes in the source list before checking */ + for (i = 0; i < src_cnt; i++) { + if (!srcs[i]) + continue; + ppc440spe_rxor_srcs[idx++] = srcs[i]; + } + src_cnt = idx; + + for (i = 1; i < src_cnt; i++) { + char *cur_addr = page_address(ppc440spe_rxor_srcs[i]); + char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]); + + switch (state) { + case 0: + if (cur_addr == old_addr + len) { + /* direct RXOR */ + order = 1; + state = 1; + } else if (old_addr == cur_addr + len) { + /* reverse RXOR */ + order = -1; + state = 1; + } else + goto out; + break; + case 1: + if ((i == src_cnt - 2) || + (order == -1 && cur_addr != old_addr - len)) { + order = 0; + state = 0; + } else if ((cur_addr == old_addr + len * order) || + (cur_addr == old_addr + 2 * len) || + (cur_addr == old_addr + 3 * len)) { + state = 2; + } else { + order = 0; + state = 0; + } + break; + case 2: + order = 0; + state = 0; + break; + } + } + +out: + if (state == 1 || state == 2) + return 1; + + return 0; +} + +/** + * ppc440spe_adma_device_estimate - estimate the efficiency of processing + * the operation given on this channel. It's assumed that 'chan' is + * capable to process 'cap' type of operation. + * @chan: channel to use + * @cap: type of transaction + * @dst_lst: array of destination pointers + * @dst_cnt: number of destination operands + * @src_lst: array of source pointers + * @src_cnt: number of source operands + * @src_sz: size of each source operand + */ +static int ppc440spe_adma_estimate(struct dma_chan *chan, + enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt, + struct page **src_lst, int src_cnt, size_t src_sz) +{ + int ef = 1; + + if (cap == DMA_PQ || cap == DMA_PQ_VAL) { + /* If RAID-6 capabilities were not activated don't try + * to use them + */ + if (unlikely(!ppc440spe_r6_enabled)) + return -1; + } + /* In the current implementation of ppc440spe ADMA driver it + * makes sense to pick out only pq case, because it may be + * processed: + * (1) either using Biskup method on DMA2; + * (2) or on DMA0/1. + * Thus we give a favour to (1) if the sources are suitable; + * else let it be processed on one of the DMA0/1 engines. + * In the sum_product case where destination is also the + * source process it on DMA0/1 only. + */ + if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) { + + if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1]) + ef = 0; /* sum_product case, process on DMA0/1 */ + else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz)) + ef = 3; /* override (DMA0/1 + idle) */ + else + ef = 0; /* can't process on DMA2 if !rxor */ + } + + /* channel idleness increases the priority */ + if (likely(ef) && + !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan))) + ef++; + + return ef; +} + +struct dma_chan * +ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap, + struct page **dst_lst, int dst_cnt, struct page **src_lst, + int src_cnt, size_t src_sz) +{ + struct dma_chan *best_chan = NULL; + struct ppc_dma_chan_ref *ref; + int best_rank = -1; + + if (unlikely(!src_sz)) + return NULL; + if (src_sz > PAGE_SIZE) { + /* + * should a user of the api ever pass > PAGE_SIZE requests + * we sort out cases where temporary page-sized buffers + * are used. + */ + switch (cap) { + case DMA_PQ: + if (src_cnt == 1 && dst_lst[1] == src_lst[0]) + return NULL; + if (src_cnt == 2 && dst_lst[1] == src_lst[1]) + return NULL; + break; + case DMA_PQ_VAL: + case DMA_XOR_VAL: + return NULL; + default: + break; + } + } + + list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) { + if (dma_has_cap(cap, ref->chan->device->cap_mask)) { + int rank; + + rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst, + dst_cnt, src_lst, src_cnt, src_sz); + if (rank > best_rank) { + best_rank = rank; + best_chan = ref->chan; + } + } + } + + return best_chan; +} +EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel); + +/** + * ppc440spe_get_group_entry - get group entry with index idx + * @tdesc: is the last allocated slot in the group. + */ +static struct ppc440spe_adma_desc_slot * +ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx) +{ + struct ppc440spe_adma_desc_slot *iter = tdesc->group_head; + int i = 0; + + if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) { + printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n", + __func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt); + BUG(); + } + + list_for_each_entry(iter, &tdesc->group_list, chain_node) { + if (i++ == entry_idx) + break; + } + return iter; +} + +/** + * ppc440spe_adma_free_slots - flags descriptor slots for reuse + * @slot: Slot to free + * Caller must hold &ppc440spe_chan->lock while calling this function + */ +static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot, + struct ppc440spe_adma_chan *chan) +{ + int stride = slot->slots_per_op; + + while (stride--) { + slot->slots_per_op = 0; + slot = list_entry(slot->slot_node.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } +} + +static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *desc) +{ + u32 src_cnt, dst_cnt; + dma_addr_t addr; + + /* + * get the number of sources & destination + * included in this descriptor and unmap + * them all + */ + src_cnt = ppc440spe_desc_get_src_num(desc, chan); + dst_cnt = ppc440spe_desc_get_dst_num(desc, chan); + + /* unmap destinations */ + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + while (dst_cnt--) { + addr = ppc440spe_desc_get_dest_addr( + desc, chan, dst_cnt); + dma_unmap_page(chan->device->dev, + addr, desc->unmap_len, + DMA_FROM_DEVICE); + } + } + + /* unmap sources */ + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = ppc440spe_desc_get_src_addr( + desc, chan, src_cnt); + dma_unmap_page(chan->device->dev, + addr, desc->unmap_len, + DMA_TO_DEVICE); + } + } +} + +/** + * ppc440spe_adma_run_tx_complete_actions - call functions to be called + * upon completion + */ +static dma_cookie_t ppc440spe_adma_run_tx_complete_actions( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + dma_cookie_t cookie) +{ + int i; + + BUG_ON(desc->async_tx.cookie < 0); + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + if (desc->async_tx.callback) + desc->async_tx.callback( + desc->async_tx.callback_param); + + /* unmap dma addresses + * (unmap_single vs unmap_page?) + * + * actually, ppc's dma_unmap_page() functions are empty, so + * the following code is just for the sake of completeness + */ + if (chan && chan->needs_unmap && desc->group_head && + desc->unmap_len) { + struct ppc440spe_adma_desc_slot *unmap = + desc->group_head; + /* assume 1 slot per op always */ + u32 slot_count = unmap->slot_cnt; + + /* Run through the group list and unmap addresses */ + for (i = 0; i < slot_count; i++) { + BUG_ON(!unmap); + ppc440spe_adma_unmap(chan, unmap); + unmap = unmap->hw_next; + } + } + } + + /* run dependent operations */ + dma_run_dependencies(&desc->async_tx); + + return cookie; +} + +/** + * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set) + */ +static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) + return 0; + + /* leave the last descriptor in the chain + * so we can append to it + */ + if (list_is_last(&desc->chain_node, &chan->chain) || + desc->phys == ppc440spe_chan_get_current_descriptor(chan)) + return 1; + + if (chan->device->id != PPC440SPE_XOR_ID) { + /* our DMA interrupt handler clears opc field of + * each processed descriptor. For all types of + * operations except for ZeroSum we do not actually + * need ack from the interrupt handler. ZeroSum is a + * special case since the result of this operation + * is available from the handler only, so if we see + * such type of descriptor (which is unprocessed yet) + * then leave it in chain. + */ + struct dma_cdb *cdb = desc->hw_desc; + if (cdb->opc == DMA_CDB_OPC_DCHECK128) + return 1; + } + + dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n", + desc->phys, desc->idx, desc->slots_per_op); + + list_del(&desc->chain_node); + ppc440spe_adma_free_slots(desc, chan); + return 0; +} + +/** + * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine + * which runs through the channel CDBs list until reach the descriptor + * currently processed. When routine determines that all CDBs of group + * are completed then corresponding callbacks (if any) are called and slots + * are freed. + */ +static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL; + dma_cookie_t cookie = 0; + u32 current_desc = ppc440spe_chan_get_current_descriptor(chan); + int busy = ppc440spe_chan_is_busy(chan); + int seen_current = 0, slot_cnt = 0, slots_per_op = 0; + + dev_dbg(chan->device->common.dev, "ppc440spe adma%d: %s\n", + chan->device->id, __func__); + + if (!current_desc) { + /* There were no transactions yet, so + * nothing to clean + */ + return; + } + + /* free completed slots from the chain starting with + * the oldest descriptor + */ + list_for_each_entry_safe(iter, _iter, &chan->chain, + chain_node) { + dev_dbg(chan->device->common.dev, "\tcookie: %d slot: %d " + "busy: %d this_desc: %#llx next_desc: %#x " + "cur: %#x ack: %d\n", + iter->async_tx.cookie, iter->idx, busy, iter->phys, + ppc440spe_desc_get_link(iter, chan), current_desc, + async_tx_test_ack(&iter->async_tx)); + prefetch(_iter); + prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel,subsequent descriptors are either in process + * or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy, or if it appears that the current descriptor + * needs to be re-read (i.e. has been appended to) + */ + if (iter->phys == current_desc) { + BUG_ON(seen_current++); + if (busy || ppc440spe_desc_get_link(iter, chan)) { + /* not all descriptors of the group have + * been completed; exit. + */ + break; + } + } + + /* detect the start of a group transaction */ + if (!slot_cnt && !slots_per_op) { + slot_cnt = iter->slot_cnt; + slots_per_op = iter->slots_per_op; + if (slot_cnt <= slots_per_op) { + slot_cnt = 0; + slots_per_op = 0; + } + } + + if (slot_cnt) { + if (!group_start) + group_start = iter; + slot_cnt -= slots_per_op; + } + + /* all the members of a group are complete */ + if (slots_per_op != 0 && slot_cnt == 0) { + struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter; + int end_of_chain = 0; + + /* clean up the group */ + slot_cnt = group_start->slot_cnt; + grp_iter = group_start; + list_for_each_entry_safe_from(grp_iter, _grp_iter, + &chan->chain, chain_node) { + + cookie = ppc440spe_adma_run_tx_complete_actions( + grp_iter, chan, cookie); + + slot_cnt -= slots_per_op; + end_of_chain = ppc440spe_adma_clean_slot( + grp_iter, chan); + if (end_of_chain && slot_cnt) { + /* Should wait for ZeroSum completion */ + if (cookie > 0) + chan->common.completed_cookie = cookie; + return; + } + + if (slot_cnt == 0 || end_of_chain) + break; + } + + /* the group should be complete at this point */ + BUG_ON(slot_cnt); + + slots_per_op = 0; + group_start = NULL; + if (end_of_chain) + break; + else + continue; + } else if (slots_per_op) /* wait for group completion */ + continue; + + cookie = ppc440spe_adma_run_tx_complete_actions(iter, chan, + cookie); + + if (ppc440spe_adma_clean_slot(iter, chan)) + break; + } + + BUG_ON(!seen_current); + + if (cookie > 0) { + chan->common.completed_cookie = cookie; + pr_debug("\tcompleted cookie %d\n", cookie); + } + +} + +/** + * ppc440spe_adma_tasklet - clean up watch-dog initiator + */ +static void ppc440spe_adma_tasklet(unsigned long data) +{ + struct ppc440spe_adma_chan *chan = (struct ppc440spe_adma_chan *) data; + + spin_lock_nested(&chan->lock, SINGLE_DEPTH_NESTING); + __ppc440spe_adma_slot_cleanup(chan); + spin_unlock(&chan->lock); +} + +/** + * ppc440spe_adma_slot_cleanup - clean up scheduled initiator + */ +static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan) +{ + spin_lock_bh(&chan->lock); + __ppc440spe_adma_slot_cleanup(chan); + spin_unlock_bh(&chan->lock); +} + +/** + * ppc440spe_adma_alloc_slots - allocate free slots (if any) + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots( + struct ppc440spe_adma_chan *chan, int num_slots, + int slots_per_op) +{ + struct ppc440spe_adma_desc_slot *iter = NULL, *_iter; + struct ppc440spe_adma_desc_slot *alloc_start = NULL; + struct list_head chain = LIST_HEAD_INIT(chain); + int slots_found, retry = 0; + + + BUG_ON(!num_slots || !slots_per_op); + /* start search from the last allocated descrtiptor + * if a contiguous allocation can not be found start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = chan->last_used; + else + iter = list_entry(&chan->all_slots, + struct ppc440spe_adma_desc_slot, + slot_node); + list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots, + slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + slots_found = 0; + continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) + alloc_start = iter; + + if (slots_found == num_slots) { + struct ppc440spe_adma_desc_slot *alloc_tail = NULL; + struct ppc440spe_adma_desc_slot *last_used = NULL; + + iter = alloc_start; + while (num_slots) { + int i; + /* pre-ack all but the last descriptor */ + if (num_slots != slots_per_op) + async_tx_ack(&iter->async_tx); + + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->hw_next = NULL; + iter->flags = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->group_list); + chan->last_used = last_used; + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* try to free some slots if the allocation fails */ + tasklet_schedule(&chan->irq_tasklet); + return NULL; +} + +/** + * ppc440spe_adma_alloc_chan_resources - allocate pools for CDB slots + */ +static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *slot = NULL; + char *hw_desc; + int i, db_sz; + int init; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + init = ppc440spe_chan->slots_allocated ? 0 : 1; + chan->chan_id = ppc440spe_chan->device->id; + + /* Allocate descriptor slots */ + i = ppc440spe_chan->slots_allocated; + if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID) + db_sz = sizeof(struct dma_cdb); + else + db_sz = sizeof(struct xor_cb); + + for (; i < (ppc440spe_chan->device->pool_size / db_sz); i++) { + slot = kzalloc(sizeof(struct ppc440spe_adma_desc_slot), + GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "SPE ADMA Channel only initialized" + " %d descriptor slots", i--); + break; + } + + hw_desc = (char *) ppc440spe_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[i * db_sz]; + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = ppc440spe_adma_tx_submit; + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->group_list); + slot->phys = ppc440spe_chan->device->dma_desc_pool + i * db_sz; + slot->idx = i; + + spin_lock_bh(&ppc440spe_chan->lock); + ppc440spe_chan->slots_allocated++; + list_add_tail(&slot->slot_node, &ppc440spe_chan->all_slots); + spin_unlock_bh(&ppc440spe_chan->lock); + } + + if (i && !ppc440spe_chan->last_used) { + ppc440spe_chan->last_used = + list_entry(ppc440spe_chan->all_slots.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: allocated %d descriptor slots\n", + ppc440spe_chan->device->id, i); + + /* initialize the channel and the chain with a null operation */ + if (init) { + switch (ppc440spe_chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + ppc440spe_chan->hw_chain_inited = 0; + /* Use WXOR for self-testing */ + if (!ppc440spe_r6_tchan) + ppc440spe_r6_tchan = ppc440spe_chan; + break; + case PPC440SPE_XOR_ID: + ppc440spe_chan_start_null_xor(ppc440spe_chan); + break; + default: + BUG(); + } + ppc440spe_chan->needs_unmap = 1; + } + + return (i > 0) ? i : -ENOMEM; +} + +/** + * ppc440spe_rxor_set_region_data - + */ +static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, u32 mask) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= mask; +} + +/** + * ppc440spe_rxor_set_src - + */ +static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, dma_addr_t addr) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE; + xcb->ops[xor_arg_no].l = addr; +} + +/** + * ppc440spe_rxor_set_mult - + */ +static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, u8 idx, u8 mult) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8); +} + +/** + * ppc440spe_adma_check_threshold - append CDBs to h/w chain if threshold + * has been achieved + */ +static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan) +{ + dev_dbg(chan->device->common.dev, "ppc440spe adma%d: pending: %d\n", + chan->device->id, chan->pending); + + if (chan->pending >= PPC440SPE_ADMA_THRESHOLD) { + chan->pending = 0; + ppc440spe_chan_append(chan); + } +} + +/** + * ppc440spe_adma_tx_submit - submit new descriptor group to the channel + * (it's not necessary that descriptors will be submitted to the h/w + * chains too right now) + */ +static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ppc440spe_adma_desc_slot *sw_desc; + struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan); + struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail; + int slot_cnt; + int slots_per_op; + dma_cookie_t cookie; + + sw_desc = tx_to_ppc440spe_adma_slot(tx); + + group_start = sw_desc->group_head; + slot_cnt = group_start->slot_cnt; + slots_per_op = group_start->slots_per_op; + + spin_lock_bh(&chan->lock); + cookie = dma_cookie_assign(tx); + + if (unlikely(list_empty(&chan->chain))) { + /* first peer */ + list_splice_init(&sw_desc->group_list, &chan->chain); + chan_first_cdb[chan->device->id] = group_start; + } else { + /* isn't first peer, bind CDBs to chain */ + old_chain_tail = list_entry(chan->chain.prev, + struct ppc440spe_adma_desc_slot, + chain_node); + list_splice_init(&sw_desc->group_list, + &old_chain_tail->chain_node); + /* fix up the hardware chain */ + ppc440spe_desc_set_link(chan, old_chain_tail, group_start); + } + + /* increment the pending count by the number of operations */ + chan->pending += slot_cnt / slots_per_op; + ppc440spe_adma_check_threshold(chan); + spin_unlock_bh(&chan->lock); + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s cookie: %d slot: %d tx %p\n", + chan->device->id, __func__, + sw_desc->async_tx.cookie, sw_desc->idx, sw_desc); + + return cookie; +} + +/** + * ppc440spe_adma_prep_dma_interrupt - prepare CDB for a pseudo DMA operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_interrupt( + struct dma_chan *chan, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s\n", ppc440spe_chan->device->id, + __func__); + + spin_lock_bh(&ppc440spe_chan->lock); + slot_cnt = slots_per_op = 1; + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_interrupt(group_start, ppc440spe_chan); + group_start->unmap_len = 0; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_memcpy - prepare CDB for a MEMCPY operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + if (unlikely(!len)) + return NULL; + + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); + + spin_lock_bh(&ppc440spe_chan->lock); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s len: %u int_en %d\n", + ppc440spe_chan->device->id, __func__, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + slot_cnt = slots_per_op = 1; + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_memcpy(group_start, flags); + ppc440spe_adma_set_dest(group_start, dma_dest, 0); + ppc440spe_adma_memcpy_xor_set_src(group_start, dma_src, 0); + ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_memset - prepare CDB for a MEMSET operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset( + struct dma_chan *chan, dma_addr_t dma_dest, int value, + size_t len, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + if (unlikely(!len)) + return NULL; + + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); + + spin_lock_bh(&ppc440spe_chan->lock); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s cal: %u len: %u int_en %d\n", + ppc440spe_chan->device->id, __func__, value, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + slot_cnt = slots_per_op = 1; + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_memset(group_start, value, flags); + ppc440spe_adma_set_dest(group_start, dma_dest, 0); + ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor( + struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t *dma_src, u32 src_cnt, size_t len, + unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + ADMA_LL_DBG(prep_dma_xor_dbg(ppc440spe_chan->device->id, + dma_dest, dma_src, src_cnt)); + if (unlikely(!len)) + return NULL; + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", + ppc440spe_chan->device->id, __func__, src_cnt, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + spin_lock_bh(&ppc440spe_chan->lock); + slot_cnt = ppc440spe_chan_xor_slot_count(len, src_cnt, &slots_per_op); + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_xor(group_start, src_cnt, flags); + ppc440spe_adma_set_dest(group_start, dma_dest, 0); + while (src_cnt--) + ppc440spe_adma_memcpy_xor_set_src(group_start, + dma_src[src_cnt], src_cnt); + ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static inline void +ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc, + int src_cnt); +static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor); + +/** + * ppc440spe_adma_init_dma2rxor_slot - + */ +static void ppc440spe_adma_init_dma2rxor_slot( + struct ppc440spe_adma_desc_slot *desc, + dma_addr_t *src, int src_cnt) +{ + int i; + + /* initialize CDB */ + for (i = 0; i < src_cnt; i++) { + ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i, + desc->src_cnt, (u32)src[i]); + } +} + +/** + * ppc440spe_dma01_prep_mult - + * for Q operation where destination is also the source + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + unsigned long op = 0; + int slot_cnt; + + set_bit(PPC440SPE_DESC_WXOR, &op); + slot_cnt = 2; + + spin_lock_bh(&ppc440spe_chan->lock); + + /* use WXOR, each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + struct ppc440spe_adma_chan *chan; + struct ppc440spe_adma_desc_slot *iter; + struct dma_cdb *hw_desc; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + set_bits(op, &sw_desc->flags); + sw_desc->src_cnt = src_cnt; + sw_desc->dst_cnt = dst_cnt; + /* First descriptor, zero data in the destination and copy it + * to q page using MULTICAST transfer. + */ + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MULTICAST; + + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, dst[0], 0); + ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* + * Second descriptor, multiply data from the q page + * and store the result in real destination. + */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, + DMA_CUED_XOR_HB, dst[1]); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, dst[0], 0); + + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +/** + * ppc440spe_dma01_prep_sum_product - + * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also + * the source. + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + unsigned long op = 0; + int slot_cnt; + + set_bit(PPC440SPE_DESC_WXOR, &op); + slot_cnt = 3; + + spin_lock_bh(&ppc440spe_chan->lock); + + /* WXOR, each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + struct ppc440spe_adma_chan *chan; + struct ppc440spe_adma_desc_slot *iter; + struct dma_cdb *hw_desc; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + set_bits(op, &sw_desc->flags); + sw_desc->src_cnt = src_cnt; + sw_desc->dst_cnt = 1; + /* 1st descriptor, src[1] data to q page and zero destination */ + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MULTICAST; + + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->qdest, 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[1]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* 2nd descriptor, multiply src[1] data and store the + * result in destination */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + ppc440spe_chan->qdest); + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[1]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* + * 3rd descriptor, multiply src[0] data and xor it + * with destination + */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[0]); + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + int slot_cnt; + struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter; + unsigned long op = 0; + unsigned char mult = 1; + + pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n", + __func__, dst_cnt, src_cnt, len); + /* select operations WXOR/RXOR depending on the + * source addresses of operators and the number + * of destinations (RXOR support only Q-parity calculations) + */ + set_bit(PPC440SPE_DESC_WXOR, &op); + if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) { + /* no active RXOR; + * do RXOR if: + * - there are more than 1 source, + * - len is aligned on 512-byte boundary, + * - source addresses fit to one of 4 possible regions. + */ + if (src_cnt > 1 && + !(len & MQ0_CF2H_RXOR_BS_MASK) && + (src[0] + len) == src[1]) { + /* may do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR, &op); + if (src_cnt != 2) { + /* may try to enhance region of RXOR */ + if ((src[1] + len) == src[2]) { + /* do RXOR R1 R2 R3 */ + set_bit(PPC440SPE_DESC_RXOR123, + &op); + } else if ((src[1] + len * 2) == src[2]) { + /* do RXOR R1 R2 R4 */ + set_bit(PPC440SPE_DESC_RXOR124, &op); + } else if ((src[1] + len * 3) == src[2]) { + /* do RXOR R1 R2 R5 */ + set_bit(PPC440SPE_DESC_RXOR125, + &op); + } else { + /* do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR12, + &op); + } + } else { + /* do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR12, &op); + } + } + + if (!test_bit(PPC440SPE_DESC_RXOR, &op)) { + /* can not do this operation with RXOR */ + clear_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state); + } else { + /* can do; set block size right now */ + ppc440spe_desc_set_rxor_block_size(len); + } + } + + /* Number of necessary slots depends on operation type selected */ + if (!test_bit(PPC440SPE_DESC_RXOR, &op)) { + /* This is a WXOR only chain. Need descriptors for each + * source to GF-XOR them with WXOR, and need descriptors + * for each destination to zero them with WXOR + */ + slot_cnt = src_cnt; + + if (flags & DMA_PREP_ZERO_P) { + slot_cnt++; + set_bit(PPC440SPE_ZERO_P, &op); + } + if (flags & DMA_PREP_ZERO_Q) { + slot_cnt++; + set_bit(PPC440SPE_ZERO_Q, &op); + } + } else { + /* Need 1/2 descriptor for RXOR operation, and + * need (src_cnt - (2 or 3)) for WXOR of sources + * remained (if any) + */ + slot_cnt = dst_cnt; + + if (flags & DMA_PREP_ZERO_P) + set_bit(PPC440SPE_ZERO_P, &op); + if (flags & DMA_PREP_ZERO_Q) + set_bit(PPC440SPE_ZERO_Q, &op); + + if (test_bit(PPC440SPE_DESC_RXOR12, &op)) + slot_cnt += src_cnt - 2; + else + slot_cnt += src_cnt - 3; + + /* Thus we have either RXOR only chain or + * mixed RXOR/WXOR + */ + if (slot_cnt == dst_cnt) + /* RXOR only chain */ + clear_bit(PPC440SPE_DESC_WXOR, &op); + } + + spin_lock_bh(&ppc440spe_chan->lock); + /* for both RXOR/WXOR each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt, + flags, op); + + /* setup dst/src/mult */ + pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n", + __func__, dst[0], dst[1]); + ppc440spe_adma_pq_set_dest(sw_desc, dst, flags); + while (src_cnt--) { + ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt], + src_cnt); + + /* NOTE: "Multi = 0 is equivalent to = 1" as it + * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf + * doesn't work for RXOR with DMA0/1! Instead, multi=0 + * leads to zeroing source data after RXOR. + * So, for P case set-up mult=1 explicitly. + */ + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + mult = scf[src_cnt]; + ppc440spe_adma_pq_set_src_mult(sw_desc, + mult, src_cnt, dst_cnt - 1); + } + + /* Setup byte count foreach slot just allocated */ + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, + chain_node) { + ppc440spe_desc_set_byte_count(iter, + ppc440spe_chan, len); + iter->unmap_len = len; + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + int slot_cnt, descs_per_op; + struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter; + unsigned long op = 0; + unsigned char mult = 1; + + BUG_ON(!dst_cnt); + /*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n", + __func__, dst_cnt, src_cnt, len);*/ + + spin_lock_bh(&ppc440spe_chan->lock); + descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len); + if (descs_per_op < 0) { + spin_unlock_bh(&ppc440spe_chan->lock); + return NULL; + } + + /* depending on number of sources we have 1 or 2 RXOR chains */ + slot_cnt = descs_per_op * dst_cnt; + + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + op = slot_cnt; + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt, + --op ? 0 : flags); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = len; + + ppc440spe_init_rxor_cursor(&(iter->rxor_cursor)); + iter->rxor_cursor.len = len; + iter->descs_per_op = descs_per_op; + } + op = 0; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + op++; + if (op % descs_per_op == 0) + ppc440spe_adma_init_dma2rxor_slot(iter, src, + src_cnt); + if (likely(!list_is_last(&iter->chain_node, + &sw_desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = + list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + ppc440spe_xor_set_link(iter, iter->hw_next); + } else { + /* this is the last descriptor. */ + iter->hw_next = NULL; + } + } + + /* fixup head descriptor */ + sw_desc->dst_cnt = dst_cnt; + if (flags & DMA_PREP_ZERO_P) + set_bit(PPC440SPE_ZERO_P, &sw_desc->flags); + if (flags & DMA_PREP_ZERO_Q) + set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags); + + /* setup dst/src/mult */ + ppc440spe_adma_pq_set_dest(sw_desc, dst, flags); + + while (src_cnt--) { + /* handle descriptors (if dst_cnt == 2) inside + * the ppc440spe_adma_pq_set_srcxxx() functions + */ + ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt], + src_cnt); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + mult = scf[src_cnt]; + ppc440spe_adma_pq_set_src_mult(sw_desc, + mult, src_cnt, dst_cnt - 1); + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + ppc440spe_desc_set_rxor_block_size(len); + return sw_desc; +} + +/** + * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + int dst_cnt = 0; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id, + dst, src, src_cnt)); + BUG_ON(!len); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); + BUG_ON(!src_cnt); + + if (src_cnt == 1 && dst[1] == src[0]) { + dma_addr_t dest[2]; + + /* dst[1] is real destination (Q) */ + dest[0] = dst[1]; + /* this is the page to multicast source data to */ + dest[1] = ppc440spe_chan->qdest; + sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan, + dest, 2, src, src_cnt, scf, len, flags); + return sw_desc ? &sw_desc->async_tx : NULL; + } + + if (src_cnt == 2 && dst[1] == src[1]) { + sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan, + &dst[1], src, 2, scf, len, flags); + return sw_desc ? &sw_desc->async_tx : NULL; + } + + if (!(flags & DMA_PREP_PQ_DISABLE_P)) { + BUG_ON(!dst[0]); + dst_cnt++; + flags |= DMA_PREP_ZERO_P; + } + + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) { + BUG_ON(!dst[1]); + dst_cnt++; + flags |= DMA_PREP_ZERO_Q; + } + + BUG_ON(!dst_cnt); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", + ppc440spe_chan->device->id, __func__, src_cnt, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + switch (ppc440spe_chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan, + dst, dst_cnt, src, src_cnt, scf, + len, flags); + break; + + case PPC440SPE_XOR_ID: + sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan, + dst, dst_cnt, src, src_cnt, scf, + len, flags); + break; + } + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for + * a PQ_ZERO_SUM operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *iter; + dma_addr_t pdest, qdest; + int slot_cnt, slots_per_op, idst, dst_cnt; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + if (flags & DMA_PREP_PQ_DISABLE_P) + pdest = 0; + else + pdest = pq[0]; + + if (flags & DMA_PREP_PQ_DISABLE_Q) + qdest = 0; + else + qdest = pq[1]; + + ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id, + src, src_cnt, scf)); + + /* Always use WXOR for P/Q calculations (two destinations). + * Need 1 or 2 extra slots to verify results are zero. + */ + idst = dst_cnt = (pdest && qdest) ? 2 : 1; + + /* One additional slot per destination to clone P/Q + * before calculation (we have to preserve destinations). + */ + slot_cnt = src_cnt + dst_cnt * 2; + slots_per_op = 1; + + spin_lock_bh(&ppc440spe_chan->lock); + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt); + + /* Setup byte count for each slot just allocated */ + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = len; + } + + if (pdest) { + struct dma_cdb *hw_desc; + struct ppc440spe_adma_chan *chan; + + iter = sw_desc->group_head; + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter->src_cnt = 0; + iter->dst_cnt = 0; + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->pdest, 0); + ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = 0; + /* override pdest to preserve original P */ + pdest = ppc440spe_chan->pdest; + } + if (qdest) { + struct dma_cdb *hw_desc; + struct ppc440spe_adma_chan *chan; + + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + + if (pdest) { + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter->src_cnt = 0; + iter->dst_cnt = 0; + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->qdest, 0); + ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = 0; + /* override qdest to preserve original Q */ + qdest = ppc440spe_chan->qdest; + } + + /* Setup destinations for P/Q ops */ + ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest); + + /* Setup zero QWORDs into DCHECK CDBs */ + idst = dst_cnt; + list_for_each_entry_reverse(iter, &sw_desc->group_list, + chain_node) { + /* + * The last CDB corresponds to Q-parity check, + * the one before last CDB corresponds + * P-parity check + */ + if (idst == DMA_DEST_MAX_NUM) { + if (idst == dst_cnt) { + set_bit(PPC440SPE_DESC_QCHECK, + &iter->flags); + } else { + set_bit(PPC440SPE_DESC_PCHECK, + &iter->flags); + } + } else { + if (qdest) { + set_bit(PPC440SPE_DESC_QCHECK, + &iter->flags); + } else { + set_bit(PPC440SPE_DESC_PCHECK, + &iter->flags); + } + } + iter->xor_check_result = pqres; + + /* + * set it to zero, if check fail then result will + * be updated + */ + *iter->xor_check_result = 0; + ppc440spe_desc_set_dcheck(iter, ppc440spe_chan, + ppc440spe_qword); + + if (!(--dst_cnt)) + break; + } + + /* Setup sources and mults for P/Q ops */ + list_for_each_entry_continue_reverse(iter, &sw_desc->group_list, + chain_node) { + struct ppc440spe_adma_chan *chan; + u32 mult_dst; + + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + ppc440spe_desc_set_src_addr(iter, chan, 0, + DMA_CUED_XOR_HB, + src[src_cnt - 1]); + if (qdest) { + mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 : + DMA_CDB_SG_DST1; + ppc440spe_desc_set_src_mult(iter, chan, + DMA_CUED_MULT1_OFF, + mult_dst, + scf[src_cnt - 1]); + } + if (!(--src_cnt)) + break; + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for + * XOR ZERO_SUM operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum( + struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, + size_t len, enum sum_check_flags *result, unsigned long flags) +{ + struct dma_async_tx_descriptor *tx; + dma_addr_t pq[2]; + + /* validate P, disable Q */ + pq[0] = src[0]; + pq[1] = 0; + flags |= DMA_PREP_PQ_DISABLE_Q; + + tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1], + src_cnt - 1, 0, len, + result, flags); + return tx; +} + +/** + * ppc440spe_adma_set_dest - set destination address into descriptor + */ +static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; + + BUG_ON(index >= sw_desc->dst_cnt); + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* to do: support transfers lengths > + * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT + */ + ppc440spe_desc_set_dest_addr(sw_desc->group_head, + chan, 0, addr, index); + break; + case PPC440SPE_XOR_ID: + sw_desc = ppc440spe_get_group_entry(sw_desc, index); + ppc440spe_desc_set_dest_addr(sw_desc, + chan, 0, addr, index); + break; + } +} + +static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter, + struct ppc440spe_adma_chan *chan, dma_addr_t addr) +{ + /* To clear destinations update the descriptor + * (P or Q depending on index) as follows: + * addr is destination (0 corresponds to SG2): + */ + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0); + + /* ... and the addr is source: */ + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr); + + /* addr is always SG2 then the mult is always DST1 */ + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, 1); +} + +/** + * ppc440spe_adma_pq_set_dest - set destination address into descriptor + * for the PQXOR operation + */ +static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t *addrs, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *iter; + struct ppc440spe_adma_chan *chan; + dma_addr_t paddr, qaddr; + dma_addr_t addr = 0, ppath, qpath; + int index = 0, i; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + if (flags & DMA_PREP_PQ_DISABLE_P) + paddr = 0; + else + paddr = addrs[0]; + + if (flags & DMA_PREP_PQ_DISABLE_Q) + qaddr = 0; + else + qaddr = addrs[1]; + + if (!paddr || !qaddr) + addr = paddr ? paddr : qaddr; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* walk through the WXOR source list and set P/Q-destinations + * for each slot: + */ + if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + /* This is WXOR-only chain; may have 1/2 zero descs */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + index++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + index++; + + iter = ppc440spe_get_group_entry(sw_desc, index); + if (addr) { + /* one destination */ + list_for_each_entry_from(iter, + &sw_desc->group_list, chain_node) + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, addr, 0); + } else { + /* two destinations */ + list_for_each_entry_from(iter, + &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, paddr, 0); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, qaddr, 1); + } + } + + if (index) { + /* To clear destinations update the descriptor + * (1st,2nd, or both depending on flags) + */ + index = 0; + if (test_bit(PPC440SPE_ZERO_P, + &sw_desc->flags)) { + iter = ppc440spe_get_group_entry( + sw_desc, index++); + ppc440spe_adma_pq_zero_op(iter, chan, + paddr); + } + + if (test_bit(PPC440SPE_ZERO_Q, + &sw_desc->flags)) { + iter = ppc440spe_get_group_entry( + sw_desc, index++); + ppc440spe_adma_pq_zero_op(iter, chan, + qaddr); + } + + return; + } + } else { + /* This is RXOR-only or RXOR/WXOR mixed chain */ + + /* If we want to include destination into calculations, + * then make dest addresses cued with mult=1 (XOR). + */ + ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + /* Setup destination(s) in RXOR slot(s) */ + iter = ppc440spe_get_group_entry(sw_desc, index++); + ppc440spe_desc_set_dest_addr(iter, chan, + paddr ? ppath : qpath, + paddr ? paddr : qaddr, 0); + if (!addr) { + /* two destinations */ + iter = ppc440spe_get_group_entry(sw_desc, + index++); + ppc440spe_desc_set_dest_addr(iter, chan, + qpath, qaddr, 0); + } + + if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) { + /* Setup destination(s) in remaining WXOR + * slots + */ + iter = ppc440spe_get_group_entry(sw_desc, + index); + if (addr) { + /* one destination */ + list_for_each_entry_from(iter, + &sw_desc->group_list, + chain_node) + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + addr, 0); + + } else { + /* two destinations */ + list_for_each_entry_from(iter, + &sw_desc->group_list, + chain_node) { + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + paddr, 0); + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + qaddr, 1); + } + } + } + + } + break; + + case PPC440SPE_XOR_ID: + /* DMA2 descriptors have only 1 destination, so there are + * two chains - one for each dest. + * If we want to include destination into calculations, + * then make dest addresses cued with mult=1 (XOR). + */ + ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + iter = ppc440spe_get_group_entry(sw_desc, 0); + for (i = 0; i < sw_desc->descs_per_op; i++) { + ppc440spe_desc_set_dest_addr(iter, chan, + paddr ? ppath : qpath, + paddr ? paddr : qaddr, 0); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + if (!addr) { + /* Two destinations; setup Q here */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + for (i = 0; i < sw_desc->descs_per_op; i++) { + ppc440spe_desc_set_dest_addr(iter, + chan, qpath, qaddr, 0); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + } + + break; + } +} + +/** + * ppc440spe_adma_pq_zero_sum_set_dest - set destination address into descriptor + * for the PQ_ZERO_SUM operation + */ +static void ppc440spe_adma_pqzero_sum_set_dest( + struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t paddr, dma_addr_t qaddr) +{ + struct ppc440spe_adma_desc_slot *iter, *end; + struct ppc440spe_adma_chan *chan; + dma_addr_t addr = 0; + int idx; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + /* walk through the WXOR source list and set P/Q-destinations + * for each slot + */ + idx = (paddr && qaddr) ? 2 : 1; + /* set end */ + list_for_each_entry_reverse(end, &sw_desc->group_list, + chain_node) { + if (!(--idx)) + break; + } + /* set start */ + idx = (paddr && qaddr) ? 2 : 1; + iter = ppc440spe_get_group_entry(sw_desc, idx); + + if (paddr && qaddr) { + /* two destinations */ + list_for_each_entry_from(iter, &sw_desc->group_list, + chain_node) { + if (unlikely(iter == end)) + break; + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, paddr, 0); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, qaddr, 1); + } + } else { + /* one destination */ + addr = paddr ? paddr : qaddr; + list_for_each_entry_from(iter, &sw_desc->group_list, + chain_node) { + if (unlikely(iter == end)) + break; + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, addr, 0); + } + } + + /* The remaining descriptors are DATACHECK. These have no need in + * destination. Actually, these destinations are used there + * as sources for check operation. So, set addr as source. + */ + ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr); + + if (!addr) { + end = list_entry(end->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr); + } +} + +/** + * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor + */ +static inline void ppc440spe_desc_set_xor_src_cnt( + struct ppc440spe_adma_desc_slot *desc, + int src_cnt) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + hw_desc->cbc &= ~XOR_CDCR_OAC_MSK; + hw_desc->cbc |= src_cnt; +} + +/** + * ppc440spe_adma_pq_set_src - set source address into descriptor + */ +static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; + dma_addr_t haddr = 0; + struct ppc440spe_adma_desc_slot *iter = NULL; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain + */ + if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + /* RXOR-only or RXOR/WXOR operation */ + int iskip = test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags) ? 2 : 3; + + if (index == 0) { + /* 1st slot (RXOR) */ + /* setup sources region (R1-2-3, R1-2-4, + * or R1-2-5) + */ + if (test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags)) + haddr = DMA_RXOR12 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR123, + &sw_desc->flags)) + haddr = DMA_RXOR123 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR124, + &sw_desc->flags)) + haddr = DMA_RXOR124 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR125, + &sw_desc->flags)) + haddr = DMA_RXOR125 << + DMA_CUED_REGION_OFF; + else + BUG(); + haddr |= DMA_CUED_XOR_BASE; + iter = ppc440spe_get_group_entry(sw_desc, 0); + } else if (index < iskip) { + /* 1st slot (RXOR) + * shall actually set source address only once + * instead of first <iskip> + */ + iter = NULL; + } else { + /* 2nd/3d and next slots (WXOR); + * skip first slot with RXOR + */ + haddr = DMA_CUED_XOR_HB; + iter = ppc440spe_get_group_entry(sw_desc, + index - iskip + sw_desc->dst_cnt); + } + } else { + int znum = 0; + + /* WXOR-only operation; skip first slots with + * zeroing destinations + */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + znum++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + znum++; + + haddr = DMA_CUED_XOR_HB; + iter = ppc440spe_get_group_entry(sw_desc, + index + znum); + } + + if (likely(iter)) { + ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr); + + if (!index && + test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) && + sw_desc->dst_cnt == 2) { + /* if we have two destinations for RXOR, then + * setup source in the second descr too + */ + iter = ppc440spe_get_group_entry(sw_desc, 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, + haddr, addr); + } + } + break; + + case PPC440SPE_XOR_ID: + /* DMA2 may do Biskup */ + iter = sw_desc->group_head; + if (iter->dst_cnt == 2) { + /* both P & Q calculations required; set P src here */ + ppc440spe_adma_dma2rxor_set_src(iter, index, addr); + + /* this is for Q */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + } + ppc440spe_adma_dma2rxor_set_src(iter, index, addr); + break; + } +} + +/** + * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor + */ +static void ppc440spe_adma_memcpy_xor_set_src( + struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + sw_desc = sw_desc->group_head; + + if (likely(sw_desc)) + ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr); +} + +/** + * ppc440spe_adma_dma2rxor_inc_addr - + */ +static void ppc440spe_adma_dma2rxor_inc_addr( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_rxor *cursor, int index, int src_cnt) +{ + cursor->addr_count++; + if (index == src_cnt - 1) { + ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count); + } else if (cursor->addr_count == XOR_MAX_OPS) { + ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count); + cursor->addr_count = 0; + cursor->desc_count++; + } +} + +/** + * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB + */ +static int ppc440spe_adma_dma2rxor_prep_src( + struct ppc440spe_adma_desc_slot *hdesc, + struct ppc440spe_rxor *cursor, int index, + int src_cnt, u32 addr) +{ + int rval = 0; + u32 sign; + struct ppc440spe_adma_desc_slot *desc = hdesc; + int i; + + for (i = 0; i < cursor->desc_count; i++) { + desc = list_entry(hdesc->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + switch (cursor->state) { + case 0: + if (addr == cursor->addrl + cursor->len) { + /* direct RXOR */ + cursor->state = 1; + cursor->xor_count++; + if (index == src_cnt-1) { + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (cursor->addrl == addr + cursor->len) { + /* reverse RXOR */ + cursor->state = 1; + cursor->xor_count++; + set_bit(cursor->addr_count, &desc->reverse_flags[0]); + if (index == src_cnt-1) { + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else { + printk(KERN_ERR "Cannot build " + "DMA2 RXOR command block.\n"); + BUG(); + } + break; + case 1: + sign = test_bit(cursor->addr_count, + desc->reverse_flags) + ? -1 : 1; + if (index == src_cnt-2 || (sign == -1 + && addr != cursor->addrl - 2*cursor->len)) { + cursor->state = 0; + cursor->xor_count = 1; + cursor->addrl = addr; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } else if (addr == cursor->addrl + 2*sign*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR123 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (addr == cursor->addrl + 3*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR124 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (addr == cursor->addrl + 4*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR125 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else { + cursor->state = 0; + cursor->xor_count = 1; + cursor->addrl = addr; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + break; + case 2: + cursor->state = 0; + cursor->addrl = addr; + cursor->xor_count++; + if (index) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + break; + } + + return rval; +} + +/** + * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that + * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call + */ +static void ppc440spe_adma_dma2rxor_set_src( + struct ppc440spe_adma_desc_slot *desc, + int index, dma_addr_t addr) +{ + struct xor_cb *xcb = desc->hw_desc; + int k = 0, op = 0, lop = 0; + + /* get the RXOR operand which corresponds to index addr */ + while (op <= index) { + lop = op; + if (k == XOR_MAX_OPS) { + k = 0; + desc = list_entry(desc->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + xcb = desc->hw_desc; + + } + if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) == + (DMA_RXOR12 << DMA_CUED_REGION_OFF)) + op += 2; + else + op += 3; + } + + BUG_ON(k < 1); + + if (test_bit(k-1, desc->reverse_flags)) { + /* reverse operand order; put last op in RXOR group */ + if (index == op - 1) + ppc440spe_rxor_set_src(desc, k - 1, addr); + } else { + /* direct operand order; put first op in RXOR group */ + if (index == lop) + ppc440spe_rxor_set_src(desc, k - 1, addr); + } +} + +/** + * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that + * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call + */ +static void ppc440spe_adma_dma2rxor_set_mult( + struct ppc440spe_adma_desc_slot *desc, + int index, u8 mult) +{ + struct xor_cb *xcb = desc->hw_desc; + int k = 0, op = 0, lop = 0; + + /* get the RXOR operand which corresponds to index mult */ + while (op <= index) { + lop = op; + if (k == XOR_MAX_OPS) { + k = 0; + desc = list_entry(desc->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + xcb = desc->hw_desc; + + } + if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) == + (DMA_RXOR12 << DMA_CUED_REGION_OFF)) + op += 2; + else + op += 3; + } + + BUG_ON(k < 1); + if (test_bit(k-1, desc->reverse_flags)) { + /* reverse order */ + ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult); + } else { + /* direct order */ + ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult); + } +} + +/** + * ppc440spe_init_rxor_cursor - + */ +static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor) +{ + memset(cursor, 0, sizeof(struct ppc440spe_rxor)); + cursor->state = 2; +} + +/** + * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into + * descriptor for the PQXOR operation + */ +static void ppc440spe_adma_pq_set_src_mult( + struct ppc440spe_adma_desc_slot *sw_desc, + unsigned char mult, int index, int dst_pos) +{ + struct ppc440spe_adma_chan *chan; + u32 mult_idx, mult_dst; + struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + int region = test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags) ? 2 : 3; + + if (index < region) { + /* RXOR multipliers */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->dst_cnt - 1); + if (sw_desc->dst_cnt == 2) + iter1 = ppc440spe_get_group_entry( + sw_desc, 0); + + mult_idx = DMA_CUED_MULT1_OFF + (index << 3); + mult_dst = DMA_CDB_SG_SRC; + } else { + /* WXOR multiplier */ + iter = ppc440spe_get_group_entry(sw_desc, + index - region + + sw_desc->dst_cnt); + mult_idx = DMA_CUED_MULT1_OFF; + mult_dst = dst_pos ? DMA_CDB_SG_DST2 : + DMA_CDB_SG_DST1; + } + } else { + int znum = 0; + + /* WXOR-only; + * skip first slots with destinations (if ZERO_DST has + * place) + */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + znum++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + znum++; + + iter = ppc440spe_get_group_entry(sw_desc, index + znum); + mult_idx = DMA_CUED_MULT1_OFF; + mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1; + } + + if (likely(iter)) { + ppc440spe_desc_set_src_mult(iter, chan, + mult_idx, mult_dst, mult); + + if (unlikely(iter1)) { + /* if we have two destinations for RXOR, then + * we've just set Q mult. Set-up P now. + */ + ppc440spe_desc_set_src_mult(iter1, chan, + mult_idx, mult_dst, 1); + } + + } + break; + + case PPC440SPE_XOR_ID: + iter = sw_desc->group_head; + if (sw_desc->dst_cnt == 2) { + /* both P & Q calculations required; set P mult here */ + ppc440spe_adma_dma2rxor_set_mult(iter, index, 1); + + /* and then set Q mult */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + } + ppc440spe_adma_dma2rxor_set_mult(iter, index, mult); + break; + } +} + +/** + * ppc440spe_adma_free_chan_resources - free the resources allocated + */ +static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *iter, *_iter; + int in_use_descs = 0; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + ppc440spe_adma_slot_cleanup(ppc440spe_chan); + + spin_lock_bh(&ppc440spe_chan->lock); + list_for_each_entry_safe(iter, _iter, &ppc440spe_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe_reverse(iter, _iter, + &ppc440spe_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + ppc440spe_chan->slots_allocated--; + } + ppc440spe_chan->last_used = NULL; + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d %s slots_allocated %d\n", + ppc440spe_chan->device->id, + __func__, ppc440spe_chan->slots_allocated); + spin_unlock_bh(&ppc440spe_chan->lock); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n", + in_use_descs - 1); +} + +/** + * ppc440spe_adma_tx_status - poll the status of an ADMA transaction + * @chan: ADMA channel handle + * @cookie: ADMA transaction identifier + * @txstate: a holder for the current state of the channel + */ +static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + enum dma_status ret; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_SUCCESS) + return ret; + + ppc440spe_adma_slot_cleanup(ppc440spe_chan); + + return dma_cookie_status(chan, cookie, txstate); +} + +/** + * ppc440spe_adma_eot_handler - end of transfer interrupt handler + */ +static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data) +{ + struct ppc440spe_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + tasklet_schedule(&chan->irq_tasklet); + ppc440spe_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} + +/** + * ppc440spe_adma_err_handler - DMA error interrupt handler; + * do the same things as a eot handler + */ +static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data) +{ + struct ppc440spe_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + tasklet_schedule(&chan->irq_tasklet); + ppc440spe_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} + +/** + * ppc440spe_test_callback - called when test operation has been done + */ +static void ppc440spe_test_callback(void *unused) +{ + complete(&ppc440spe_r6_test_comp); +} + +/** + * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w + */ +static void ppc440spe_adma_issue_pending(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s %d \n", ppc440spe_chan->device->id, + __func__, ppc440spe_chan->pending); + + if (ppc440spe_chan->pending) { + ppc440spe_chan->pending = 0; + ppc440spe_chan_append(ppc440spe_chan); + } +} + +/** + * ppc440spe_chan_start_null_xor - initiate the first XOR operation (DMA engines + * use FIFOs (as opposite to chains used in XOR) so this is a XOR + * specific operation) + */ +static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + spin_lock_bh(&chan->lock); + slot_cnt = ppc440spe_chan_xor_slot_count(0, 2, &slots_per_op); + sw_desc = ppc440spe_adma_alloc_slots(chan, slot_cnt, slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + list_splice_init(&sw_desc->group_list, &chan->chain); + async_tx_ack(&sw_desc->async_tx); + ppc440spe_desc_init_null_xor(group_start); + + cookie = dma_cookie_assign(&sw_desc->async_tx); + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + chan->common.completed_cookie = cookie - 1; + + /* channel should not be busy */ + BUG_ON(ppc440spe_chan_is_busy(chan)); + + /* set the descriptor address */ + ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc); + + /* run the descriptor */ + ppc440spe_chan_run(chan); + } else + printk(KERN_ERR "ppc440spe adma%d" + " failed to allocate null descriptor\n", + chan->device->id); + spin_unlock_bh(&chan->lock); +} + +/** + * ppc440spe_test_raid6 - test are RAID-6 capabilities enabled successfully. + * For this we just perform one WXOR operation with the same source + * and destination addresses, the GF-multiplier is 1; so if RAID-6 + * capabilities are enabled then we'll get src/dst filled with zero. + */ +static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *sw_desc, *iter; + struct page *pg; + char *a; + dma_addr_t dma_addr, addrs[2]; + unsigned long op = 0; + int rval = 0; + + set_bit(PPC440SPE_DESC_WXOR, &op); + + pg = alloc_page(GFP_KERNEL); + if (!pg) + return -ENOMEM; + + spin_lock_bh(&chan->lock); + sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1); + if (sw_desc) { + /* 1 src, 1 dsr, int_ena, WXOR */ + ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op); + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE); + iter->unmap_len = PAGE_SIZE; + } + } else { + rval = -EFAULT; + spin_unlock_bh(&chan->lock); + goto exit; + } + spin_unlock_bh(&chan->lock); + + /* Fill the test page with ones */ + memset(page_address(pg), 0xFF, PAGE_SIZE); + dma_addr = dma_map_page(chan->device->dev, pg, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Setup addresses */ + ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0); + ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0); + addrs[0] = dma_addr; + addrs[1] = 0; + ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q); + + async_tx_ack(&sw_desc->async_tx); + sw_desc->async_tx.callback = ppc440spe_test_callback; + sw_desc->async_tx.callback_param = NULL; + + init_completion(&ppc440spe_r6_test_comp); + + ppc440spe_adma_tx_submit(&sw_desc->async_tx); + ppc440spe_adma_issue_pending(&chan->common); + + wait_for_completion(&ppc440spe_r6_test_comp); + + /* Now check if the test page is zeroed */ + a = page_address(pg); + if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) { + /* page is zero - RAID-6 enabled */ + rval = 0; + } else { + /* RAID-6 was not enabled */ + rval = -EINVAL; + } +exit: + __free_page(pg); + return rval; +} + +static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev) +{ + switch (adev->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_cap_set(DMA_MEMCPY, adev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); + dma_cap_set(DMA_MEMSET, adev->common.cap_mask); + dma_cap_set(DMA_PQ, adev->common.cap_mask); + dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask); + dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask); + break; + case PPC440SPE_XOR_ID: + dma_cap_set(DMA_XOR, adev->common.cap_mask); + dma_cap_set(DMA_PQ, adev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); + adev->common.cap_mask = adev->common.cap_mask; + break; + } + + /* Set base routines */ + adev->common.device_alloc_chan_resources = + ppc440spe_adma_alloc_chan_resources; + adev->common.device_free_chan_resources = + ppc440spe_adma_free_chan_resources; + adev->common.device_tx_status = ppc440spe_adma_tx_status; + adev->common.device_issue_pending = ppc440spe_adma_issue_pending; + + /* Set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, adev->common.cap_mask)) { + adev->common.device_prep_dma_memcpy = + ppc440spe_adma_prep_dma_memcpy; + } + if (dma_has_cap(DMA_MEMSET, adev->common.cap_mask)) { + adev->common.device_prep_dma_memset = + ppc440spe_adma_prep_dma_memset; + } + if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) { + adev->common.max_xor = XOR_MAX_OPS; + adev->common.device_prep_dma_xor = + ppc440spe_adma_prep_dma_xor; + } + if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + dma_set_maxpq(&adev->common, + DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0); + break; + case PPC440SPE_DMA1_ID: + dma_set_maxpq(&adev->common, + DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0); + break; + case PPC440SPE_XOR_ID: + adev->common.max_pq = XOR_MAX_OPS * 3; + break; + } + adev->common.device_prep_dma_pq = + ppc440spe_adma_prep_dma_pq; + } + if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + adev->common.max_pq = DMA0_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + case PPC440SPE_DMA1_ID: + adev->common.max_pq = DMA1_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + } + adev->common.device_prep_dma_pq_val = + ppc440spe_adma_prep_dma_pqzero_sum; + } + if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + adev->common.max_xor = DMA0_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + case PPC440SPE_DMA1_ID: + adev->common.max_xor = DMA1_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + } + adev->common.device_prep_dma_xor_val = + ppc440spe_adma_prep_dma_xor_zero_sum; + } + if (dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask)) { + adev->common.device_prep_dma_interrupt = + ppc440spe_adma_prep_dma_interrupt; + } + pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: " + "( %s%s%s%s%s%s%s)\n", + dev_name(adev->dev), + dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "", + dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "", + dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "", + dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "", + dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "", + dma_has_cap(DMA_MEMSET, adev->common.cap_mask) ? "memset " : "", + dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : ""); +} + +static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev, + struct ppc440spe_adma_chan *chan, + int *initcode) +{ + struct platform_device *ofdev; + struct device_node *np; + int ret; + + ofdev = container_of(adev->dev, struct platform_device, dev); + np = ofdev->dev.of_node; + if (adev->id != PPC440SPE_XOR_ID) { + adev->err_irq = irq_of_parse_and_map(np, 1); + if (adev->err_irq == NO_IRQ) { + dev_warn(adev->dev, "no err irq resource?\n"); + *initcode = PPC_ADMA_INIT_IRQ2; + adev->err_irq = -ENXIO; + } else + atomic_inc(&ppc440spe_adma_err_irq_ref); + } else { + adev->err_irq = -ENXIO; + } + + adev->irq = irq_of_parse_and_map(np, 0); + if (adev->irq == NO_IRQ) { + dev_err(adev->dev, "no irq resource\n"); + *initcode = PPC_ADMA_INIT_IRQ1; + ret = -ENXIO; + goto err_irq_map; + } + dev_dbg(adev->dev, "irq %d, err irq %d\n", + adev->irq, adev->err_irq); + + ret = request_irq(adev->irq, ppc440spe_adma_eot_handler, + 0, dev_driver_string(adev->dev), chan); + if (ret) { + dev_err(adev->dev, "can't request irq %d\n", + adev->irq); + *initcode = PPC_ADMA_INIT_IRQ1; + ret = -EIO; + goto err_req1; + } + + /* only DMA engines have a separate error IRQ + * so it's Ok if err_irq < 0 in XOR engine case. + */ + if (adev->err_irq > 0) { + /* both DMA engines share common error IRQ */ + ret = request_irq(adev->err_irq, + ppc440spe_adma_err_handler, + IRQF_SHARED, + dev_driver_string(adev->dev), + chan); + if (ret) { + dev_err(adev->dev, "can't request irq %d\n", + adev->err_irq); + *initcode = PPC_ADMA_INIT_IRQ2; + ret = -EIO; + goto err_req2; + } + } + + if (adev->id == PPC440SPE_XOR_ID) { + /* enable XOR engine interrupts */ + iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT | + XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT, + &adev->xor_reg->ier); + } else { + u32 mask, enable; + + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); + if (!np) { + pr_err("%s: can't find I2O device tree node\n", + __func__); + ret = -ENODEV; + goto err_req2; + } + adev->i2o_reg = of_iomap(np, 0); + if (!adev->i2o_reg) { + pr_err("%s: failed to map I2O registers\n", __func__); + of_node_put(np); + ret = -EINVAL; + goto err_req2; + } + of_node_put(np); + /* Unmask 'CS FIFO Attention' interrupts and + * enable generating interrupts on errors + */ + enable = (adev->id == PPC440SPE_DMA0_ID) ? + ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) : + ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM); + mask = ioread32(&adev->i2o_reg->iopim) & enable; + iowrite32(mask, &adev->i2o_reg->iopim); + } + return 0; + +err_req2: + free_irq(adev->irq, chan); +err_req1: + irq_dispose_mapping(adev->irq); +err_irq_map: + if (adev->err_irq > 0) { + if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) + irq_dispose_mapping(adev->err_irq); + } + return ret; +} + +static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev, + struct ppc440spe_adma_chan *chan) +{ + u32 mask, disable; + + if (adev->id == PPC440SPE_XOR_ID) { + /* disable XOR engine interrupts */ + mask = ioread32be(&adev->xor_reg->ier); + mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT | + XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT); + iowrite32be(mask, &adev->xor_reg->ier); + } else { + /* disable DMAx engine interrupts */ + disable = (adev->id == PPC440SPE_DMA0_ID) ? + (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) : + (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM); + mask = ioread32(&adev->i2o_reg->iopim) | disable; + iowrite32(mask, &adev->i2o_reg->iopim); + } + free_irq(adev->irq, chan); + irq_dispose_mapping(adev->irq); + if (adev->err_irq > 0) { + free_irq(adev->err_irq, chan); + if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) { + irq_dispose_mapping(adev->err_irq); + iounmap(adev->i2o_reg); + } + } +} + +/** + * ppc440spe_adma_probe - probe the asynch device + */ +static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct resource res; + struct ppc440spe_adma_device *adev; + struct ppc440spe_adma_chan *chan; + struct ppc_dma_chan_ref *ref, *_ref; + int ret = 0, initcode = PPC_ADMA_INIT_OK; + const u32 *idx; + int len; + void *regs; + u32 id, pool_size; + + if (of_device_is_compatible(np, "amcc,xor-accelerator")) { + id = PPC440SPE_XOR_ID; + /* As far as the XOR engine is concerned, it does not + * use FIFOs but uses linked list. So there is no dependency + * between pool size to allocate and the engine configuration. + */ + pool_size = PAGE_SIZE << 1; + } else { + /* it is DMA0 or DMA1 */ + idx = of_get_property(np, "cell-index", &len); + if (!idx || (len != sizeof(u32))) { + dev_err(&ofdev->dev, "Device node %s has missing " + "or invalid cell-index property\n", + np->full_name); + return -EINVAL; + } + id = *idx; + /* DMA0,1 engines use FIFO to maintain CDBs, so we + * should allocate the pool accordingly to size of this + * FIFO. Thus, the pool size depends on the FIFO depth: + * how much CDBs pointers the FIFO may contain then so + * much CDBs we should provide in the pool. + * That is + * CDB size = 32B; + * CDBs number = (DMA0_FIFO_SIZE >> 3); + * Pool size = CDBs number * CDB size = + * = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2. + */ + pool_size = (id == PPC440SPE_DMA0_ID) ? + DMA0_FIFO_SIZE : DMA1_FIFO_SIZE; + pool_size <<= 2; + } + + if (of_address_to_resource(np, 0, &res)) { + dev_err(&ofdev->dev, "failed to get memory resource\n"); + initcode = PPC_ADMA_INIT_MEMRES; + ret = -ENODEV; + goto out; + } + + if (!request_mem_region(res.start, resource_size(&res), + dev_driver_string(&ofdev->dev))) { + dev_err(&ofdev->dev, "failed to request memory region %pR\n", + &res); + initcode = PPC_ADMA_INIT_MEMREG; + ret = -EBUSY; + goto out; + } + + /* create a device */ + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) { + dev_err(&ofdev->dev, "failed to allocate device\n"); + initcode = PPC_ADMA_INIT_ALLOC; + ret = -ENOMEM; + goto err_adev_alloc; + } + + adev->id = id; + adev->pool_size = pool_size; + /* allocate coherent memory for hardware descriptors */ + adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev, + adev->pool_size, &adev->dma_desc_pool, + GFP_KERNEL); + if (adev->dma_desc_pool_virt == NULL) { + dev_err(&ofdev->dev, "failed to allocate %d bytes of coherent " + "memory for hardware descriptors\n", + adev->pool_size); + initcode = PPC_ADMA_INIT_COHERENT; + ret = -ENOMEM; + goto err_dma_alloc; + } + dev_dbg(&ofdev->dev, "allocted descriptor pool virt 0x%p phys 0x%llx\n", + adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool); + + regs = ioremap(res.start, resource_size(&res)); + if (!regs) { + dev_err(&ofdev->dev, "failed to ioremap regs!\n"); + goto err_regs_alloc; + } + + if (adev->id == PPC440SPE_XOR_ID) { + adev->xor_reg = regs; + /* Reset XOR */ + iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr); + iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr); + } else { + size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ? + DMA0_FIFO_SIZE : DMA1_FIFO_SIZE; + adev->dma_reg = regs; + /* DMAx_FIFO_SIZE is defined in bytes, + * <fsiz> - is defined in number of CDB pointers (8byte). + * DMA FIFO Length = CSlength + CPlength, where + * CSlength = CPlength = (fsiz + 1) * 8. + */ + iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2), + &adev->dma_reg->fsiz); + /* Configure DMA engine */ + iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN, + &adev->dma_reg->cfg); + /* Clear Status */ + iowrite32(~0, &adev->dma_reg->dsts); + } + + adev->dev = &ofdev->dev; + adev->common.dev = &ofdev->dev; + INIT_LIST_HEAD(&adev->common.channels); + dev_set_drvdata(&ofdev->dev, adev); + + /* create a channel */ + chan = kzalloc(sizeof(*chan), GFP_KERNEL); + if (!chan) { + dev_err(&ofdev->dev, "can't allocate channel structure\n"); + initcode = PPC_ADMA_INIT_CHANNEL; + ret = -ENOMEM; + goto err_chan_alloc; + } + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->chain); + INIT_LIST_HEAD(&chan->all_slots); + chan->device = adev; + chan->common.device = &adev->common; + dma_cookie_init(&chan->common); + list_add_tail(&chan->common.device_node, &adev->common.channels); + tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet, + (unsigned long)chan); + + /* allocate and map helper pages for async validation or + * async_mult/async_sum_product operations on DMA0/1. + */ + if (adev->id != PPC440SPE_XOR_ID) { + chan->pdest_page = alloc_page(GFP_KERNEL); + chan->qdest_page = alloc_page(GFP_KERNEL); + if (!chan->pdest_page || + !chan->qdest_page) { + if (chan->pdest_page) + __free_page(chan->pdest_page); + if (chan->qdest_page) + __free_page(chan->qdest_page); + ret = -ENOMEM; + goto err_page_alloc; + } + chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + + ref = kmalloc(sizeof(*ref), GFP_KERNEL); + if (ref) { + ref->chan = &chan->common; + INIT_LIST_HEAD(&ref->node); + list_add_tail(&ref->node, &ppc440spe_adma_chan_list); + } else { + dev_err(&ofdev->dev, "failed to allocate channel reference!\n"); + ret = -ENOMEM; + goto err_ref_alloc; + } + + ret = ppc440spe_adma_setup_irqs(adev, chan, &initcode); + if (ret) + goto err_irq; + + ppc440spe_adma_init_capabilities(adev); + + ret = dma_async_device_register(&adev->common); + if (ret) { + initcode = PPC_ADMA_INIT_REGISTER; + dev_err(&ofdev->dev, "failed to register dma device\n"); + goto err_dev_reg; + } + + goto out; + +err_dev_reg: + ppc440spe_adma_release_irqs(adev, chan); +err_irq: + list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, node) { + if (chan == to_ppc440spe_adma_chan(ref->chan)) { + list_del(&ref->node); + kfree(ref); + } + } +err_ref_alloc: + if (adev->id != PPC440SPE_XOR_ID) { + dma_unmap_page(&ofdev->dev, chan->pdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(&ofdev->dev, chan->qdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(chan->pdest_page); + __free_page(chan->qdest_page); + } +err_page_alloc: + kfree(chan); +err_chan_alloc: + if (adev->id == PPC440SPE_XOR_ID) + iounmap(adev->xor_reg); + else + iounmap(adev->dma_reg); +err_regs_alloc: + dma_free_coherent(adev->dev, adev->pool_size, + adev->dma_desc_pool_virt, + adev->dma_desc_pool); +err_dma_alloc: + kfree(adev); +err_adev_alloc: + release_mem_region(res.start, resource_size(&res)); +out: + if (id < PPC440SPE_ADMA_ENGINES_NUM) + ppc440spe_adma_devices[id] = initcode; + + return ret; +} + +/** + * ppc440spe_adma_remove - remove the asynch device + */ +static int __devexit ppc440spe_adma_remove(struct platform_device *ofdev) +{ + struct ppc440spe_adma_device *adev = dev_get_drvdata(&ofdev->dev); + struct device_node *np = ofdev->dev.of_node; + struct resource res; + struct dma_chan *chan, *_chan; + struct ppc_dma_chan_ref *ref, *_ref; + struct ppc440spe_adma_chan *ppc440spe_chan; + + dev_set_drvdata(&ofdev->dev, NULL); + if (adev->id < PPC440SPE_ADMA_ENGINES_NUM) + ppc440spe_adma_devices[adev->id] = -1; + + dma_async_device_unregister(&adev->common); + + list_for_each_entry_safe(chan, _chan, &adev->common.channels, + device_node) { + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + ppc440spe_adma_release_irqs(adev, ppc440spe_chan); + tasklet_kill(&ppc440spe_chan->irq_tasklet); + if (adev->id != PPC440SPE_XOR_ID) { + dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(ppc440spe_chan->pdest_page); + __free_page(ppc440spe_chan->qdest_page); + } + list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, + node) { + if (ppc440spe_chan == + to_ppc440spe_adma_chan(ref->chan)) { + list_del(&ref->node); + kfree(ref); + } + } + list_del(&chan->device_node); + kfree(ppc440spe_chan); + } + + dma_free_coherent(adev->dev, adev->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + if (adev->id == PPC440SPE_XOR_ID) + iounmap(adev->xor_reg); + else + iounmap(adev->dma_reg); + of_address_to_resource(np, 0, &res); + release_mem_region(res.start, resource_size(&res)); + kfree(adev); + return 0; +} + +/* + * /sys driver interface to enable h/w RAID-6 capabilities + * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/ + * directory are "devices", "enable" and "poly". + * "devices" shows available engines. + * "enable" is used to enable RAID-6 capabilities or to check + * whether these has been activated. + * "poly" allows setting/checking used polynomial (for PPC440SPe only). + */ + +static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf) +{ + ssize_t size = 0; + int i; + + for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) { + if (ppc440spe_adma_devices[i] == -1) + continue; + size += snprintf(buf + size, PAGE_SIZE - size, + "PPC440SP(E)-ADMA.%d: %s\n", i, + ppc_adma_errors[ppc440spe_adma_devices[i]]); + } + return size; +} + +static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf) +{ + return snprintf(buf, PAGE_SIZE, + "PPC440SP(e) RAID-6 capabilities are %sABLED.\n", + ppc440spe_r6_enabled ? "EN" : "DIS"); +} + +static ssize_t store_ppc440spe_r6enable(struct device_driver *dev, + const char *buf, size_t count) +{ + unsigned long val; + + if (!count || count > 11) + return -EINVAL; + + if (!ppc440spe_r6_tchan) + return -EFAULT; + + /* Write a key */ + sscanf(buf, "%lx", &val); + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val); + isync(); + + /* Verify whether it really works now */ + if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) { + pr_info("PPC440SP(e) RAID-6 has been activated " + "successfully\n"); + ppc440spe_r6_enabled = 1; + } else { + pr_info("PPC440SP(e) RAID-6 hasn't been activated!" + " Error key ?\n"); + ppc440spe_r6_enabled = 0; + } + return count; +} + +static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf) +{ + ssize_t size = 0; + u32 reg; + +#ifdef CONFIG_440SP + /* 440SP has fixed polynomial */ + reg = 0x4d; +#else + reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL); + reg >>= MQ0_CFBHL_POLY; + reg &= 0xFF; +#endif + + size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver " + "uses 0x1%02x polynomial.\n", reg); + return size; +} + +static ssize_t store_ppc440spe_r6poly(struct device_driver *dev, + const char *buf, size_t count) +{ + unsigned long reg, val; + +#ifdef CONFIG_440SP + /* 440SP uses default 0x14D polynomial only */ + return -EINVAL; +#endif + + if (!count || count > 6) + return -EINVAL; + + /* e.g., 0x14D or 0x11D */ + sscanf(buf, "%lx", &val); + + if (val & ~0x1FF) + return -EINVAL; + + val &= 0xFF; + reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL); + reg &= ~(0xFF << MQ0_CFBHL_POLY); + reg |= val << MQ0_CFBHL_POLY; + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg); + + return count; +} + +static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL); +static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable, + store_ppc440spe_r6enable); +static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc440spe_r6poly, + store_ppc440spe_r6poly); + +/* + * Common initialisation for RAID engines; allocate memory for + * DMAx FIFOs, perform configuration common for all DMA engines. + * Further DMA engine specific configuration is done at probe time. + */ +static int ppc440spe_configure_raid_devices(void) +{ + struct device_node *np; + struct resource i2o_res; + struct i2o_regs __iomem *i2o_reg; + dcr_host_t i2o_dcr_host; + unsigned int dcr_base, dcr_len; + int i, ret; + + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); + if (!np) { + pr_err("%s: can't find I2O device tree node\n", + __func__); + return -ENODEV; + } + + if (of_address_to_resource(np, 0, &i2o_res)) { + of_node_put(np); + return -EINVAL; + } + + i2o_reg = of_iomap(np, 0); + if (!i2o_reg) { + pr_err("%s: failed to map I2O registers\n", __func__); + of_node_put(np); + return -EINVAL; + } + + /* Get I2O DCRs base */ + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + if (!dcr_base && !dcr_len) { + pr_err("%s: can't get DCR registers base/len!\n", + np->full_name); + of_node_put(np); + iounmap(i2o_reg); + return -ENODEV; + } + + i2o_dcr_host = dcr_map(np, dcr_base, dcr_len); + if (!DCR_MAP_OK(i2o_dcr_host)) { + pr_err("%s: failed to map DCRs!\n", np->full_name); + of_node_put(np); + iounmap(i2o_reg); + return -ENODEV; + } + of_node_put(np); + + /* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share + * the base address of FIFO memory space. + * Actually we need twice more physical memory than programmed in the + * <fsiz> register (because there are two FIFOs for each DMA: CP and CS) + */ + ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1, + GFP_KERNEL); + if (!ppc440spe_dma_fifo_buf) { + pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__); + iounmap(i2o_reg); + dcr_unmap(i2o_dcr_host, dcr_len); + return -ENOMEM; + } + + /* + * Configure h/w + */ + /* Reset I2O/DMA */ + mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA); + mtdcri(SDR0, DCRN_SDR0_SRST, 0); + + /* Setup the base address of mmaped registers */ + dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32)); + dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) | + I2O_REG_ENABLE); + dcr_unmap(i2o_dcr_host, dcr_len); + + /* Setup FIFO memory space base address */ + iowrite32(0, &i2o_reg->ifbah); + iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal); + + /* set zero FIFO size for I2O, so the whole + * ppc440spe_dma_fifo_buf is used by DMAs. + * DMAx_FIFOs will be configured while probe. + */ + iowrite32(0, &i2o_reg->ifsiz); + iounmap(i2o_reg); + + /* To prepare WXOR/RXOR functionality we need access to + * Memory Queue Module DCRs (finally it will be enabled + * via /sys interface of the ppc440spe ADMA driver). + */ + np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe"); + if (!np) { + pr_err("%s: can't find MQ device tree node\n", + __func__); + ret = -ENODEV; + goto out_free; + } + + /* Get MQ DCRs base */ + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + if (!dcr_base && !dcr_len) { + pr_err("%s: can't get DCR registers base/len!\n", + np->full_name); + ret = -ENODEV; + goto out_mq; + } + + ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len); + if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) { + pr_err("%s: failed to map DCRs!\n", np->full_name); + ret = -ENODEV; + goto out_mq; + } + of_node_put(np); + ppc440spe_mq_dcr_len = dcr_len; + + /* Set HB alias */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB); + + /* Set: + * - LL transaction passing limit to 1; + * - Memory controller cycle limit to 1; + * - Galois Polynomial to 0x14d (default) + */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, + (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) | + (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY)); + + atomic_set(&ppc440spe_adma_err_irq_ref, 0); + for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) + ppc440spe_adma_devices[i] = -1; + + return 0; + +out_mq: + of_node_put(np); +out_free: + kfree(ppc440spe_dma_fifo_buf); + return ret; +} + +static const struct of_device_id ppc440spe_adma_of_match[] __devinitconst = { + { .compatible = "ibm,dma-440spe", }, + { .compatible = "amcc,xor-accelerator", }, + {}, +}; +MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match); + +static struct platform_driver ppc440spe_adma_driver = { + .probe = ppc440spe_adma_probe, + .remove = __devexit_p(ppc440spe_adma_remove), + .driver = { + .name = "PPC440SP(E)-ADMA", + .owner = THIS_MODULE, + .of_match_table = ppc440spe_adma_of_match, + }, +}; + +static __init int ppc440spe_adma_init(void) +{ + int ret; + + ret = ppc440spe_configure_raid_devices(); + if (ret) + return ret; + + ret = platform_driver_register(&ppc440spe_adma_driver); + if (ret) { + pr_err("%s: failed to register platform driver\n", + __func__); + goto out_reg; + } + + /* Initialization status */ + ret = driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); + if (ret) + goto out_dev; + + /* RAID-6 h/w enable entry */ + ret = driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); + if (ret) + goto out_en; + + /* GF polynomial to use */ + ret = driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_poly); + if (!ret) + return ret; + + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); +out_en: + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); +out_dev: + /* User will not be able to enable h/w RAID-6 */ + pr_err("%s: failed to create RAID-6 driver interface\n", + __func__); + platform_driver_unregister(&ppc440spe_adma_driver); +out_reg: + dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len); + kfree(ppc440spe_dma_fifo_buf); + return ret; +} + +static void __exit ppc440spe_adma_exit(void) +{ + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_poly); + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); + platform_driver_unregister(&ppc440spe_adma_driver); + dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len); + kfree(ppc440spe_dma_fifo_buf); +} + +arch_initcall(ppc440spe_adma_init); +module_exit(ppc440spe_adma_exit); + +MODULE_AUTHOR("Yuri Tikhonov <yur@emcraft.com>"); +MODULE_DESCRIPTION("PPC440SPE ADMA Engine Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h new file mode 100644 index 00000000..26b7a5ed --- /dev/null +++ b/drivers/dma/ppc4xx/adma.h @@ -0,0 +1,193 @@ +/* + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov <yur@emcraft.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. + */ + +#ifndef _PPC440SPE_ADMA_H +#define _PPC440SPE_ADMA_H + +#include <linux/types.h> +#include "dma.h" +#include "xor.h" + +#define to_ppc440spe_adma_chan(chan) \ + container_of(chan, struct ppc440spe_adma_chan, common) +#define to_ppc440spe_adma_device(dev) \ + container_of(dev, struct ppc440spe_adma_device, common) +#define tx_to_ppc440spe_adma_slot(tx) \ + container_of(tx, struct ppc440spe_adma_desc_slot, async_tx) + +/* Default polynomial (for 440SP is only available) */ +#define PPC440SPE_DEFAULT_POLY 0x4d + +#define PPC440SPE_ADMA_ENGINES_NUM (XOR_ENGINES_NUM + DMA_ENGINES_NUM) + +#define PPC440SPE_ADMA_WATCHDOG_MSEC 3 +#define PPC440SPE_ADMA_THRESHOLD 1 + +#define PPC440SPE_DMA0_ID 0 +#define PPC440SPE_DMA1_ID 1 +#define PPC440SPE_XOR_ID 2 + +#define PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT 0xFFFFFFUL +/* this is the XOR_CBBCR width */ +#define PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT (1 << 31) +#define PPC440SPE_ADMA_ZERO_SUM_MAX_BYTE_COUNT PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT + +#define PPC440SPE_RXOR_RUN 0 + +#define MQ0_CF2H_RXOR_BS_MASK 0x1FF + +#undef ADMA_LL_DEBUG + +/** + * struct ppc440spe_adma_device - internal representation of an ADMA device + * @dev: device + * @dma_reg: base for DMAx register access + * @xor_reg: base for XOR register access + * @i2o_reg: base for I2O register access + * @id: HW ADMA Device selector + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @pool_size: size of the pool + * @irq: DMAx or XOR irq number + * @err_irq: DMAx error irq number + * @common: embedded struct dma_device + */ +struct ppc440spe_adma_device { + struct device *dev; + struct dma_regs __iomem *dma_reg; + struct xor_regs __iomem *xor_reg; + struct i2o_regs __iomem *i2o_reg; + int id; + void *dma_desc_pool_virt; + dma_addr_t dma_desc_pool; + size_t pool_size; + int irq; + int err_irq; + struct dma_device common; +}; + +/** + * struct ppc440spe_adma_chan - internal representation of an ADMA channel + * @lock: serializes enqueue/dequeue operations to the slot pool + * @device: parent device + * @chain: device chain view of the descriptors + * @common: common dmaengine channel object members + * @all_slots: complete domain of slots usable by the channel + * @pending: allows batching of hardware operations + * @slots_allocated: records the actual size of the descriptor slot pool + * @hw_chain_inited: h/w descriptor chain initialization flag + * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs + * @needs_unmap: if buffers should not be unmapped upon final processing + * @pdest_page: P destination page for async validate operation + * @qdest_page: Q destination page for async validate operation + * @pdest: P dma addr for async validate operation + * @qdest: Q dma addr for async validate operation + */ +struct ppc440spe_adma_chan { + spinlock_t lock; + struct ppc440spe_adma_device *device; + struct list_head chain; + struct dma_chan common; + struct list_head all_slots; + struct ppc440spe_adma_desc_slot *last_used; + int pending; + int slots_allocated; + int hw_chain_inited; + struct tasklet_struct irq_tasklet; + u8 needs_unmap; + struct page *pdest_page; + struct page *qdest_page; + dma_addr_t pdest; + dma_addr_t qdest; +}; + +struct ppc440spe_rxor { + u32 addrl; + u32 addrh; + int len; + int xor_count; + int addr_count; + int desc_count; + int state; +}; + +/** + * struct ppc440spe_adma_desc_slot - PPC440SPE-ADMA software descriptor + * @phys: hardware address of the hardware descriptor chain + * @group_head: first operation in a transaction + * @hw_next: pointer to the next descriptor in chain + * @async_tx: support for the async_tx api + * @slot_node: node on the iop_adma_chan.all_slots list + * @chain_node: node on the op_adma_chan.chain list + * @group_list: list of slots that make up a multi-descriptor transaction + * for example transfer lengths larger than the supported hw max + * @unmap_len: transaction bytecount + * @hw_desc: virtual address of the hardware descriptor chain + * @stride: currently chained or not + * @idx: pool index + * @slot_cnt: total slots used in an transaction (group of operations) + * @src_cnt: number of sources set in this descriptor + * @dst_cnt: number of destinations set in the descriptor + * @slots_per_op: number of slots per operation + * @descs_per_op: number of slot per P/Q operation see comment + * for ppc440spe_prep_dma_pqxor function + * @flags: desc state/type + * @reverse_flags: 1 if a corresponding rxor address uses reversed address order + * @xor_check_result: result of zero sum + * @crc32_result: result crc calculation + */ +struct ppc440spe_adma_desc_slot { + dma_addr_t phys; + struct ppc440spe_adma_desc_slot *group_head; + struct ppc440spe_adma_desc_slot *hw_next; + struct dma_async_tx_descriptor async_tx; + struct list_head slot_node; + struct list_head chain_node; /* node in channel ops list */ + struct list_head group_list; /* list */ + unsigned int unmap_len; + void *hw_desc; + u16 stride; + u16 idx; + u16 slot_cnt; + u8 src_cnt; + u8 dst_cnt; + u8 slots_per_op; + u8 descs_per_op; + unsigned long flags; + unsigned long reverse_flags[8]; + +#define PPC440SPE_DESC_INT 0 /* generate interrupt on complete */ +#define PPC440SPE_ZERO_P 1 /* clear P destionaion */ +#define PPC440SPE_ZERO_Q 2 /* clear Q destination */ +#define PPC440SPE_COHERENT 3 /* src/dst are coherent */ + +#define PPC440SPE_DESC_WXOR 4 /* WXORs are in chain */ +#define PPC440SPE_DESC_RXOR 5 /* RXOR is in chain */ + +#define PPC440SPE_DESC_RXOR123 8 /* CDB for RXOR123 operation */ +#define PPC440SPE_DESC_RXOR124 9 /* CDB for RXOR124 operation */ +#define PPC440SPE_DESC_RXOR125 10 /* CDB for RXOR125 operation */ +#define PPC440SPE_DESC_RXOR12 11 /* CDB for RXOR12 operation */ +#define PPC440SPE_DESC_RXOR_REV 12 /* CDB has srcs in reversed order */ + +#define PPC440SPE_DESC_PCHECK 13 +#define PPC440SPE_DESC_QCHECK 14 + +#define PPC440SPE_DESC_RXOR_MSK 0x3 + + struct ppc440spe_rxor rxor_cursor; + + union { + u32 *xor_check_result; + u32 *crc32_result; + }; +}; + +#endif /* _PPC440SPE_ADMA_H */ diff --git a/drivers/dma/ppc4xx/dma.h b/drivers/dma/ppc4xx/dma.h new file mode 100644 index 00000000..bcde2df2 --- /dev/null +++ b/drivers/dma/ppc4xx/dma.h @@ -0,0 +1,223 @@ +/* + * 440SPe's DMA engines support header file + * + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov <yur@emcraft.com> + * + * This file is licensed under the term of the GNU General Public License + * version 2. The program licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef _PPC440SPE_DMA_H +#define _PPC440SPE_DMA_H + +#include <linux/types.h> + +/* Number of elements in the array with statical CDBs */ +#define MAX_STAT_DMA_CDBS 16 +/* Number of DMA engines available on the contoller */ +#define DMA_ENGINES_NUM 2 + +/* Maximum h/w supported number of destinations */ +#define DMA_DEST_MAX_NUM 2 + +/* FIFO's params */ +#define DMA0_FIFO_SIZE 0x1000 +#define DMA1_FIFO_SIZE 0x1000 +#define DMA_FIFO_ENABLE (1<<12) + +/* DMA Configuration Register. Data Transfer Engine PLB Priority: */ +#define DMA_CFG_DXEPR_LP (0<<26) +#define DMA_CFG_DXEPR_HP (3<<26) +#define DMA_CFG_DXEPR_HHP (2<<26) +#define DMA_CFG_DXEPR_HHHP (1<<26) + +/* DMA Configuration Register. DMA FIFO Manager PLB Priority: */ +#define DMA_CFG_DFMPP_LP (0<<23) +#define DMA_CFG_DFMPP_HP (3<<23) +#define DMA_CFG_DFMPP_HHP (2<<23) +#define DMA_CFG_DFMPP_HHHP (1<<23) + +/* DMA Configuration Register. Force 64-byte Alignment */ +#define DMA_CFG_FALGN (1 << 19) + +/*UIC0:*/ +#define D0CPF_INT (1<<12) +#define D0CSF_INT (1<<11) +#define D1CPF_INT (1<<10) +#define D1CSF_INT (1<<9) +/*UIC1:*/ +#define DMAE_INT (1<<9) + +/* I2O IOP Interrupt Mask Register */ +#define I2O_IOPIM_P0SNE (1<<3) +#define I2O_IOPIM_P0EM (1<<5) +#define I2O_IOPIM_P1SNE (1<<6) +#define I2O_IOPIM_P1EM (1<<8) + +/* DMA CDB fields */ +#define DMA_CDB_MSK (0xF) +#define DMA_CDB_64B_ADDR (1<<2) +#define DMA_CDB_NO_INT (1<<3) +#define DMA_CDB_STATUS_MSK (0x3) +#define DMA_CDB_ADDR_MSK (0xFFFFFFF0) + +/* DMA CDB OpCodes */ +#define DMA_CDB_OPC_NO_OP (0x00) +#define DMA_CDB_OPC_MV_SG1_SG2 (0x01) +#define DMA_CDB_OPC_MULTICAST (0x05) +#define DMA_CDB_OPC_DFILL128 (0x24) +#define DMA_CDB_OPC_DCHECK128 (0x23) + +#define DMA_CUED_XOR_BASE (0x10000000) +#define DMA_CUED_XOR_HB (0x00000008) + +#ifdef CONFIG_440SP +#define DMA_CUED_MULT1_OFF 0 +#define DMA_CUED_MULT2_OFF 8 +#define DMA_CUED_MULT3_OFF 16 +#define DMA_CUED_REGION_OFF 24 +#define DMA_CUED_XOR_WIN_MSK (0xFC000000) +#else +#define DMA_CUED_MULT1_OFF 2 +#define DMA_CUED_MULT2_OFF 10 +#define DMA_CUED_MULT3_OFF 18 +#define DMA_CUED_REGION_OFF 26 +#define DMA_CUED_XOR_WIN_MSK (0xF0000000) +#endif + +#define DMA_CUED_REGION_MSK 0x3 +#define DMA_RXOR123 0x0 +#define DMA_RXOR124 0x1 +#define DMA_RXOR125 0x2 +#define DMA_RXOR12 0x3 + +/* S/G addresses */ +#define DMA_CDB_SG_SRC 1 +#define DMA_CDB_SG_DST1 2 +#define DMA_CDB_SG_DST2 3 + +/* + * DMAx engines Command Descriptor Block Type + */ +struct dma_cdb { + /* + * Basic CDB structure (Table 20-17, p.499, 440spe_um_1_22.pdf) + */ + u8 pad0[2]; /* reserved */ + u8 attr; /* attributes */ + u8 opc; /* opcode */ + u32 sg1u; /* upper SG1 address */ + u32 sg1l; /* lower SG1 address */ + u32 cnt; /* SG count, 3B used */ + u32 sg2u; /* upper SG2 address */ + u32 sg2l; /* lower SG2 address */ + u32 sg3u; /* upper SG3 address */ + u32 sg3l; /* lower SG3 address */ +}; + +/* + * DMAx hardware registers (p.515 in 440SPe UM 1.22) + */ +struct dma_regs { + u32 cpfpl; + u32 cpfph; + u32 csfpl; + u32 csfph; + u32 dsts; + u32 cfg; + u8 pad0[0x8]; + u16 cpfhp; + u16 cpftp; + u16 csfhp; + u16 csftp; + u8 pad1[0x8]; + u32 acpl; + u32 acph; + u32 s1bpl; + u32 s1bph; + u32 s2bpl; + u32 s2bph; + u32 s3bpl; + u32 s3bph; + u8 pad2[0x10]; + u32 earl; + u32 earh; + u8 pad3[0x8]; + u32 seat; + u32 sead; + u32 op; + u32 fsiz; +}; + +/* + * I2O hardware registers (p.528 in 440SPe UM 1.22) + */ +struct i2o_regs { + u32 ists; + u32 iseat; + u32 isead; + u8 pad0[0x14]; + u32 idbel; + u8 pad1[0xc]; + u32 ihis; + u32 ihim; + u8 pad2[0x8]; + u32 ihiq; + u32 ihoq; + u8 pad3[0x8]; + u32 iopis; + u32 iopim; + u32 iopiq; + u8 iopoq; + u8 pad4[3]; + u16 iiflh; + u16 iiflt; + u16 iiplh; + u16 iiplt; + u16 ioflh; + u16 ioflt; + u16 ioplh; + u16 ioplt; + u32 iidc; + u32 ictl; + u32 ifcpp; + u8 pad5[0x4]; + u16 mfac0; + u16 mfac1; + u16 mfac2; + u16 mfac3; + u16 mfac4; + u16 mfac5; + u16 mfac6; + u16 mfac7; + u16 ifcfh; + u16 ifcht; + u8 pad6[0x4]; + u32 iifmc; + u32 iodb; + u32 iodbc; + u32 ifbal; + u32 ifbah; + u32 ifsiz; + u32 ispd0; + u32 ispd1; + u32 ispd2; + u32 ispd3; + u32 ihipl; + u32 ihiph; + u32 ihopl; + u32 ihoph; + u32 iiipl; + u32 iiiph; + u32 iiopl; + u32 iioph; + u32 ifcpl; + u32 ifcph; + u8 pad7[0x8]; + u32 iopt; +}; + +#endif /* _PPC440SPE_DMA_H */ diff --git a/drivers/dma/ppc4xx/xor.h b/drivers/dma/ppc4xx/xor.h new file mode 100644 index 00000000..daed7384 --- /dev/null +++ b/drivers/dma/ppc4xx/xor.h @@ -0,0 +1,110 @@ +/* + * 440SPe's XOR engines support header file + * + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov <yur@emcraft.com> + * + * This file is licensed under the term of the GNU General Public License + * version 2. The program licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef _PPC440SPE_XOR_H +#define _PPC440SPE_XOR_H + +#include <linux/types.h> + +/* Number of XOR engines available on the contoller */ +#define XOR_ENGINES_NUM 1 + +/* Number of operands supported in the h/w */ +#define XOR_MAX_OPS 16 + +/* + * XOR Command Block Control Register bits + */ +#define XOR_CBCR_LNK_BIT (1<<31) /* link present */ +#define XOR_CBCR_TGT_BIT (1<<30) /* target present */ +#define XOR_CBCR_CBCE_BIT (1<<29) /* command block compete enable */ +#define XOR_CBCR_RNZE_BIT (1<<28) /* result not zero enable */ +#define XOR_CBCR_XNOR_BIT (1<<15) /* XOR/XNOR */ +#define XOR_CDCR_OAC_MSK (0x7F) /* operand address count */ + +/* + * XORCore Status Register bits + */ +#define XOR_SR_XCP_BIT (1<<31) /* core processing */ +#define XOR_SR_ICB_BIT (1<<17) /* invalid CB */ +#define XOR_SR_IC_BIT (1<<16) /* invalid command */ +#define XOR_SR_IPE_BIT (1<<15) /* internal parity error */ +#define XOR_SR_RNZ_BIT (1<<2) /* result not Zero */ +#define XOR_SR_CBC_BIT (1<<1) /* CB complete */ +#define XOR_SR_CBLC_BIT (1<<0) /* CB list complete */ + +/* + * XORCore Control Set and Reset Register bits + */ +#define XOR_CRSR_XASR_BIT (1<<31) /* soft reset */ +#define XOR_CRSR_XAE_BIT (1<<30) /* enable */ +#define XOR_CRSR_RCBE_BIT (1<<29) /* refetch CB enable */ +#define XOR_CRSR_PAUS_BIT (1<<28) /* pause */ +#define XOR_CRSR_64BA_BIT (1<<27) /* 64/32 CB format */ +#define XOR_CRSR_CLP_BIT (1<<25) /* continue list processing */ + +/* + * XORCore Interrupt Enable Register + */ +#define XOR_IE_ICBIE_BIT (1<<17) /* Invalid Command Block IRQ Enable */ +#define XOR_IE_ICIE_BIT (1<<16) /* Invalid Command IRQ Enable */ +#define XOR_IE_RPTIE_BIT (1<<14) /* Read PLB Timeout Error IRQ Enable */ +#define XOR_IE_CBCIE_BIT (1<<1) /* CB complete interrupt enable */ +#define XOR_IE_CBLCI_BIT (1<<0) /* CB list complete interrupt enable */ + +/* + * XOR Accelerator engine Command Block Type + */ +struct xor_cb { + /* + * Basic 64-bit format XOR CB (Table 19-1, p.463, 440spe_um_1_22.pdf) + */ + u32 cbc; /* control */ + u32 cbbc; /* byte count */ + u32 cbs; /* status */ + u8 pad0[4]; /* reserved */ + u32 cbtah; /* target address high */ + u32 cbtal; /* target address low */ + u32 cblah; /* link address high */ + u32 cblal; /* link address low */ + struct { + u32 h; + u32 l; + } __attribute__ ((packed)) ops[16]; +} __attribute__ ((packed)); + +/* + * XOR hardware registers Table 19-3, UM 1.22 + */ +struct xor_regs { + u32 op_ar[16][2]; /* operand address[0]-high,[1]-low registers */ + u8 pad0[352]; /* reserved */ + u32 cbcr; /* CB control register */ + u32 cbbcr; /* CB byte count register */ + u32 cbsr; /* CB status register */ + u8 pad1[4]; /* reserved */ + u32 cbtahr; /* operand target address high register */ + u32 cbtalr; /* operand target address low register */ + u32 cblahr; /* CB link address high register */ + u32 cblalr; /* CB link address low register */ + u32 crsr; /* control set register */ + u32 crrr; /* control reset register */ + u32 ccbahr; /* current CB address high register */ + u32 ccbalr; /* current CB address low register */ + u32 plbr; /* PLB configuration register */ + u32 ier; /* interrupt enable register */ + u32 pecr; /* parity error count register */ + u32 sr; /* status register */ + u32 revidr; /* revision ID register */ +}; + +#endif /* _PPC440SPE_XOR_H */ diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c new file mode 100644 index 00000000..ec78ccef --- /dev/null +++ b/drivers/dma/sa11x0-dma.c @@ -0,0 +1,1109 @@ +/* + * SA11x0 DMAengine support + * + * Copyright (C) 2012 Russell King + * Derived in part from arch/arm/mach-sa1100/dma.c, + * Copyright (C) 2000, 2001 by Nicolas Pitre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/sched.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/sa11x0-dma.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#define NR_PHY_CHAN 6 +#define DMA_ALIGN 3 +#define DMA_MAX_SIZE 0x1fff +#define DMA_CHUNK_SIZE 0x1000 + +#define DMA_DDAR 0x00 +#define DMA_DCSR_S 0x04 +#define DMA_DCSR_C 0x08 +#define DMA_DCSR_R 0x0c +#define DMA_DBSA 0x10 +#define DMA_DBTA 0x14 +#define DMA_DBSB 0x18 +#define DMA_DBTB 0x1c +#define DMA_SIZE 0x20 + +#define DCSR_RUN (1 << 0) +#define DCSR_IE (1 << 1) +#define DCSR_ERROR (1 << 2) +#define DCSR_DONEA (1 << 3) +#define DCSR_STRTA (1 << 4) +#define DCSR_DONEB (1 << 5) +#define DCSR_STRTB (1 << 6) +#define DCSR_BIU (1 << 7) + +#define DDAR_RW (1 << 0) /* 0 = W, 1 = R */ +#define DDAR_E (1 << 1) /* 0 = LE, 1 = BE */ +#define DDAR_BS (1 << 2) /* 0 = BS4, 1 = BS8 */ +#define DDAR_DW (1 << 3) /* 0 = 8b, 1 = 16b */ +#define DDAR_Ser0UDCTr (0x0 << 4) +#define DDAR_Ser0UDCRc (0x1 << 4) +#define DDAR_Ser1SDLCTr (0x2 << 4) +#define DDAR_Ser1SDLCRc (0x3 << 4) +#define DDAR_Ser1UARTTr (0x4 << 4) +#define DDAR_Ser1UARTRc (0x5 << 4) +#define DDAR_Ser2ICPTr (0x6 << 4) +#define DDAR_Ser2ICPRc (0x7 << 4) +#define DDAR_Ser3UARTTr (0x8 << 4) +#define DDAR_Ser3UARTRc (0x9 << 4) +#define DDAR_Ser4MCP0Tr (0xa << 4) +#define DDAR_Ser4MCP0Rc (0xb << 4) +#define DDAR_Ser4MCP1Tr (0xc << 4) +#define DDAR_Ser4MCP1Rc (0xd << 4) +#define DDAR_Ser4SSPTr (0xe << 4) +#define DDAR_Ser4SSPRc (0xf << 4) + +struct sa11x0_dma_sg { + u32 addr; + u32 len; +}; + +struct sa11x0_dma_desc { + struct dma_async_tx_descriptor tx; + u32 ddar; + size_t size; + + /* maybe protected by c->lock */ + struct list_head node; + unsigned sglen; + struct sa11x0_dma_sg sg[0]; +}; + +struct sa11x0_dma_phy; + +struct sa11x0_dma_chan { + struct dma_chan chan; + spinlock_t lock; + dma_cookie_t lc; + + /* protected by c->lock */ + struct sa11x0_dma_phy *phy; + enum dma_status status; + struct list_head desc_submitted; + struct list_head desc_issued; + + /* protected by d->lock */ + struct list_head node; + + u32 ddar; + const char *name; +}; + +struct sa11x0_dma_phy { + void __iomem *base; + struct sa11x0_dma_dev *dev; + unsigned num; + + struct sa11x0_dma_chan *vchan; + + /* Protected by c->lock */ + unsigned sg_load; + struct sa11x0_dma_desc *txd_load; + unsigned sg_done; + struct sa11x0_dma_desc *txd_done; +#ifdef CONFIG_PM_SLEEP + u32 dbs[2]; + u32 dbt[2]; + u32 dcsr; +#endif +}; + +struct sa11x0_dma_dev { + struct dma_device slave; + void __iomem *base; + spinlock_t lock; + struct tasklet_struct task; + struct list_head chan_pending; + struct list_head desc_complete; + struct sa11x0_dma_phy phy[NR_PHY_CHAN]; +}; + +static struct sa11x0_dma_chan *to_sa11x0_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct sa11x0_dma_chan, chan); +} + +static struct sa11x0_dma_dev *to_sa11x0_dma(struct dma_device *dmadev) +{ + return container_of(dmadev, struct sa11x0_dma_dev, slave); +} + +static struct sa11x0_dma_desc *to_sa11x0_dma_tx(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct sa11x0_dma_desc, tx); +} + +static struct sa11x0_dma_desc *sa11x0_dma_next_desc(struct sa11x0_dma_chan *c) +{ + if (list_empty(&c->desc_issued)) + return NULL; + + return list_first_entry(&c->desc_issued, struct sa11x0_dma_desc, node); +} + +static void sa11x0_dma_start_desc(struct sa11x0_dma_phy *p, struct sa11x0_dma_desc *txd) +{ + list_del(&txd->node); + p->txd_load = txd; + p->sg_load = 0; + + dev_vdbg(p->dev->slave.dev, "pchan %u: txd %p[%x]: starting: DDAR:%x\n", + p->num, txd, txd->tx.cookie, txd->ddar); +} + +static void noinline sa11x0_dma_start_sg(struct sa11x0_dma_phy *p, + struct sa11x0_dma_chan *c) +{ + struct sa11x0_dma_desc *txd = p->txd_load; + struct sa11x0_dma_sg *sg; + void __iomem *base = p->base; + unsigned dbsx, dbtx; + u32 dcsr; + + if (!txd) + return; + + dcsr = readl_relaxed(base + DMA_DCSR_R); + + /* Don't try to load the next transfer if both buffers are started */ + if ((dcsr & (DCSR_STRTA | DCSR_STRTB)) == (DCSR_STRTA | DCSR_STRTB)) + return; + + if (p->sg_load == txd->sglen) { + struct sa11x0_dma_desc *txn = sa11x0_dma_next_desc(c); + + /* + * We have reached the end of the current descriptor. + * Peek at the next descriptor, and if compatible with + * the current, start processing it. + */ + if (txn && txn->ddar == txd->ddar) { + txd = txn; + sa11x0_dma_start_desc(p, txn); + } else { + p->txd_load = NULL; + return; + } + } + + sg = &txd->sg[p->sg_load++]; + + /* Select buffer to load according to channel status */ + if (((dcsr & (DCSR_BIU | DCSR_STRTB)) == (DCSR_BIU | DCSR_STRTB)) || + ((dcsr & (DCSR_BIU | DCSR_STRTA)) == 0)) { + dbsx = DMA_DBSA; + dbtx = DMA_DBTA; + dcsr = DCSR_STRTA | DCSR_IE | DCSR_RUN; + } else { + dbsx = DMA_DBSB; + dbtx = DMA_DBTB; + dcsr = DCSR_STRTB | DCSR_IE | DCSR_RUN; + } + + writel_relaxed(sg->addr, base + dbsx); + writel_relaxed(sg->len, base + dbtx); + writel(dcsr, base + DMA_DCSR_S); + + dev_dbg(p->dev->slave.dev, "pchan %u: load: DCSR:%02x DBS%c:%08x DBT%c:%08x\n", + p->num, dcsr, + 'A' + (dbsx == DMA_DBSB), sg->addr, + 'A' + (dbtx == DMA_DBTB), sg->len); +} + +static void noinline sa11x0_dma_complete(struct sa11x0_dma_phy *p, + struct sa11x0_dma_chan *c) +{ + struct sa11x0_dma_desc *txd = p->txd_done; + + if (++p->sg_done == txd->sglen) { + struct sa11x0_dma_dev *d = p->dev; + + dev_vdbg(d->slave.dev, "pchan %u: txd %p[%x]: completed\n", + p->num, p->txd_done, p->txd_done->tx.cookie); + + c->lc = txd->tx.cookie; + + spin_lock(&d->lock); + list_add_tail(&txd->node, &d->desc_complete); + spin_unlock(&d->lock); + + p->sg_done = 0; + p->txd_done = p->txd_load; + + tasklet_schedule(&d->task); + } + + sa11x0_dma_start_sg(p, c); +} + +static irqreturn_t sa11x0_dma_irq(int irq, void *dev_id) +{ + struct sa11x0_dma_phy *p = dev_id; + struct sa11x0_dma_dev *d = p->dev; + struct sa11x0_dma_chan *c; + u32 dcsr; + + dcsr = readl_relaxed(p->base + DMA_DCSR_R); + if (!(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB))) + return IRQ_NONE; + + /* Clear reported status bits */ + writel_relaxed(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB), + p->base + DMA_DCSR_C); + + dev_dbg(d->slave.dev, "pchan %u: irq: DCSR:%02x\n", p->num, dcsr); + + if (dcsr & DCSR_ERROR) { + dev_err(d->slave.dev, "pchan %u: error. DCSR:%02x DDAR:%08x DBSA:%08x DBTA:%08x DBSB:%08x DBTB:%08x\n", + p->num, dcsr, + readl_relaxed(p->base + DMA_DDAR), + readl_relaxed(p->base + DMA_DBSA), + readl_relaxed(p->base + DMA_DBTA), + readl_relaxed(p->base + DMA_DBSB), + readl_relaxed(p->base + DMA_DBTB)); + } + + c = p->vchan; + if (c) { + unsigned long flags; + + spin_lock_irqsave(&c->lock, flags); + /* + * Now that we're holding the lock, check that the vchan + * really is associated with this pchan before touching the + * hardware. This should always succeed, because we won't + * change p->vchan or c->phy while the channel is actively + * transferring. + */ + if (c->phy == p) { + if (dcsr & DCSR_DONEA) + sa11x0_dma_complete(p, c); + if (dcsr & DCSR_DONEB) + sa11x0_dma_complete(p, c); + } + spin_unlock_irqrestore(&c->lock, flags); + } + + return IRQ_HANDLED; +} + +static void sa11x0_dma_start_txd(struct sa11x0_dma_chan *c) +{ + struct sa11x0_dma_desc *txd = sa11x0_dma_next_desc(c); + + /* If the issued list is empty, we have no further txds to process */ + if (txd) { + struct sa11x0_dma_phy *p = c->phy; + + sa11x0_dma_start_desc(p, txd); + p->txd_done = txd; + p->sg_done = 0; + + /* The channel should not have any transfers started */ + WARN_ON(readl_relaxed(p->base + DMA_DCSR_R) & + (DCSR_STRTA | DCSR_STRTB)); + + /* Clear the run and start bits before changing DDAR */ + writel_relaxed(DCSR_RUN | DCSR_STRTA | DCSR_STRTB, + p->base + DMA_DCSR_C); + writel_relaxed(txd->ddar, p->base + DMA_DDAR); + + /* Try to start both buffers */ + sa11x0_dma_start_sg(p, c); + sa11x0_dma_start_sg(p, c); + } +} + +static void sa11x0_dma_tasklet(unsigned long arg) +{ + struct sa11x0_dma_dev *d = (struct sa11x0_dma_dev *)arg; + struct sa11x0_dma_phy *p; + struct sa11x0_dma_chan *c; + struct sa11x0_dma_desc *txd, *txn; + LIST_HEAD(head); + unsigned pch, pch_alloc = 0; + + dev_dbg(d->slave.dev, "tasklet enter\n"); + + /* Get the completed tx descriptors */ + spin_lock_irq(&d->lock); + list_splice_init(&d->desc_complete, &head); + spin_unlock_irq(&d->lock); + + list_for_each_entry(txd, &head, node) { + c = to_sa11x0_dma_chan(txd->tx.chan); + + dev_dbg(d->slave.dev, "vchan %p: txd %p[%x] completed\n", + c, txd, txd->tx.cookie); + + spin_lock_irq(&c->lock); + p = c->phy; + if (p) { + if (!p->txd_done) + sa11x0_dma_start_txd(c); + if (!p->txd_done) { + /* No current txd associated with this channel */ + dev_dbg(d->slave.dev, "pchan %u: free\n", p->num); + + /* Mark this channel free */ + c->phy = NULL; + p->vchan = NULL; + } + } + spin_unlock_irq(&c->lock); + } + + spin_lock_irq(&d->lock); + for (pch = 0; pch < NR_PHY_CHAN; pch++) { + p = &d->phy[pch]; + + if (p->vchan == NULL && !list_empty(&d->chan_pending)) { + c = list_first_entry(&d->chan_pending, + struct sa11x0_dma_chan, node); + list_del_init(&c->node); + + pch_alloc |= 1 << pch; + + /* Mark this channel allocated */ + p->vchan = c; + + dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, c); + } + } + spin_unlock_irq(&d->lock); + + for (pch = 0; pch < NR_PHY_CHAN; pch++) { + if (pch_alloc & (1 << pch)) { + p = &d->phy[pch]; + c = p->vchan; + + spin_lock_irq(&c->lock); + c->phy = p; + + sa11x0_dma_start_txd(c); + spin_unlock_irq(&c->lock); + } + } + + /* Now free the completed tx descriptor, and call their callbacks */ + list_for_each_entry_safe(txd, txn, &head, node) { + dma_async_tx_callback callback = txd->tx.callback; + void *callback_param = txd->tx.callback_param; + + dev_dbg(d->slave.dev, "txd %p[%x]: callback and free\n", + txd, txd->tx.cookie); + + kfree(txd); + + if (callback) + callback(callback_param); + } + + dev_dbg(d->slave.dev, "tasklet exit\n"); +} + + +static void sa11x0_dma_desc_free(struct sa11x0_dma_dev *d, struct list_head *head) +{ + struct sa11x0_dma_desc *txd, *txn; + + list_for_each_entry_safe(txd, txn, head, node) { + dev_dbg(d->slave.dev, "txd %p: freeing\n", txd); + kfree(txd); + } +} + +static int sa11x0_dma_alloc_chan_resources(struct dma_chan *chan) +{ + return 0; +} + +static void sa11x0_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&c->lock, flags); + spin_lock(&d->lock); + list_del_init(&c->node); + spin_unlock(&d->lock); + + list_splice_tail_init(&c->desc_submitted, &head); + list_splice_tail_init(&c->desc_issued, &head); + spin_unlock_irqrestore(&c->lock, flags); + + sa11x0_dma_desc_free(d, &head); +} + +static dma_addr_t sa11x0_dma_pos(struct sa11x0_dma_phy *p) +{ + unsigned reg; + u32 dcsr; + + dcsr = readl_relaxed(p->base + DMA_DCSR_R); + + if ((dcsr & (DCSR_BIU | DCSR_STRTA)) == DCSR_STRTA || + (dcsr & (DCSR_BIU | DCSR_STRTB)) == DCSR_BIU) + reg = DMA_DBSA; + else + reg = DMA_DBSB; + + return readl_relaxed(p->base + reg); +} + +static enum dma_status sa11x0_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + struct sa11x0_dma_phy *p; + struct sa11x0_dma_desc *txd; + dma_cookie_t last_used, last_complete; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + last_used = c->chan.cookie; + last_complete = c->lc; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) { + dma_set_tx_state(state, last_complete, last_used, 0); + return ret; + } + + spin_lock_irqsave(&c->lock, flags); + p = c->phy; + ret = c->status; + if (p) { + dma_addr_t addr = sa11x0_dma_pos(p); + + dev_vdbg(d->slave.dev, "tx_status: addr:%x\n", addr); + + txd = p->txd_done; + if (txd) { + unsigned i; + + for (i = 0; i < txd->sglen; i++) { + dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x\n", + i, txd->sg[i].addr, txd->sg[i].len); + if (addr >= txd->sg[i].addr && + addr < txd->sg[i].addr + txd->sg[i].len) { + unsigned len; + + len = txd->sg[i].len - + (addr - txd->sg[i].addr); + dev_vdbg(d->slave.dev, "tx_status: [%u] +%x\n", + i, len); + bytes += len; + i++; + break; + } + } + for (; i < txd->sglen; i++) { + dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x ++\n", + i, txd->sg[i].addr, txd->sg[i].len); + bytes += txd->sg[i].len; + } + } + if (txd != p->txd_load && p->txd_load) + bytes += p->txd_load->size; + } + list_for_each_entry(txd, &c->desc_issued, node) { + bytes += txd->size; + } + spin_unlock_irqrestore(&c->lock, flags); + + dma_set_tx_state(state, last_complete, last_used, bytes); + + dev_vdbg(d->slave.dev, "tx_status: bytes 0x%zx\n", bytes); + + return ret; +} + +/* + * Move pending txds to the issued list, and re-init pending list. + * If not already pending, add this channel to the list of pending + * channels and trigger the tasklet to run. + */ +static void sa11x0_dma_issue_pending(struct dma_chan *chan) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + unsigned long flags; + + spin_lock_irqsave(&c->lock, flags); + list_splice_tail_init(&c->desc_submitted, &c->desc_issued); + if (!list_empty(&c->desc_issued)) { + spin_lock(&d->lock); + if (!c->phy && list_empty(&c->node)) { + list_add_tail(&c->node, &d->chan_pending); + tasklet_schedule(&d->task); + dev_dbg(d->slave.dev, "vchan %p: issued\n", c); + } + spin_unlock(&d->lock); + } else + dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", c); + spin_unlock_irqrestore(&c->lock, flags); +} + +static dma_cookie_t sa11x0_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(tx->chan); + struct sa11x0_dma_desc *txd = to_sa11x0_dma_tx(tx); + unsigned long flags; + + spin_lock_irqsave(&c->lock, flags); + c->chan.cookie += 1; + if (c->chan.cookie < 0) + c->chan.cookie = 1; + txd->tx.cookie = c->chan.cookie; + + list_add_tail(&txd->node, &c->desc_submitted); + spin_unlock_irqrestore(&c->lock, flags); + + dev_dbg(tx->chan->device->dev, "vchan %p: txd %p[%x]: submitted\n", + c, txd, txd->tx.cookie); + + return txd->tx.cookie; +} + +static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sg, unsigned int sglen, + enum dma_transfer_direction dir, unsigned long flags, void *context) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_desc *txd; + struct scatterlist *sgent; + unsigned i, j = sglen; + size_t size = 0; + + /* SA11x0 channels can only operate in their native direction */ + if (dir != (c->ddar & DDAR_RW ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV)) { + dev_err(chan->device->dev, "vchan %p: bad DMA direction: DDAR:%08x dir:%u\n", + c, c->ddar, dir); + return NULL; + } + + /* Do not allow zero-sized txds */ + if (sglen == 0) + return NULL; + + for_each_sg(sg, sgent, sglen, i) { + dma_addr_t addr = sg_dma_address(sgent); + unsigned int len = sg_dma_len(sgent); + + if (len > DMA_MAX_SIZE) + j += DIV_ROUND_UP(len, DMA_MAX_SIZE & ~DMA_ALIGN) - 1; + if (addr & DMA_ALIGN) { + dev_dbg(chan->device->dev, "vchan %p: bad buffer alignment: %08x\n", + c, addr); + return NULL; + } + } + + txd = kzalloc(sizeof(*txd) + j * sizeof(txd->sg[0]), GFP_ATOMIC); + if (!txd) { + dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", c); + return NULL; + } + + j = 0; + for_each_sg(sg, sgent, sglen, i) { + dma_addr_t addr = sg_dma_address(sgent); + unsigned len = sg_dma_len(sgent); + + size += len; + + do { + unsigned tlen = len; + + /* + * Check whether the transfer will fit. If not, try + * to split the transfer up such that we end up with + * equal chunks - but make sure that we preserve the + * alignment. This avoids small segments. + */ + if (tlen > DMA_MAX_SIZE) { + unsigned mult = DIV_ROUND_UP(tlen, + DMA_MAX_SIZE & ~DMA_ALIGN); + + tlen = (tlen / mult) & ~DMA_ALIGN; + } + + txd->sg[j].addr = addr; + txd->sg[j].len = tlen; + + addr += tlen; + len -= tlen; + j++; + } while (len); + } + + dma_async_tx_descriptor_init(&txd->tx, &c->chan); + txd->tx.flags = flags; + txd->tx.tx_submit = sa11x0_dma_tx_submit; + txd->ddar = c->ddar; + txd->size = size; + txd->sglen = j; + + dev_dbg(chan->device->dev, "vchan %p: txd %p: size %u nr %u\n", + c, txd, txd->size, txd->sglen); + + return &txd->tx; +} + +static int sa11x0_dma_slave_config(struct sa11x0_dma_chan *c, struct dma_slave_config *cfg) +{ + u32 ddar = c->ddar & ((0xf << 4) | DDAR_RW); + dma_addr_t addr; + enum dma_slave_buswidth width; + u32 maxburst; + + if (ddar & DDAR_RW) { + addr = cfg->src_addr; + width = cfg->src_addr_width; + maxburst = cfg->src_maxburst; + } else { + addr = cfg->dst_addr; + width = cfg->dst_addr_width; + maxburst = cfg->dst_maxburst; + } + + if ((width != DMA_SLAVE_BUSWIDTH_1_BYTE && + width != DMA_SLAVE_BUSWIDTH_2_BYTES) || + (maxburst != 4 && maxburst != 8)) + return -EINVAL; + + if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + ddar |= DDAR_DW; + if (maxburst == 8) + ddar |= DDAR_BS; + + dev_dbg(c->chan.device->dev, "vchan %p: dma_slave_config addr %x width %u burst %u\n", + c, addr, width, maxburst); + + c->ddar = ddar | (addr & 0xf0000000) | (addr & 0x003ffffc) << 6; + + return 0; +} + +static int sa11x0_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device); + struct sa11x0_dma_phy *p; + LIST_HEAD(head); + unsigned long flags; + int ret; + + switch (cmd) { + case DMA_SLAVE_CONFIG: + return sa11x0_dma_slave_config(c, (struct dma_slave_config *)arg); + + case DMA_TERMINATE_ALL: + dev_dbg(d->slave.dev, "vchan %p: terminate all\n", c); + /* Clear the tx descriptor lists */ + spin_lock_irqsave(&c->lock, flags); + list_splice_tail_init(&c->desc_submitted, &head); + list_splice_tail_init(&c->desc_issued, &head); + + p = c->phy; + if (p) { + struct sa11x0_dma_desc *txd, *txn; + + dev_dbg(d->slave.dev, "pchan %u: terminating\n", p->num); + /* vchan is assigned to a pchan - stop the channel */ + writel(DCSR_RUN | DCSR_IE | + DCSR_STRTA | DCSR_DONEA | + DCSR_STRTB | DCSR_DONEB, + p->base + DMA_DCSR_C); + + list_for_each_entry_safe(txd, txn, &d->desc_complete, node) + if (txd->tx.chan == &c->chan) + list_move(&txd->node, &head); + + if (p->txd_load) { + if (p->txd_load != p->txd_done) + list_add_tail(&p->txd_load->node, &head); + p->txd_load = NULL; + } + if (p->txd_done) { + list_add_tail(&p->txd_done->node, &head); + p->txd_done = NULL; + } + c->phy = NULL; + spin_lock(&d->lock); + p->vchan = NULL; + spin_unlock(&d->lock); + tasklet_schedule(&d->task); + } + spin_unlock_irqrestore(&c->lock, flags); + sa11x0_dma_desc_free(d, &head); + ret = 0; + break; + + case DMA_PAUSE: + dev_dbg(d->slave.dev, "vchan %p: pause\n", c); + spin_lock_irqsave(&c->lock, flags); + if (c->status == DMA_IN_PROGRESS) { + c->status = DMA_PAUSED; + + p = c->phy; + if (p) { + writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C); + } else { + spin_lock(&d->lock); + list_del_init(&c->node); + spin_unlock(&d->lock); + } + } + spin_unlock_irqrestore(&c->lock, flags); + ret = 0; + break; + + case DMA_RESUME: + dev_dbg(d->slave.dev, "vchan %p: resume\n", c); + spin_lock_irqsave(&c->lock, flags); + if (c->status == DMA_PAUSED) { + c->status = DMA_IN_PROGRESS; + + p = c->phy; + if (p) { + writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_S); + } else if (!list_empty(&c->desc_issued)) { + spin_lock(&d->lock); + list_add_tail(&c->node, &d->chan_pending); + spin_unlock(&d->lock); + } + } + spin_unlock_irqrestore(&c->lock, flags); + ret = 0; + break; + + default: + ret = -ENXIO; + break; + } + + return ret; +} + +struct sa11x0_dma_channel_desc { + u32 ddar; + const char *name; +}; + +#define CD(d1, d2) { .ddar = DDAR_##d1 | d2, .name = #d1 } +static const struct sa11x0_dma_channel_desc chan_desc[] = { + CD(Ser0UDCTr, 0), + CD(Ser0UDCRc, DDAR_RW), + CD(Ser1SDLCTr, 0), + CD(Ser1SDLCRc, DDAR_RW), + CD(Ser1UARTTr, 0), + CD(Ser1UARTRc, DDAR_RW), + CD(Ser2ICPTr, 0), + CD(Ser2ICPRc, DDAR_RW), + CD(Ser3UARTTr, 0), + CD(Ser3UARTRc, DDAR_RW), + CD(Ser4MCP0Tr, 0), + CD(Ser4MCP0Rc, DDAR_RW), + CD(Ser4MCP1Tr, 0), + CD(Ser4MCP1Rc, DDAR_RW), + CD(Ser4SSPTr, 0), + CD(Ser4SSPRc, DDAR_RW), +}; + +static int __devinit sa11x0_dma_init_dmadev(struct dma_device *dmadev, + struct device *dev) +{ + unsigned i; + + dmadev->chancnt = ARRAY_SIZE(chan_desc); + INIT_LIST_HEAD(&dmadev->channels); + dmadev->dev = dev; + dmadev->device_alloc_chan_resources = sa11x0_dma_alloc_chan_resources; + dmadev->device_free_chan_resources = sa11x0_dma_free_chan_resources; + dmadev->device_control = sa11x0_dma_control; + dmadev->device_tx_status = sa11x0_dma_tx_status; + dmadev->device_issue_pending = sa11x0_dma_issue_pending; + + for (i = 0; i < dmadev->chancnt; i++) { + struct sa11x0_dma_chan *c; + + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) { + dev_err(dev, "no memory for channel %u\n", i); + return -ENOMEM; + } + + c->chan.device = dmadev; + c->status = DMA_IN_PROGRESS; + c->ddar = chan_desc[i].ddar; + c->name = chan_desc[i].name; + spin_lock_init(&c->lock); + INIT_LIST_HEAD(&c->desc_submitted); + INIT_LIST_HEAD(&c->desc_issued); + INIT_LIST_HEAD(&c->node); + list_add_tail(&c->chan.device_node, &dmadev->channels); + } + + return dma_async_device_register(dmadev); +} + +static int sa11x0_dma_request_irq(struct platform_device *pdev, int nr, + void *data) +{ + int irq = platform_get_irq(pdev, nr); + + if (irq <= 0) + return -ENXIO; + + return request_irq(irq, sa11x0_dma_irq, 0, dev_name(&pdev->dev), data); +} + +static void sa11x0_dma_free_irq(struct platform_device *pdev, int nr, + void *data) +{ + int irq = platform_get_irq(pdev, nr); + if (irq > 0) + free_irq(irq, data); +} + +static void sa11x0_dma_free_channels(struct dma_device *dmadev) +{ + struct sa11x0_dma_chan *c, *cn; + + list_for_each_entry_safe(c, cn, &dmadev->channels, chan.device_node) { + list_del(&c->chan.device_node); + kfree(c); + } +} + +static int __devinit sa11x0_dma_probe(struct platform_device *pdev) +{ + struct sa11x0_dma_dev *d; + struct resource *res; + unsigned i; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENXIO; + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + ret = -ENOMEM; + goto err_alloc; + } + + spin_lock_init(&d->lock); + INIT_LIST_HEAD(&d->chan_pending); + INIT_LIST_HEAD(&d->desc_complete); + + d->base = ioremap(res->start, resource_size(res)); + if (!d->base) { + ret = -ENOMEM; + goto err_ioremap; + } + + tasklet_init(&d->task, sa11x0_dma_tasklet, (unsigned long)d); + + for (i = 0; i < NR_PHY_CHAN; i++) { + struct sa11x0_dma_phy *p = &d->phy[i]; + + p->dev = d; + p->num = i; + p->base = d->base + i * DMA_SIZE; + writel_relaxed(DCSR_RUN | DCSR_IE | DCSR_ERROR | + DCSR_DONEA | DCSR_STRTA | DCSR_DONEB | DCSR_STRTB, + p->base + DMA_DCSR_C); + writel_relaxed(0, p->base + DMA_DDAR); + + ret = sa11x0_dma_request_irq(pdev, i, p); + if (ret) { + while (i) { + i--; + sa11x0_dma_free_irq(pdev, i, &d->phy[i]); + } + goto err_irq; + } + } + + dma_cap_set(DMA_SLAVE, d->slave.cap_mask); + d->slave.device_prep_slave_sg = sa11x0_dma_prep_slave_sg; + ret = sa11x0_dma_init_dmadev(&d->slave, &pdev->dev); + if (ret) { + dev_warn(d->slave.dev, "failed to register slave async device: %d\n", + ret); + goto err_slave_reg; + } + + platform_set_drvdata(pdev, d); + return 0; + + err_slave_reg: + sa11x0_dma_free_channels(&d->slave); + for (i = 0; i < NR_PHY_CHAN; i++) + sa11x0_dma_free_irq(pdev, i, &d->phy[i]); + err_irq: + tasklet_kill(&d->task); + iounmap(d->base); + err_ioremap: + kfree(d); + err_alloc: + return ret; +} + +static int __devexit sa11x0_dma_remove(struct platform_device *pdev) +{ + struct sa11x0_dma_dev *d = platform_get_drvdata(pdev); + unsigned pch; + + dma_async_device_unregister(&d->slave); + + sa11x0_dma_free_channels(&d->slave); + for (pch = 0; pch < NR_PHY_CHAN; pch++) + sa11x0_dma_free_irq(pdev, pch, &d->phy[pch]); + tasklet_kill(&d->task); + iounmap(d->base); + kfree(d); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int sa11x0_dma_suspend(struct device *dev) +{ + struct sa11x0_dma_dev *d = dev_get_drvdata(dev); + unsigned pch; + + for (pch = 0; pch < NR_PHY_CHAN; pch++) { + struct sa11x0_dma_phy *p = &d->phy[pch]; + u32 dcsr, saved_dcsr; + + dcsr = saved_dcsr = readl_relaxed(p->base + DMA_DCSR_R); + if (dcsr & DCSR_RUN) { + writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C); + dcsr = readl_relaxed(p->base + DMA_DCSR_R); + } + + saved_dcsr &= DCSR_RUN | DCSR_IE; + if (dcsr & DCSR_BIU) { + p->dbs[0] = readl_relaxed(p->base + DMA_DBSB); + p->dbt[0] = readl_relaxed(p->base + DMA_DBTB); + p->dbs[1] = readl_relaxed(p->base + DMA_DBSA); + p->dbt[1] = readl_relaxed(p->base + DMA_DBTA); + saved_dcsr |= (dcsr & DCSR_STRTA ? DCSR_STRTB : 0) | + (dcsr & DCSR_STRTB ? DCSR_STRTA : 0); + } else { + p->dbs[0] = readl_relaxed(p->base + DMA_DBSA); + p->dbt[0] = readl_relaxed(p->base + DMA_DBTA); + p->dbs[1] = readl_relaxed(p->base + DMA_DBSB); + p->dbt[1] = readl_relaxed(p->base + DMA_DBTB); + saved_dcsr |= dcsr & (DCSR_STRTA | DCSR_STRTB); + } + p->dcsr = saved_dcsr; + + writel(DCSR_STRTA | DCSR_STRTB, p->base + DMA_DCSR_C); + } + + return 0; +} + +static int sa11x0_dma_resume(struct device *dev) +{ + struct sa11x0_dma_dev *d = dev_get_drvdata(dev); + unsigned pch; + + for (pch = 0; pch < NR_PHY_CHAN; pch++) { + struct sa11x0_dma_phy *p = &d->phy[pch]; + struct sa11x0_dma_desc *txd = NULL; + u32 dcsr = readl_relaxed(p->base + DMA_DCSR_R); + + WARN_ON(dcsr & (DCSR_BIU | DCSR_STRTA | DCSR_STRTB | DCSR_RUN)); + + if (p->txd_done) + txd = p->txd_done; + else if (p->txd_load) + txd = p->txd_load; + + if (!txd) + continue; + + writel_relaxed(txd->ddar, p->base + DMA_DDAR); + + writel_relaxed(p->dbs[0], p->base + DMA_DBSA); + writel_relaxed(p->dbt[0], p->base + DMA_DBTA); + writel_relaxed(p->dbs[1], p->base + DMA_DBSB); + writel_relaxed(p->dbt[1], p->base + DMA_DBTB); + writel_relaxed(p->dcsr, p->base + DMA_DCSR_S); + } + + return 0; +} +#endif + +static const struct dev_pm_ops sa11x0_dma_pm_ops = { + .suspend_noirq = sa11x0_dma_suspend, + .resume_noirq = sa11x0_dma_resume, + .freeze_noirq = sa11x0_dma_suspend, + .thaw_noirq = sa11x0_dma_resume, + .poweroff_noirq = sa11x0_dma_suspend, + .restore_noirq = sa11x0_dma_resume, +}; + +static struct platform_driver sa11x0_dma_driver = { + .driver = { + .name = "sa11x0-dma", + .owner = THIS_MODULE, + .pm = &sa11x0_dma_pm_ops, + }, + .probe = sa11x0_dma_probe, + .remove = __devexit_p(sa11x0_dma_remove), +}; + +bool sa11x0_dma_filter_fn(struct dma_chan *chan, void *param) +{ + if (chan->device->dev->driver == &sa11x0_dma_driver.driver) { + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + const char *p = param; + + return !strcmp(c->name, p); + } + return false; +} +EXPORT_SYMBOL(sa11x0_dma_filter_fn); + +static int __init sa11x0_dma_init(void) +{ + return platform_driver_register(&sa11x0_dma_driver); +} +subsys_initcall(sa11x0_dma_init); + +static void __exit sa11x0_dma_exit(void) +{ + platform_driver_unregister(&sa11x0_dma_driver); +} +module_exit(sa11x0_dma_exit); + +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("SA-11x0 DMA driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:sa11x0-dma"); diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c new file mode 100644 index 00000000..19d7a8d3 --- /dev/null +++ b/drivers/dma/shdma.c @@ -0,0 +1,1524 @@ +/* + * Renesas SuperH DMA Engine support + * + * base is drivers/dma/flsdma.c + * + * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com> + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * - DMA of SuperH does not have Hardware DMA chain mode. + * - MAX DMA size is 16MB. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/sh_dma.h> +#include <linux/notifier.h> +#include <linux/kdebug.h> +#include <linux/spinlock.h> +#include <linux/rculist.h> + +#include "dmaengine.h" +#include "shdma.h" + +/* DMA descriptor control */ +enum sh_dmae_desc_status { + DESC_IDLE, + DESC_PREPARED, + DESC_SUBMITTED, + DESC_COMPLETED, /* completed, have to call callback */ + DESC_WAITING, /* callback called, waiting for ack / re-submit */ +}; + +#define NR_DESCS_PER_CHANNEL 32 +/* Default MEMCPY transfer size = 2^2 = 4 bytes */ +#define LOG2_DEFAULT_XFER_SIZE 2 + +/* + * Used for write-side mutual exclusion for the global device list, + * read-side synchronization by way of RCU, and per-controller data. + */ +static DEFINE_SPINLOCK(sh_dmae_lock); +static LIST_HEAD(sh_dmae_devices); + +/* A bitmask with bits enough for enum sh_dmae_slave_chan_id */ +static unsigned long sh_dmae_slave_used[BITS_TO_LONGS(SH_DMA_SLAVE_NUMBER)]; + +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all); +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan); + +static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_dc); + + __raw_writel(data, shdev->chan_reg + + shdev->pdata->channel[sh_dc->id].chclr_offset); +} + +static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) +{ + __raw_writel(data, sh_dc->base + reg / sizeof(u32)); +} + +static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg) +{ + return __raw_readl(sh_dc->base + reg / sizeof(u32)); +} + +static u16 dmaor_read(struct sh_dmae_device *shdev) +{ + u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32); + + if (shdev->pdata->dmaor_is_32bit) + return __raw_readl(addr); + else + return __raw_readw(addr); +} + +static void dmaor_write(struct sh_dmae_device *shdev, u16 data) +{ + u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32); + + if (shdev->pdata->dmaor_is_32bit) + __raw_writel(data, addr); + else + __raw_writew(data, addr); +} + +static void chcr_write(struct sh_dmae_chan *sh_dc, u32 data) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_dc); + + __raw_writel(data, sh_dc->base + shdev->chcr_offset / sizeof(u32)); +} + +static u32 chcr_read(struct sh_dmae_chan *sh_dc) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_dc); + + return __raw_readl(sh_dc->base + shdev->chcr_offset / sizeof(u32)); +} + +/* + * Reset DMA controller + * + * SH7780 has two DMAOR register + */ +static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev) +{ + unsigned short dmaor; + unsigned long flags; + + spin_lock_irqsave(&sh_dmae_lock, flags); + + dmaor = dmaor_read(shdev); + dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME)); + + spin_unlock_irqrestore(&sh_dmae_lock, flags); +} + +static int sh_dmae_rst(struct sh_dmae_device *shdev) +{ + unsigned short dmaor; + unsigned long flags; + + spin_lock_irqsave(&sh_dmae_lock, flags); + + dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME); + + if (shdev->pdata->chclr_present) { + int i; + for (i = 0; i < shdev->pdata->channel_num; i++) { + struct sh_dmae_chan *sh_chan = shdev->chan[i]; + if (sh_chan) + chclr_write(sh_chan, 0); + } + } + + dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init); + + dmaor = dmaor_read(shdev); + + spin_unlock_irqrestore(&sh_dmae_lock, flags); + + if (dmaor & (DMAOR_AE | DMAOR_NMIF)) { + dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n"); + return -EIO; + } + if (shdev->pdata->dmaor_init & ~dmaor) + dev_warn(shdev->common.dev, + "DMAOR=0x%x hasn't latched the initial value 0x%x.\n", + dmaor, shdev->pdata->dmaor_init); + return 0; +} + +static bool dmae_is_busy(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = chcr_read(sh_chan); + + if ((chcr & (CHCR_DE | CHCR_TE)) == CHCR_DE) + return true; /* working */ + + return false; /* waiting */ +} + +static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + struct sh_dmae_pdata *pdata = shdev->pdata; + int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) | + ((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift); + + if (cnt >= pdata->ts_shift_num) + cnt = 0; + + return pdata->ts_shift[cnt]; +} + +static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + struct sh_dmae_pdata *pdata = shdev->pdata; + int i; + + for (i = 0; i < pdata->ts_shift_num; i++) + if (pdata->ts_shift[i] == l2size) + break; + + if (i == pdata->ts_shift_num) + i = 0; + + return ((i << pdata->ts_low_shift) & pdata->ts_low_mask) | + ((i << pdata->ts_high_shift) & pdata->ts_high_mask); +} + +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw) +{ + sh_dmae_writel(sh_chan, hw->sar, SAR); + sh_dmae_writel(sh_chan, hw->dar, DAR); + sh_dmae_writel(sh_chan, hw->tcr >> sh_chan->xmit_shift, TCR); +} + +static void dmae_start(struct sh_dmae_chan *sh_chan) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + u32 chcr = chcr_read(sh_chan); + + if (shdev->pdata->needs_tend_set) + sh_dmae_writel(sh_chan, 0xFFFFFFFF, TEND); + + chcr |= CHCR_DE | shdev->chcr_ie_bit; + chcr_write(sh_chan, chcr & ~CHCR_TE); +} + +static void dmae_halt(struct sh_dmae_chan *sh_chan) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + u32 chcr = chcr_read(sh_chan); + + chcr &= ~(CHCR_DE | CHCR_TE | shdev->chcr_ie_bit); + chcr_write(sh_chan, chcr); +} + +static void dmae_init(struct sh_dmae_chan *sh_chan) +{ + /* + * Default configuration for dual address memory-memory transfer. + * 0x400 represents auto-request. + */ + u32 chcr = DM_INC | SM_INC | 0x400 | log2size_to_chcr(sh_chan, + LOG2_DEFAULT_XFER_SIZE); + sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr); + chcr_write(sh_chan, chcr); +} + +static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val) +{ + /* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */ + if (dmae_is_busy(sh_chan)) + return -EBUSY; + + sh_chan->xmit_shift = calc_xmit_shift(sh_chan, val); + chcr_write(sh_chan, val); + + return 0; +} + +static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + struct sh_dmae_pdata *pdata = shdev->pdata; + const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->id]; + u16 __iomem *addr = shdev->dmars; + unsigned int shift = chan_pdata->dmars_bit; + + if (dmae_is_busy(sh_chan)) + return -EBUSY; + + if (pdata->no_dmars) + return 0; + + /* in the case of a missing DMARS resource use first memory window */ + if (!addr) + addr = (u16 __iomem *)shdev->chan_reg; + addr += chan_pdata->dmars / sizeof(u16); + + __raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift), + addr); + + return 0; +} + +static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; + struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + struct sh_dmae_slave *param = tx->chan->private; + dma_async_tx_callback callback = tx->callback; + dma_cookie_t cookie; + bool power_up; + + spin_lock_irq(&sh_chan->desc_lock); + + if (list_empty(&sh_chan->ld_queue)) + power_up = true; + else + power_up = false; + + cookie = dma_cookie_assign(tx); + + /* Mark all chunks of this descriptor as submitted, move to the queue */ + list_for_each_entry_safe(chunk, c, desc->node.prev, node) { + /* + * All chunks are on the global ld_free, so, we have to find + * the end of the chain ourselves + */ + if (chunk != desc && (chunk->mark == DESC_IDLE || + chunk->async_tx.cookie > 0 || + chunk->async_tx.cookie == -EBUSY || + &chunk->node == &sh_chan->ld_free)) + break; + chunk->mark = DESC_SUBMITTED; + /* Callback goes to the last chunk */ + chunk->async_tx.callback = NULL; + chunk->cookie = cookie; + list_move_tail(&chunk->node, &sh_chan->ld_queue); + last = chunk; + } + + last->async_tx.callback = callback; + last->async_tx.callback_param = tx->callback_param; + + dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n", + tx->cookie, &last->async_tx, sh_chan->id, + desc->hw.sar, desc->hw.tcr, desc->hw.dar); + + if (power_up) { + sh_chan->pm_state = DMAE_PM_BUSY; + + pm_runtime_get(sh_chan->dev); + + spin_unlock_irq(&sh_chan->desc_lock); + + pm_runtime_barrier(sh_chan->dev); + + spin_lock_irq(&sh_chan->desc_lock); + + /* Have we been reset, while waiting? */ + if (sh_chan->pm_state != DMAE_PM_ESTABLISHED) { + dev_dbg(sh_chan->dev, "Bring up channel %d\n", + sh_chan->id); + if (param) { + const struct sh_dmae_slave_config *cfg = + param->config; + + dmae_set_dmars(sh_chan, cfg->mid_rid); + dmae_set_chcr(sh_chan, cfg->chcr); + } else { + dmae_init(sh_chan); + } + + if (sh_chan->pm_state == DMAE_PM_PENDING) + sh_chan_xfer_ld_queue(sh_chan); + sh_chan->pm_state = DMAE_PM_ESTABLISHED; + } + } else { + sh_chan->pm_state = DMAE_PM_PENDING; + } + + spin_unlock_irq(&sh_chan->desc_lock); + + return cookie; +} + +/* Called with desc_lock held */ +static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) +{ + struct sh_desc *desc; + + list_for_each_entry(desc, &sh_chan->ld_free, node) + if (desc->mark != DESC_PREPARED) { + BUG_ON(desc->mark != DESC_IDLE); + list_del(&desc->node); + return desc; + } + + return NULL; +} + +static const struct sh_dmae_slave_config *sh_dmae_find_slave( + struct sh_dmae_chan *sh_chan, struct sh_dmae_slave *param) +{ + struct sh_dmae_device *shdev = to_sh_dev(sh_chan); + struct sh_dmae_pdata *pdata = shdev->pdata; + int i; + + if (param->slave_id >= SH_DMA_SLAVE_NUMBER) + return NULL; + + for (i = 0; i < pdata->slave_num; i++) + if (pdata->slave[i].slave_id == param->slave_id) + return pdata->slave + i; + + return NULL; +} + +static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + struct sh_desc *desc; + struct sh_dmae_slave *param = chan->private; + int ret; + + /* + * This relies on the guarantee from dmaengine that alloc_chan_resources + * never runs concurrently with itself or free_chan_resources. + */ + if (param) { + const struct sh_dmae_slave_config *cfg; + + cfg = sh_dmae_find_slave(sh_chan, param); + if (!cfg) { + ret = -EINVAL; + goto efindslave; + } + + if (test_and_set_bit(param->slave_id, sh_dmae_slave_used)) { + ret = -EBUSY; + goto etestused; + } + + param->config = cfg; + } + + while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { + desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL); + if (!desc) + break; + dma_async_tx_descriptor_init(&desc->async_tx, + &sh_chan->common); + desc->async_tx.tx_submit = sh_dmae_tx_submit; + desc->mark = DESC_IDLE; + + list_add(&desc->node, &sh_chan->ld_free); + sh_chan->descs_allocated++; + } + + if (!sh_chan->descs_allocated) { + ret = -ENOMEM; + goto edescalloc; + } + + return sh_chan->descs_allocated; + +edescalloc: + if (param) + clear_bit(param->slave_id, sh_dmae_slave_used); +etestused: +efindslave: + chan->private = NULL; + return ret; +} + +/* + * sh_dma_free_chan_resources - Free all resources of the channel. + */ +static void sh_dmae_free_chan_resources(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + struct sh_desc *desc, *_desc; + LIST_HEAD(list); + + /* Protect against ISR */ + spin_lock_irq(&sh_chan->desc_lock); + dmae_halt(sh_chan); + spin_unlock_irq(&sh_chan->desc_lock); + + /* Now no new interrupts will occur */ + + /* Prepared and not submitted descriptors can still be on the queue */ + if (!list_empty(&sh_chan->ld_queue)) + sh_dmae_chan_ld_cleanup(sh_chan, true); + + if (chan->private) { + /* The caller is holding dma_list_mutex */ + struct sh_dmae_slave *param = chan->private; + clear_bit(param->slave_id, sh_dmae_slave_used); + chan->private = NULL; + } + + spin_lock_irq(&sh_chan->desc_lock); + + list_splice_init(&sh_chan->ld_free, &list); + sh_chan->descs_allocated = 0; + + spin_unlock_irq(&sh_chan->desc_lock); + + list_for_each_entry_safe(desc, _desc, &list, node) + kfree(desc); +} + +/** + * sh_dmae_add_desc - get, set up and return one transfer descriptor + * @sh_chan: DMA channel + * @flags: DMA transfer flags + * @dest: destination DMA address, incremented when direction equals + * DMA_DEV_TO_MEM + * @src: source DMA address, incremented when direction equals + * DMA_MEM_TO_DEV + * @len: DMA transfer length + * @first: if NULL, set to the current descriptor and cookie set to -EBUSY + * @direction: needed for slave DMA to decide which address to keep constant, + * equals DMA_MEM_TO_MEM for MEMCPY + * Returns 0 or an error + * Locks: called with desc_lock held + */ +static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan, + unsigned long flags, dma_addr_t *dest, dma_addr_t *src, size_t *len, + struct sh_desc **first, enum dma_transfer_direction direction) +{ + struct sh_desc *new; + size_t copy_size; + + if (!*len) + return NULL; + + /* Allocate the link descriptor from the free list */ + new = sh_dmae_get_desc(sh_chan); + if (!new) { + dev_err(sh_chan->dev, "No free link descriptor available\n"); + return NULL; + } + + copy_size = min(*len, (size_t)SH_DMA_TCR_MAX + 1); + + new->hw.sar = *src; + new->hw.dar = *dest; + new->hw.tcr = copy_size; + + if (!*first) { + /* First desc */ + new->async_tx.cookie = -EBUSY; + *first = new; + } else { + /* Other desc - invisible to the user */ + new->async_tx.cookie = -EINVAL; + } + + dev_dbg(sh_chan->dev, + "chaining (%u/%u)@%x -> %x with %p, cookie %d, shift %d\n", + copy_size, *len, *src, *dest, &new->async_tx, + new->async_tx.cookie, sh_chan->xmit_shift); + + new->mark = DESC_PREPARED; + new->async_tx.flags = flags; + new->direction = direction; + + *len -= copy_size; + if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV) + *src += copy_size; + if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM) + *dest += copy_size; + + return new; +} + +/* + * sh_dmae_prep_sg - prepare transfer descriptors from an SG list + * + * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also + * converted to scatter-gather to guarantee consistent locking and a correct + * list manipulation. For slave DMA direction carries the usual meaning, and, + * logically, the SG list is RAM and the addr variable contains slave address, + * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM + * and the SG list contains only one element and points at the source buffer. + */ +static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_chan, + struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr, + enum dma_transfer_direction direction, unsigned long flags) +{ + struct scatterlist *sg; + struct sh_desc *first = NULL, *new = NULL /* compiler... */; + LIST_HEAD(tx_list); + int chunks = 0; + unsigned long irq_flags; + int i; + + if (!sg_len) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) + chunks += (sg_dma_len(sg) + SH_DMA_TCR_MAX) / + (SH_DMA_TCR_MAX + 1); + + /* Have to lock the whole loop to protect against concurrent release */ + spin_lock_irqsave(&sh_chan->desc_lock, irq_flags); + + /* + * Chaining: + * first descriptor is what user is dealing with in all API calls, its + * cookie is at first set to -EBUSY, at tx-submit to a positive + * number + * if more than one chunk is needed further chunks have cookie = -EINVAL + * the last chunk, if not equal to the first, has cookie = -ENOSPC + * all chunks are linked onto the tx_list head with their .node heads + * only during this function, then they are immediately spliced + * back onto the free list in form of a chain + */ + for_each_sg(sgl, sg, sg_len, i) { + dma_addr_t sg_addr = sg_dma_address(sg); + size_t len = sg_dma_len(sg); + + if (!len) + goto err_get_desc; + + do { + dev_dbg(sh_chan->dev, "Add SG #%d@%p[%d], dma %llx\n", + i, sg, len, (unsigned long long)sg_addr); + + if (direction == DMA_DEV_TO_MEM) + new = sh_dmae_add_desc(sh_chan, flags, + &sg_addr, addr, &len, &first, + direction); + else + new = sh_dmae_add_desc(sh_chan, flags, + addr, &sg_addr, &len, &first, + direction); + if (!new) + goto err_get_desc; + + new->chunks = chunks--; + list_add_tail(&new->node, &tx_list); + } while (len); + } + + if (new != first) + new->async_tx.cookie = -ENOSPC; + + /* Put them back on the free list, so, they don't get lost */ + list_splice_tail(&tx_list, &sh_chan->ld_free); + + spin_unlock_irqrestore(&sh_chan->desc_lock, irq_flags); + + return &first->async_tx; + +err_get_desc: + list_for_each_entry(new, &tx_list, node) + new->mark = DESC_IDLE; + list_splice(&tx_list, &sh_chan->ld_free); + + spin_unlock_irqrestore(&sh_chan->desc_lock, irq_flags); + + return NULL; +} + +static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct sh_dmae_chan *sh_chan; + struct scatterlist sg; + + if (!chan || !len) + return NULL; + + sh_chan = to_sh_chan(chan); + + sg_init_table(&sg, 1); + sg_set_page(&sg, pfn_to_page(PFN_DOWN(dma_src)), len, + offset_in_page(dma_src)); + sg_dma_address(&sg) = dma_src; + sg_dma_len(&sg) = len; + + return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, + flags); +} + +static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct sh_dmae_slave *param; + struct sh_dmae_chan *sh_chan; + dma_addr_t slave_addr; + + if (!chan) + return NULL; + + sh_chan = to_sh_chan(chan); + param = chan->private; + + /* Someone calling slave DMA on a public channel? */ + if (!param || !sg_len) { + dev_warn(sh_chan->dev, "%s: bad parameter: %p, %d, %d\n", + __func__, param, sg_len, param ? param->slave_id : -1); + return NULL; + } + + slave_addr = param->config->addr; + + /* + * if (param != NULL), this is a successfully requested slave channel, + * therefore param->config != NULL too. + */ + return sh_dmae_prep_sg(sh_chan, sgl, sg_len, &slave_addr, + direction, flags); +} + +static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + unsigned long flags; + + /* Only supports DMA_TERMINATE_ALL */ + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + if (!chan) + return -EINVAL; + + spin_lock_irqsave(&sh_chan->desc_lock, flags); + dmae_halt(sh_chan); + + if (!list_empty(&sh_chan->ld_queue)) { + /* Record partial transfer */ + struct sh_desc *desc = list_entry(sh_chan->ld_queue.next, + struct sh_desc, node); + desc->partial = (desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) << + sh_chan->xmit_shift; + } + spin_unlock_irqrestore(&sh_chan->desc_lock, flags); + + sh_dmae_chan_ld_cleanup(sh_chan, true); + + return 0; +} + +static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) +{ + struct sh_desc *desc, *_desc; + /* Is the "exposed" head of a chain acked? */ + bool head_acked = false; + dma_cookie_t cookie = 0; + dma_async_tx_callback callback = NULL; + void *param = NULL; + unsigned long flags; + + spin_lock_irqsave(&sh_chan->desc_lock, flags); + list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie); + BUG_ON(desc->mark != DESC_SUBMITTED && + desc->mark != DESC_COMPLETED && + desc->mark != DESC_WAITING); + + /* + * queue is ordered, and we use this loop to (1) clean up all + * completed descriptors, and to (2) update descriptor flags of + * any chunks in a (partially) completed chain + */ + if (!all && desc->mark == DESC_SUBMITTED && + desc->cookie != cookie) + break; + + if (tx->cookie > 0) + cookie = tx->cookie; + + if (desc->mark == DESC_COMPLETED && desc->chunks == 1) { + if (sh_chan->common.completed_cookie != desc->cookie - 1) + dev_dbg(sh_chan->dev, + "Completing cookie %d, expected %d\n", + desc->cookie, + sh_chan->common.completed_cookie + 1); + sh_chan->common.completed_cookie = desc->cookie; + } + + /* Call callback on the last chunk */ + if (desc->mark == DESC_COMPLETED && tx->callback) { + desc->mark = DESC_WAITING; + callback = tx->callback; + param = tx->callback_param; + dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n", + tx->cookie, tx, sh_chan->id); + BUG_ON(desc->chunks != 1); + break; + } + + if (tx->cookie > 0 || tx->cookie == -EBUSY) { + if (desc->mark == DESC_COMPLETED) { + BUG_ON(tx->cookie < 0); + desc->mark = DESC_WAITING; + } + head_acked = async_tx_test_ack(tx); + } else { + switch (desc->mark) { + case DESC_COMPLETED: + desc->mark = DESC_WAITING; + /* Fall through */ + case DESC_WAITING: + if (head_acked) + async_tx_ack(&desc->async_tx); + } + } + + dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n", + tx, tx->cookie); + + if (((desc->mark == DESC_COMPLETED || + desc->mark == DESC_WAITING) && + async_tx_test_ack(&desc->async_tx)) || all) { + /* Remove from ld_queue list */ + desc->mark = DESC_IDLE; + + list_move(&desc->node, &sh_chan->ld_free); + + if (list_empty(&sh_chan->ld_queue)) { + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); + pm_runtime_put(sh_chan->dev); + } + } + } + + if (all && !callback) + /* + * Terminating and the loop completed normally: forgive + * uncompleted cookies + */ + sh_chan->common.completed_cookie = sh_chan->common.cookie; + + spin_unlock_irqrestore(&sh_chan->desc_lock, flags); + + if (callback) + callback(param); + + return callback; +} + +/* + * sh_chan_ld_cleanup - Clean up link descriptors + * + * This function cleans up the ld_queue of DMA channel. + */ +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) +{ + while (__ld_cleanup(sh_chan, all)) + ; +} + +/* Called under spin_lock_irq(&sh_chan->desc_lock) */ +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) +{ + struct sh_desc *desc; + + /* DMA work check */ + if (dmae_is_busy(sh_chan)) + return; + + /* Find the first not transferred descriptor */ + list_for_each_entry(desc, &sh_chan->ld_queue, node) + if (desc->mark == DESC_SUBMITTED) { + dev_dbg(sh_chan->dev, "Queue #%d to %d: %u@%x -> %x\n", + desc->async_tx.cookie, sh_chan->id, + desc->hw.tcr, desc->hw.sar, desc->hw.dar); + /* Get the ld start address from ld_queue */ + dmae_set_reg(sh_chan, &desc->hw); + dmae_start(sh_chan); + break; + } +} + +static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + + spin_lock_irq(&sh_chan->desc_lock); + if (sh_chan->pm_state == DMAE_PM_ESTABLISHED) + sh_chan_xfer_ld_queue(sh_chan); + else + sh_chan->pm_state = DMAE_PM_PENDING; + spin_unlock_irq(&sh_chan->desc_lock); +} + +static enum dma_status sh_dmae_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + enum dma_status status; + unsigned long flags; + + sh_dmae_chan_ld_cleanup(sh_chan, false); + + spin_lock_irqsave(&sh_chan->desc_lock, flags); + + status = dma_cookie_status(chan, cookie, txstate); + + /* + * If we don't find cookie on the queue, it has been aborted and we have + * to report error + */ + if (status != DMA_SUCCESS) { + struct sh_desc *desc; + status = DMA_ERROR; + list_for_each_entry(desc, &sh_chan->ld_queue, node) + if (desc->cookie == cookie) { + status = DMA_IN_PROGRESS; + break; + } + } + + spin_unlock_irqrestore(&sh_chan->desc_lock, flags); + + return status; +} + +static irqreturn_t sh_dmae_interrupt(int irq, void *data) +{ + irqreturn_t ret = IRQ_NONE; + struct sh_dmae_chan *sh_chan = data; + u32 chcr; + + spin_lock(&sh_chan->desc_lock); + + chcr = chcr_read(sh_chan); + + if (chcr & CHCR_TE) { + /* DMA stop */ + dmae_halt(sh_chan); + + ret = IRQ_HANDLED; + tasklet_schedule(&sh_chan->tasklet); + } + + spin_unlock(&sh_chan->desc_lock); + + return ret; +} + +/* Called from error IRQ or NMI */ +static bool sh_dmae_reset(struct sh_dmae_device *shdev) +{ + unsigned int handled = 0; + int i; + + /* halt the dma controller */ + sh_dmae_ctl_stop(shdev); + + /* We cannot detect, which channel caused the error, have to reset all */ + for (i = 0; i < SH_DMAC_MAX_CHANNELS; i++) { + struct sh_dmae_chan *sh_chan = shdev->chan[i]; + struct sh_desc *desc; + LIST_HEAD(dl); + + if (!sh_chan) + continue; + + spin_lock(&sh_chan->desc_lock); + + /* Stop the channel */ + dmae_halt(sh_chan); + + list_splice_init(&sh_chan->ld_queue, &dl); + + if (!list_empty(&dl)) { + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); + pm_runtime_put(sh_chan->dev); + } + sh_chan->pm_state = DMAE_PM_ESTABLISHED; + + spin_unlock(&sh_chan->desc_lock); + + /* Complete all */ + list_for_each_entry(desc, &dl, node) { + struct dma_async_tx_descriptor *tx = &desc->async_tx; + desc->mark = DESC_IDLE; + if (tx->callback) + tx->callback(tx->callback_param); + } + + spin_lock(&sh_chan->desc_lock); + list_splice(&dl, &sh_chan->ld_free); + spin_unlock(&sh_chan->desc_lock); + + handled++; + } + + sh_dmae_rst(shdev); + + return !!handled; +} + +static irqreturn_t sh_dmae_err(int irq, void *data) +{ + struct sh_dmae_device *shdev = data; + + if (!(dmaor_read(shdev) & DMAOR_AE)) + return IRQ_NONE; + + sh_dmae_reset(data); + return IRQ_HANDLED; +} + +static void dmae_do_tasklet(unsigned long data) +{ + struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; + struct sh_desc *desc; + u32 sar_buf = sh_dmae_readl(sh_chan, SAR); + u32 dar_buf = sh_dmae_readl(sh_chan, DAR); + + spin_lock_irq(&sh_chan->desc_lock); + list_for_each_entry(desc, &sh_chan->ld_queue, node) { + if (desc->mark == DESC_SUBMITTED && + ((desc->direction == DMA_DEV_TO_MEM && + (desc->hw.dar + desc->hw.tcr) == dar_buf) || + (desc->hw.sar + desc->hw.tcr) == sar_buf)) { + dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n", + desc->async_tx.cookie, &desc->async_tx, + desc->hw.dar); + desc->mark = DESC_COMPLETED; + break; + } + } + /* Next desc */ + sh_chan_xfer_ld_queue(sh_chan); + spin_unlock_irq(&sh_chan->desc_lock); + + sh_dmae_chan_ld_cleanup(sh_chan, false); +} + +static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev) +{ + /* Fast path out if NMIF is not asserted for this controller */ + if ((dmaor_read(shdev) & DMAOR_NMIF) == 0) + return false; + + return sh_dmae_reset(shdev); +} + +static int sh_dmae_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *data) +{ + struct sh_dmae_device *shdev; + int ret = NOTIFY_DONE; + bool triggered; + + /* + * Only concern ourselves with NMI events. + * + * Normally we would check the die chain value, but as this needs + * to be architecture independent, check for NMI context instead. + */ + if (!in_nmi()) + return NOTIFY_DONE; + + rcu_read_lock(); + list_for_each_entry_rcu(shdev, &sh_dmae_devices, node) { + /* + * Only stop if one of the controllers has NMIF asserted, + * we do not want to interfere with regular address error + * handling or NMI events that don't concern the DMACs. + */ + triggered = sh_dmae_nmi_notify(shdev); + if (triggered == true) + ret = NOTIFY_OK; + } + rcu_read_unlock(); + + return ret; +} + +static struct notifier_block sh_dmae_nmi_notifier __read_mostly = { + .notifier_call = sh_dmae_nmi_handler, + + /* Run before NMI debug handler and KGDB */ + .priority = 1, +}; + +static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id, + int irq, unsigned long flags) +{ + int err; + const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id]; + struct platform_device *pdev = to_platform_device(shdev->common.dev); + struct sh_dmae_chan *new_sh_chan; + + /* alloc channel */ + new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL); + if (!new_sh_chan) { + dev_err(shdev->common.dev, + "No free memory for allocating dma channels!\n"); + return -ENOMEM; + } + + new_sh_chan->pm_state = DMAE_PM_ESTABLISHED; + + /* reference struct dma_device */ + new_sh_chan->common.device = &shdev->common; + dma_cookie_init(&new_sh_chan->common); + + new_sh_chan->dev = shdev->common.dev; + new_sh_chan->id = id; + new_sh_chan->irq = irq; + new_sh_chan->base = shdev->chan_reg + chan_pdata->offset / sizeof(u32); + + /* Init DMA tasklet */ + tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet, + (unsigned long)new_sh_chan); + + spin_lock_init(&new_sh_chan->desc_lock); + + /* Init descripter manage list */ + INIT_LIST_HEAD(&new_sh_chan->ld_queue); + INIT_LIST_HEAD(&new_sh_chan->ld_free); + + /* Add the channel to DMA device channel list */ + list_add_tail(&new_sh_chan->common.device_node, + &shdev->common.channels); + shdev->common.chancnt++; + + if (pdev->id >= 0) + snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id), + "sh-dmae%d.%d", pdev->id, new_sh_chan->id); + else + snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id), + "sh-dma%d", new_sh_chan->id); + + /* set up channel irq */ + err = request_irq(irq, &sh_dmae_interrupt, flags, + new_sh_chan->dev_id, new_sh_chan); + if (err) { + dev_err(shdev->common.dev, "DMA channel %d request_irq error " + "with return %d\n", id, err); + goto err_no_irq; + } + + shdev->chan[id] = new_sh_chan; + return 0; + +err_no_irq: + /* remove from dmaengine device node */ + list_del(&new_sh_chan->common.device_node); + kfree(new_sh_chan); + return err; +} + +static void sh_dmae_chan_remove(struct sh_dmae_device *shdev) +{ + int i; + + for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) { + if (shdev->chan[i]) { + struct sh_dmae_chan *sh_chan = shdev->chan[i]; + + free_irq(sh_chan->irq, sh_chan); + + list_del(&sh_chan->common.device_node); + kfree(sh_chan); + shdev->chan[i] = NULL; + } + } + shdev->common.chancnt = 0; +} + +static int __init sh_dmae_probe(struct platform_device *pdev) +{ + struct sh_dmae_pdata *pdata = pdev->dev.platform_data; + unsigned long irqflags = IRQF_DISABLED, + chan_flag[SH_DMAC_MAX_CHANNELS] = {}; + int errirq, chan_irq[SH_DMAC_MAX_CHANNELS]; + int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0; + struct sh_dmae_device *shdev; + struct resource *chan, *dmars, *errirq_res, *chanirq_res; + + /* get platform data */ + if (!pdata || !pdata->channel_num) + return -ENODEV; + + chan = platform_get_resource(pdev, IORESOURCE_MEM, 0); + /* DMARS area is optional */ + dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1); + /* + * IRQ resources: + * 1. there always must be at least one IRQ IO-resource. On SH4 it is + * the error IRQ, in which case it is the only IRQ in this resource: + * start == end. If it is the only IRQ resource, all channels also + * use the same IRQ. + * 2. DMA channel IRQ resources can be specified one per resource or in + * ranges (start != end) + * 3. iff all events (channels and, optionally, error) on this + * controller use the same IRQ, only one IRQ resource can be + * specified, otherwise there must be one IRQ per channel, even if + * some of them are equal + * 4. if all IRQs on this controller are equal or if some specific IRQs + * specify IORESOURCE_IRQ_SHAREABLE in their resources, they will be + * requested with the IRQF_SHARED flag + */ + errirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!chan || !errirq_res) + return -ENODEV; + + if (!request_mem_region(chan->start, resource_size(chan), pdev->name)) { + dev_err(&pdev->dev, "DMAC register region already claimed\n"); + return -EBUSY; + } + + if (dmars && !request_mem_region(dmars->start, resource_size(dmars), pdev->name)) { + dev_err(&pdev->dev, "DMAC DMARS region already claimed\n"); + err = -EBUSY; + goto ermrdmars; + } + + err = -ENOMEM; + shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL); + if (!shdev) { + dev_err(&pdev->dev, "Not enough memory\n"); + goto ealloc; + } + + shdev->chan_reg = ioremap(chan->start, resource_size(chan)); + if (!shdev->chan_reg) + goto emapchan; + if (dmars) { + shdev->dmars = ioremap(dmars->start, resource_size(dmars)); + if (!shdev->dmars) + goto emapdmars; + } + + /* platform data */ + shdev->pdata = pdata; + + if (pdata->chcr_offset) + shdev->chcr_offset = pdata->chcr_offset; + else + shdev->chcr_offset = CHCR; + + if (pdata->chcr_ie_bit) + shdev->chcr_ie_bit = pdata->chcr_ie_bit; + else + shdev->chcr_ie_bit = CHCR_IE; + + platform_set_drvdata(pdev, shdev); + + shdev->common.dev = &pdev->dev; + + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + spin_lock_irq(&sh_dmae_lock); + list_add_tail_rcu(&shdev->node, &sh_dmae_devices); + spin_unlock_irq(&sh_dmae_lock); + + /* reset dma controller - only needed as a test */ + err = sh_dmae_rst(shdev); + if (err) + goto rst_err; + + INIT_LIST_HEAD(&shdev->common.channels); + + if (!pdata->slave_only) + dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); + if (pdata->slave && pdata->slave_num) + dma_cap_set(DMA_SLAVE, shdev->common.cap_mask); + + shdev->common.device_alloc_chan_resources + = sh_dmae_alloc_chan_resources; + shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources; + shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy; + shdev->common.device_tx_status = sh_dmae_tx_status; + shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending; + + /* Compulsory for DMA_SLAVE fields */ + shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg; + shdev->common.device_control = sh_dmae_control; + + /* Default transfer size of 32 bytes requires 32-byte alignment */ + shdev->common.copy_align = LOG2_DEFAULT_XFER_SIZE; + +#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE) + chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1); + + if (!chanirq_res) + chanirq_res = errirq_res; + else + irqres++; + + if (chanirq_res == errirq_res || + (errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE) + irqflags = IRQF_SHARED; + + errirq = errirq_res->start; + + err = request_irq(errirq, sh_dmae_err, irqflags, + "DMAC Address Error", shdev); + if (err) { + dev_err(&pdev->dev, + "DMA failed requesting irq #%d, error %d\n", + errirq, err); + goto eirq_err; + } + +#else + chanirq_res = errirq_res; +#endif /* CONFIG_CPU_SH4 || CONFIG_ARCH_SHMOBILE */ + + if (chanirq_res->start == chanirq_res->end && + !platform_get_resource(pdev, IORESOURCE_IRQ, 1)) { + /* Special case - all multiplexed */ + for (; irq_cnt < pdata->channel_num; irq_cnt++) { + if (irq_cnt < SH_DMAC_MAX_CHANNELS) { + chan_irq[irq_cnt] = chanirq_res->start; + chan_flag[irq_cnt] = IRQF_SHARED; + } else { + irq_cap = 1; + break; + } + } + } else { + do { + for (i = chanirq_res->start; i <= chanirq_res->end; i++) { + if (irq_cnt >= SH_DMAC_MAX_CHANNELS) { + irq_cap = 1; + break; + } + + if ((errirq_res->flags & IORESOURCE_BITS) == + IORESOURCE_IRQ_SHAREABLE) + chan_flag[irq_cnt] = IRQF_SHARED; + else + chan_flag[irq_cnt] = IRQF_DISABLED; + dev_dbg(&pdev->dev, + "Found IRQ %d for channel %d\n", + i, irq_cnt); + chan_irq[irq_cnt++] = i; + } + + if (irq_cnt >= SH_DMAC_MAX_CHANNELS) + break; + + chanirq_res = platform_get_resource(pdev, + IORESOURCE_IRQ, ++irqres); + } while (irq_cnt < pdata->channel_num && chanirq_res); + } + + /* Create DMA Channel */ + for (i = 0; i < irq_cnt; i++) { + err = sh_dmae_chan_probe(shdev, i, chan_irq[i], chan_flag[i]); + if (err) + goto chan_probe_err; + } + + if (irq_cap) + dev_notice(&pdev->dev, "Attempting to register %d DMA " + "channels when a maximum of %d are supported.\n", + pdata->channel_num, SH_DMAC_MAX_CHANNELS); + + pm_runtime_put(&pdev->dev); + + dma_async_device_register(&shdev->common); + + return err; + +chan_probe_err: + sh_dmae_chan_remove(shdev); + +#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE) + free_irq(errirq, shdev); +eirq_err: +#endif +rst_err: + spin_lock_irq(&sh_dmae_lock); + list_del_rcu(&shdev->node); + spin_unlock_irq(&sh_dmae_lock); + + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + if (dmars) + iounmap(shdev->dmars); + + platform_set_drvdata(pdev, NULL); +emapdmars: + iounmap(shdev->chan_reg); + synchronize_rcu(); +emapchan: + kfree(shdev); +ealloc: + if (dmars) + release_mem_region(dmars->start, resource_size(dmars)); +ermrdmars: + release_mem_region(chan->start, resource_size(chan)); + + return err; +} + +static int __exit sh_dmae_remove(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + struct resource *res; + int errirq = platform_get_irq(pdev, 0); + + dma_async_device_unregister(&shdev->common); + + if (errirq > 0) + free_irq(errirq, shdev); + + spin_lock_irq(&sh_dmae_lock); + list_del_rcu(&shdev->node); + spin_unlock_irq(&sh_dmae_lock); + + /* channel data remove */ + sh_dmae_chan_remove(shdev); + + pm_runtime_disable(&pdev->dev); + + if (shdev->dmars) + iounmap(shdev->dmars); + iounmap(shdev->chan_reg); + + platform_set_drvdata(pdev, NULL); + + synchronize_rcu(); + kfree(shdev); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res) + release_mem_region(res->start, resource_size(res)); + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (res) + release_mem_region(res->start, resource_size(res)); + + return 0; +} + +static void sh_dmae_shutdown(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + sh_dmae_ctl_stop(shdev); +} + +static int sh_dmae_runtime_suspend(struct device *dev) +{ + return 0; +} + +static int sh_dmae_runtime_resume(struct device *dev) +{ + struct sh_dmae_device *shdev = dev_get_drvdata(dev); + + return sh_dmae_rst(shdev); +} + +#ifdef CONFIG_PM +static int sh_dmae_suspend(struct device *dev) +{ + return 0; +} + +static int sh_dmae_resume(struct device *dev) +{ + struct sh_dmae_device *shdev = dev_get_drvdata(dev); + int i, ret; + + ret = sh_dmae_rst(shdev); + if (ret < 0) + dev_err(dev, "Failed to reset!\n"); + + for (i = 0; i < shdev->pdata->channel_num; i++) { + struct sh_dmae_chan *sh_chan = shdev->chan[i]; + struct sh_dmae_slave *param = sh_chan->common.private; + + if (!sh_chan->descs_allocated) + continue; + + if (param) { + const struct sh_dmae_slave_config *cfg = param->config; + dmae_set_dmars(sh_chan, cfg->mid_rid); + dmae_set_chcr(sh_chan, cfg->chcr); + } else { + dmae_init(sh_chan); + } + } + + return 0; +} +#else +#define sh_dmae_suspend NULL +#define sh_dmae_resume NULL +#endif + +const struct dev_pm_ops sh_dmae_pm = { + .suspend = sh_dmae_suspend, + .resume = sh_dmae_resume, + .runtime_suspend = sh_dmae_runtime_suspend, + .runtime_resume = sh_dmae_runtime_resume, +}; + +static struct platform_driver sh_dmae_driver = { + .remove = __exit_p(sh_dmae_remove), + .shutdown = sh_dmae_shutdown, + .driver = { + .owner = THIS_MODULE, + .name = "sh-dma-engine", + .pm = &sh_dmae_pm, + }, +}; + +static int __init sh_dmae_init(void) +{ + /* Wire up NMI handling */ + int err = register_die_notifier(&sh_dmae_nmi_notifier); + if (err) + return err; + + return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe); +} +module_init(sh_dmae_init); + +static void __exit sh_dmae_exit(void) +{ + platform_driver_unregister(&sh_dmae_driver); + + unregister_die_notifier(&sh_dmae_nmi_notifier); +} +module_exit(sh_dmae_exit); + +MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>"); +MODULE_DESCRIPTION("Renesas SH DMA Engine driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:sh-dma-engine"); diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h new file mode 100644 index 00000000..0b1d2c10 --- /dev/null +++ b/drivers/dma/shdma.h @@ -0,0 +1,66 @@ +/* + * Renesas SuperH DMA Engine support + * + * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com> + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#ifndef __DMA_SHDMA_H +#define __DMA_SHDMA_H + +#include <linux/dmaengine.h> +#include <linux/interrupt.h> +#include <linux/list.h> + +#define SH_DMAC_MAX_CHANNELS 20 +#define SH_DMA_SLAVE_NUMBER 256 +#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ + +struct device; + +enum dmae_pm_state { + DMAE_PM_ESTABLISHED, + DMAE_PM_BUSY, + DMAE_PM_PENDING, +}; + +struct sh_dmae_chan { + spinlock_t desc_lock; /* Descriptor operation lock */ + struct list_head ld_queue; /* Link descriptors queue */ + struct list_head ld_free; /* Link descriptors free */ + struct dma_chan common; /* DMA common channel */ + struct device *dev; /* Channel device */ + struct tasklet_struct tasklet; /* Tasklet */ + int descs_allocated; /* desc count */ + int xmit_shift; /* log_2(bytes_per_xfer) */ + int irq; + int id; /* Raw id of this channel */ + u32 __iomem *base; + char dev_id[16]; /* unique name per DMAC of channel */ + int pm_error; + enum dmae_pm_state pm_state; +}; + +struct sh_dmae_device { + struct dma_device common; + struct sh_dmae_chan *chan[SH_DMAC_MAX_CHANNELS]; + struct sh_dmae_pdata *pdata; + struct list_head node; + u32 __iomem *chan_reg; + u16 __iomem *dmars; + unsigned int chcr_offset; + u32 chcr_ie_bit; +}; + +#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common) +#define to_sh_desc(lh) container_of(lh, struct sh_desc, node) +#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx) +#define to_sh_dev(chan) container_of(chan->common.device,\ + struct sh_dmae_device, common) + +#endif /* __DMA_SHDMA_H */ diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c new file mode 100644 index 00000000..434ad311 --- /dev/null +++ b/drivers/dma/sirf-dma.c @@ -0,0 +1,698 @@ +/* + * DMA controller driver for CSR SiRFprimaII + * + * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. + * + * Licensed under GPLv2 or later. + */ + +#include <linux/module.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> +#include <linux/sirfsoc_dma.h> + +#include "dmaengine.h" + +#define SIRFSOC_DMA_DESCRIPTORS 16 +#define SIRFSOC_DMA_CHANNELS 16 + +#define SIRFSOC_DMA_CH_ADDR 0x00 +#define SIRFSOC_DMA_CH_XLEN 0x04 +#define SIRFSOC_DMA_CH_YLEN 0x08 +#define SIRFSOC_DMA_CH_CTRL 0x0C + +#define SIRFSOC_DMA_WIDTH_0 0x100 +#define SIRFSOC_DMA_CH_VALID 0x140 +#define SIRFSOC_DMA_CH_INT 0x144 +#define SIRFSOC_DMA_INT_EN 0x148 +#define SIRFSOC_DMA_CH_LOOP_CTRL 0x150 + +#define SIRFSOC_DMA_MODE_CTRL_BIT 4 +#define SIRFSOC_DMA_DIR_CTRL_BIT 5 + +/* xlen and dma_width register is in 4 bytes boundary */ +#define SIRFSOC_DMA_WORD_LEN 4 + +struct sirfsoc_dma_desc { + struct dma_async_tx_descriptor desc; + struct list_head node; + + /* SiRFprimaII 2D-DMA parameters */ + + int xlen; /* DMA xlen */ + int ylen; /* DMA ylen */ + int width; /* DMA width */ + int dir; + bool cyclic; /* is loop DMA? */ + u32 addr; /* DMA buffer address */ +}; + +struct sirfsoc_dma_chan { + struct dma_chan chan; + struct list_head free; + struct list_head prepared; + struct list_head queued; + struct list_head active; + struct list_head completed; + unsigned long happened_cyclic; + unsigned long completed_cyclic; + + /* Lock for this structure */ + spinlock_t lock; + + int mode; +}; + +struct sirfsoc_dma { + struct dma_device dma; + struct tasklet_struct tasklet; + struct sirfsoc_dma_chan channels[SIRFSOC_DMA_CHANNELS]; + void __iomem *base; + int irq; +}; + +#define DRV_NAME "sirfsoc_dma" + +/* Convert struct dma_chan to struct sirfsoc_dma_chan */ +static inline +struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c) +{ + return container_of(c, struct sirfsoc_dma_chan, chan); +} + +/* Convert struct dma_chan to struct sirfsoc_dma */ +static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c) +{ + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(c); + return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]); +} + +/* Execute all queued DMA descriptors */ +static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan) +{ + struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan); + int cid = schan->chan.chan_id; + struct sirfsoc_dma_desc *sdesc = NULL; + + /* + * lock has been held by functions calling this, so we don't hold + * lock again + */ + + sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc, + node); + /* Move the first queued descriptor to active list */ + list_move_tail(&schan->queued, &schan->active); + + /* Start the DMA transfer */ + writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 + + cid * 4); + writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 + + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 + + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) | + (1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + + /* + * writel has an implict memory write barrier to make sure data is + * flushed into memory before starting DMA + */ + writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + schan->happened_cyclic = schan->completed_cyclic = 0; + } +} + +/* Interrupt handler */ +static irqreturn_t sirfsoc_dma_irq(int irq, void *data) +{ + struct sirfsoc_dma *sdma = data; + struct sirfsoc_dma_chan *schan; + struct sirfsoc_dma_desc *sdesc = NULL; + u32 is; + int ch; + + is = readl(sdma->base + SIRFSOC_DMA_CH_INT); + while ((ch = fls(is) - 1) >= 0) { + is &= ~(1 << ch); + writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT); + schan = &sdma->channels[ch]; + + spin_lock(&schan->lock); + + sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, + node); + if (!sdesc->cyclic) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, &schan->completed); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } else + schan->happened_cyclic++; + + spin_unlock(&schan->lock); + } + + /* Schedule tasklet */ + tasklet_schedule(&sdma->tasklet); + + return IRQ_HANDLED; +} + +/* process completed descriptors */ +static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma) +{ + dma_cookie_t last_cookie = 0; + struct sirfsoc_dma_chan *schan; + struct sirfsoc_dma_desc *sdesc; + struct dma_async_tx_descriptor *desc; + unsigned long flags; + unsigned long happened_cyclic; + LIST_HEAD(list); + int i; + + for (i = 0; i < sdma->dma.chancnt; i++) { + schan = &sdma->channels[i]; + + /* Get all completed descriptors */ + spin_lock_irqsave(&schan->lock, flags); + if (!list_empty(&schan->completed)) { + list_splice_tail_init(&schan->completed, &list); + spin_unlock_irqrestore(&schan->lock, flags); + + /* Execute callbacks and run dependencies */ + list_for_each_entry(sdesc, &list, node) { + desc = &sdesc->desc; + + if (desc->callback) + desc->callback(desc->callback_param); + + last_cookie = desc->cookie; + dma_run_dependencies(desc); + } + + /* Free descriptors */ + spin_lock_irqsave(&schan->lock, flags); + list_splice_tail_init(&list, &schan->free); + schan->chan.completed_cookie = last_cookie; + spin_unlock_irqrestore(&schan->lock, flags); + } else { + /* for cyclic channel, desc is always in active list */ + sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, + node); + + if (!sdesc || (sdesc && !sdesc->cyclic)) { + /* without active cyclic DMA */ + spin_unlock_irqrestore(&schan->lock, flags); + continue; + } + + /* cyclic DMA */ + happened_cyclic = schan->happened_cyclic; + spin_unlock_irqrestore(&schan->lock, flags); + + desc = &sdesc->desc; + while (happened_cyclic != schan->completed_cyclic) { + if (desc->callback) + desc->callback(desc->callback_param); + schan->completed_cyclic++; + } + } + } +} + +/* DMA Tasklet */ +static void sirfsoc_dma_tasklet(unsigned long data) +{ + struct sirfsoc_dma *sdma = (void *)data; + + sirfsoc_dma_process_completed(sdma); +} + +/* Submit descriptor to hardware */ +static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(txd->chan); + struct sirfsoc_dma_desc *sdesc; + unsigned long flags; + dma_cookie_t cookie; + + sdesc = container_of(txd, struct sirfsoc_dma_desc, desc); + + spin_lock_irqsave(&schan->lock, flags); + + /* Move descriptor to queue */ + list_move_tail(&sdesc->node, &schan->queued); + + cookie = dma_cookie_assign(txd); + + spin_unlock_irqrestore(&schan->lock, flags); + + return cookie; +} + +static int sirfsoc_dma_slave_config(struct sirfsoc_dma_chan *schan, + struct dma_slave_config *config) +{ + unsigned long flags; + + if ((config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) || + (config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)) + return -EINVAL; + + spin_lock_irqsave(&schan->lock, flags); + schan->mode = (config->src_maxburst == 4 ? 1 : 0); + spin_unlock_irqrestore(&schan->lock, flags); + + return 0; +} + +static int sirfsoc_dma_terminate_all(struct sirfsoc_dma_chan *schan) +{ + struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan); + int cid = schan->chan.chan_id; + unsigned long flags; + + writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & + ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + + writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) + & ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + + spin_lock_irqsave(&schan->lock, flags); + list_splice_tail_init(&schan->active, &schan->free); + list_splice_tail_init(&schan->queued, &schan->free); + spin_unlock_irqrestore(&schan->lock, flags); + + return 0; +} + +static int sirfsoc_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct dma_slave_config *config; + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); + + switch (cmd) { + case DMA_TERMINATE_ALL: + return sirfsoc_dma_terminate_all(schan); + case DMA_SLAVE_CONFIG: + config = (struct dma_slave_config *)arg; + return sirfsoc_dma_slave_config(schan, config); + + default: + break; + } + + return -ENOSYS; +} + +/* Alloc channel resources */ +static int sirfsoc_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan); + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); + struct sirfsoc_dma_desc *sdesc; + unsigned long flags; + LIST_HEAD(descs); + int i; + + /* Alloc descriptors for this channel */ + for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) { + sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL); + if (!sdesc) { + dev_notice(sdma->dma.dev, "Memory allocation error. " + "Allocated only %u descriptors\n", i); + break; + } + + dma_async_tx_descriptor_init(&sdesc->desc, chan); + sdesc->desc.flags = DMA_CTRL_ACK; + sdesc->desc.tx_submit = sirfsoc_dma_tx_submit; + + list_add_tail(&sdesc->node, &descs); + } + + /* Return error only if no descriptors were allocated */ + if (i == 0) + return -ENOMEM; + + spin_lock_irqsave(&schan->lock, flags); + + list_splice_tail_init(&descs, &schan->free); + spin_unlock_irqrestore(&schan->lock, flags); + + return i; +} + +/* Free channel resources */ +static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); + struct sirfsoc_dma_desc *sdesc, *tmp; + unsigned long flags; + LIST_HEAD(descs); + + spin_lock_irqsave(&schan->lock, flags); + + /* Channel must be idle */ + BUG_ON(!list_empty(&schan->prepared)); + BUG_ON(!list_empty(&schan->queued)); + BUG_ON(!list_empty(&schan->active)); + BUG_ON(!list_empty(&schan->completed)); + + /* Move data */ + list_splice_tail_init(&schan->free, &descs); + + spin_unlock_irqrestore(&schan->lock, flags); + + /* Free descriptors */ + list_for_each_entry_safe(sdesc, tmp, &descs, node) + kfree(sdesc); +} + +/* Send pending descriptor to hardware */ +static void sirfsoc_dma_issue_pending(struct dma_chan *chan) +{ + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&schan->lock, flags); + + if (list_empty(&schan->active) && !list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + + spin_unlock_irqrestore(&schan->lock, flags); +} + +/* Check request completion status */ +static enum dma_status +sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); + unsigned long flags; + enum dma_status ret; + + spin_lock_irqsave(&schan->lock, flags); + ret = dma_cookie_status(chan, cookie, txstate); + spin_unlock_irqrestore(&schan->lock, flags); + + return ret; +} + +static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved( + struct dma_chan *chan, struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan); + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); + struct sirfsoc_dma_desc *sdesc = NULL; + unsigned long iflags; + int ret; + + if ((xt->dir != DMA_MEM_TO_DEV) || (xt->dir != DMA_DEV_TO_MEM)) { + ret = -EINVAL; + goto err_dir; + } + + /* Get free descriptor */ + spin_lock_irqsave(&schan->lock, iflags); + if (!list_empty(&schan->free)) { + sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc, + node); + list_del(&sdesc->node); + } + spin_unlock_irqrestore(&schan->lock, iflags); + + if (!sdesc) { + /* try to free completed descriptors */ + sirfsoc_dma_process_completed(sdma); + ret = 0; + goto no_desc; + } + + /* Place descriptor in prepared list */ + spin_lock_irqsave(&schan->lock, iflags); + + /* + * Number of chunks in a frame can only be 1 for prima2 + * and ylen (number of frame - 1) must be at least 0 + */ + if ((xt->frame_size == 1) && (xt->numf > 0)) { + sdesc->cyclic = 0; + sdesc->xlen = xt->sgl[0].size / SIRFSOC_DMA_WORD_LEN; + sdesc->width = (xt->sgl[0].size + xt->sgl[0].icg) / + SIRFSOC_DMA_WORD_LEN; + sdesc->ylen = xt->numf - 1; + if (xt->dir == DMA_MEM_TO_DEV) { + sdesc->addr = xt->src_start; + sdesc->dir = 1; + } else { + sdesc->addr = xt->dst_start; + sdesc->dir = 0; + } + + list_add_tail(&sdesc->node, &schan->prepared); + } else { + pr_err("sirfsoc DMA Invalid xfer\n"); + ret = -EINVAL; + goto err_xfer; + } + spin_unlock_irqrestore(&schan->lock, iflags); + + return &sdesc->desc; +err_xfer: + spin_unlock_irqrestore(&schan->lock, iflags); +no_desc: +err_dir: + return ERR_PTR(ret); +} + +static struct dma_async_tx_descriptor * +sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction direction, void *context) +{ + struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); + struct sirfsoc_dma_desc *sdesc = NULL; + unsigned long iflags; + + /* + * we only support cycle transfer with 2 period + * If the X-length is set to 0, it would be the loop mode. + * The DMA address keeps increasing until reaching the end of a loop + * area whose size is defined by (DMA_WIDTH x (Y_LENGTH + 1)). Then + * the DMA address goes back to the beginning of this area. + * In loop mode, the DMA data region is divided into two parts, BUFA + * and BUFB. DMA controller generates interrupts twice in each loop: + * when the DMA address reaches the end of BUFA or the end of the + * BUFB + */ + if (buf_len != 2 * period_len) + return ERR_PTR(-EINVAL); + + /* Get free descriptor */ + spin_lock_irqsave(&schan->lock, iflags); + if (!list_empty(&schan->free)) { + sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc, + node); + list_del(&sdesc->node); + } + spin_unlock_irqrestore(&schan->lock, iflags); + + if (!sdesc) + return 0; + + /* Place descriptor in prepared list */ + spin_lock_irqsave(&schan->lock, iflags); + sdesc->addr = addr; + sdesc->cyclic = 1; + sdesc->xlen = 0; + sdesc->ylen = buf_len / SIRFSOC_DMA_WORD_LEN - 1; + sdesc->width = 1; + list_add_tail(&sdesc->node, &schan->prepared); + spin_unlock_irqrestore(&schan->lock, iflags); + + return &sdesc->desc; +} + +/* + * The DMA controller consists of 16 independent DMA channels. + * Each channel is allocated to a different function + */ +bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id) +{ + unsigned int ch_nr = (unsigned int) chan_id; + + if (ch_nr == chan->chan_id + + chan->device->dev_id * SIRFSOC_DMA_CHANNELS) + return true; + + return false; +} +EXPORT_SYMBOL(sirfsoc_dma_filter_id); + +static int __devinit sirfsoc_dma_probe(struct platform_device *op) +{ + struct device_node *dn = op->dev.of_node; + struct device *dev = &op->dev; + struct dma_device *dma; + struct sirfsoc_dma *sdma; + struct sirfsoc_dma_chan *schan; + struct resource res; + ulong regs_start, regs_size; + u32 id; + int ret, i; + + sdma = devm_kzalloc(dev, sizeof(*sdma), GFP_KERNEL); + if (!sdma) { + dev_err(dev, "Memory exhausted!\n"); + return -ENOMEM; + } + + if (of_property_read_u32(dn, "cell-index", &id)) { + dev_err(dev, "Fail to get DMAC index\n"); + ret = -ENODEV; + goto free_mem; + } + + sdma->irq = irq_of_parse_and_map(dn, 0); + if (sdma->irq == NO_IRQ) { + dev_err(dev, "Error mapping IRQ!\n"); + ret = -EINVAL; + goto free_mem; + } + + ret = of_address_to_resource(dn, 0, &res); + if (ret) { + dev_err(dev, "Error parsing memory region!\n"); + goto free_mem; + } + + regs_start = res.start; + regs_size = resource_size(&res); + + sdma->base = devm_ioremap(dev, regs_start, regs_size); + if (!sdma->base) { + dev_err(dev, "Error mapping memory region!\n"); + ret = -ENOMEM; + goto irq_dispose; + } + + ret = devm_request_irq(dev, sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME, + sdma); + if (ret) { + dev_err(dev, "Error requesting IRQ!\n"); + ret = -EINVAL; + goto unmap_mem; + } + + dma = &sdma->dma; + dma->dev = dev; + dma->chancnt = SIRFSOC_DMA_CHANNELS; + + dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources; + dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources; + dma->device_issue_pending = sirfsoc_dma_issue_pending; + dma->device_control = sirfsoc_dma_control; + dma->device_tx_status = sirfsoc_dma_tx_status; + dma->device_prep_interleaved_dma = sirfsoc_dma_prep_interleaved; + dma->device_prep_dma_cyclic = sirfsoc_dma_prep_cyclic; + + INIT_LIST_HEAD(&dma->channels); + dma_cap_set(DMA_SLAVE, dma->cap_mask); + dma_cap_set(DMA_CYCLIC, dma->cap_mask); + dma_cap_set(DMA_INTERLEAVE, dma->cap_mask); + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + + for (i = 0; i < dma->chancnt; i++) { + schan = &sdma->channels[i]; + + schan->chan.device = dma; + dma_cookie_init(&schan->chan); + + INIT_LIST_HEAD(&schan->free); + INIT_LIST_HEAD(&schan->prepared); + INIT_LIST_HEAD(&schan->queued); + INIT_LIST_HEAD(&schan->active); + INIT_LIST_HEAD(&schan->completed); + + spin_lock_init(&schan->lock); + list_add_tail(&schan->chan.device_node, &dma->channels); + } + + tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma); + + /* Register DMA engine */ + dev_set_drvdata(dev, sdma); + ret = dma_async_device_register(dma); + if (ret) + goto free_irq; + + dev_info(dev, "initialized SIRFSOC DMAC driver\n"); + + return 0; + +free_irq: + devm_free_irq(dev, sdma->irq, sdma); +irq_dispose: + irq_dispose_mapping(sdma->irq); +unmap_mem: + iounmap(sdma->base); +free_mem: + devm_kfree(dev, sdma); + return ret; +} + +static int __devexit sirfsoc_dma_remove(struct platform_device *op) +{ + struct device *dev = &op->dev; + struct sirfsoc_dma *sdma = dev_get_drvdata(dev); + + dma_async_device_unregister(&sdma->dma); + devm_free_irq(dev, sdma->irq, sdma); + irq_dispose_mapping(sdma->irq); + iounmap(sdma->base); + devm_kfree(dev, sdma); + return 0; +} + +static struct of_device_id sirfsoc_dma_match[] = { + { .compatible = "sirf,prima2-dmac", }, + {}, +}; + +static struct platform_driver sirfsoc_dma_driver = { + .probe = sirfsoc_dma_probe, + .remove = __devexit_p(sirfsoc_dma_remove), + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .of_match_table = sirfsoc_dma_match, + }, +}; + +module_platform_driver(sirfsoc_dma_driver); + +MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, " + "Barry Song <baohua.song@csr.com>"); +MODULE_DESCRIPTION("SIRFSOC DMA control driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c new file mode 100644 index 00000000..2ed1ac35 --- /dev/null +++ b/drivers/dma/ste_dma40.c @@ -0,0 +1,3428 @@ +/* + * Copyright (C) Ericsson AB 2007-2008 + * Copyright (C) ST-Ericsson SA 2008-2010 + * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson + * License terms: GNU General Public License (GPL) version 2 + */ + +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> +#include <linux/err.h> +#include <linux/amba/bus.h> +#include <linux/regulator/consumer.h> + +#include <plat/ste_dma40.h> + +#include "dmaengine.h" +#include "ste_dma40_ll.h" + +#define D40_NAME "dma40" + +#define D40_PHY_CHAN -1 + +/* For masking out/in 2 bit channel positions */ +#define D40_CHAN_POS(chan) (2 * (chan / 2)) +#define D40_CHAN_POS_MASK(chan) (0x3 << D40_CHAN_POS(chan)) + +/* Maximum iterations taken before giving up suspending a channel */ +#define D40_SUSPEND_MAX_IT 500 + +/* Milliseconds */ +#define DMA40_AUTOSUSPEND_DELAY 100 + +/* Hardware requirement on LCLA alignment */ +#define LCLA_ALIGNMENT 0x40000 + +/* Max number of links per event group */ +#define D40_LCLA_LINK_PER_EVENT_GRP 128 +#define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP + +/* Attempts before giving up to trying to get pages that are aligned */ +#define MAX_LCLA_ALLOC_ATTEMPTS 256 + +/* Bit markings for allocation map */ +#define D40_ALLOC_FREE (1 << 31) +#define D40_ALLOC_PHY (1 << 30) +#define D40_ALLOC_LOG_FREE 0 + +/** + * enum 40_command - The different commands and/or statuses. + * + * @D40_DMA_STOP: DMA channel command STOP or status STOPPED, + * @D40_DMA_RUN: The DMA channel is RUNNING of the command RUN. + * @D40_DMA_SUSPEND_REQ: Request the DMA to SUSPEND as soon as possible. + * @D40_DMA_SUSPENDED: The DMA channel is SUSPENDED. + */ +enum d40_command { + D40_DMA_STOP = 0, + D40_DMA_RUN = 1, + D40_DMA_SUSPEND_REQ = 2, + D40_DMA_SUSPENDED = 3 +}; + +/* + * enum d40_events - The different Event Enables for the event lines. + * + * @D40_DEACTIVATE_EVENTLINE: De-activate Event line, stopping the logical chan. + * @D40_ACTIVATE_EVENTLINE: Activate the Event line, to start a logical chan. + * @D40_SUSPEND_REQ_EVENTLINE: Requesting for suspending a event line. + * @D40_ROUND_EVENTLINE: Status check for event line. + */ + +enum d40_events { + D40_DEACTIVATE_EVENTLINE = 0, + D40_ACTIVATE_EVENTLINE = 1, + D40_SUSPEND_REQ_EVENTLINE = 2, + D40_ROUND_EVENTLINE = 3 +}; + +/* + * These are the registers that has to be saved and later restored + * when the DMA hw is powered off. + * TODO: Add save/restore of D40_DREG_GCC on dma40 v3 or later, if that works. + */ +static u32 d40_backup_regs[] = { + D40_DREG_LCPA, + D40_DREG_LCLA, + D40_DREG_PRMSE, + D40_DREG_PRMSO, + D40_DREG_PRMOE, + D40_DREG_PRMOO, +}; + +#define BACKUP_REGS_SZ ARRAY_SIZE(d40_backup_regs) + +/* TODO: Check if all these registers have to be saved/restored on dma40 v3 */ +static u32 d40_backup_regs_v3[] = { + D40_DREG_PSEG1, + D40_DREG_PSEG2, + D40_DREG_PSEG3, + D40_DREG_PSEG4, + D40_DREG_PCEG1, + D40_DREG_PCEG2, + D40_DREG_PCEG3, + D40_DREG_PCEG4, + D40_DREG_RSEG1, + D40_DREG_RSEG2, + D40_DREG_RSEG3, + D40_DREG_RSEG4, + D40_DREG_RCEG1, + D40_DREG_RCEG2, + D40_DREG_RCEG3, + D40_DREG_RCEG4, +}; + +#define BACKUP_REGS_SZ_V3 ARRAY_SIZE(d40_backup_regs_v3) + +static u32 d40_backup_regs_chan[] = { + D40_CHAN_REG_SSCFG, + D40_CHAN_REG_SSELT, + D40_CHAN_REG_SSPTR, + D40_CHAN_REG_SSLNK, + D40_CHAN_REG_SDCFG, + D40_CHAN_REG_SDELT, + D40_CHAN_REG_SDPTR, + D40_CHAN_REG_SDLNK, +}; + +/** + * struct d40_lli_pool - Structure for keeping LLIs in memory + * + * @base: Pointer to memory area when the pre_alloc_lli's are not large + * enough, IE bigger than the most common case, 1 dst and 1 src. NULL if + * pre_alloc_lli is used. + * @dma_addr: DMA address, if mapped + * @size: The size in bytes of the memory at base or the size of pre_alloc_lli. + * @pre_alloc_lli: Pre allocated area for the most common case of transfers, + * one buffer to one buffer. + */ +struct d40_lli_pool { + void *base; + int size; + dma_addr_t dma_addr; + /* Space for dst and src, plus an extra for padding */ + u8 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)]; +}; + +/** + * struct d40_desc - A descriptor is one DMA job. + * + * @lli_phy: LLI settings for physical channel. Both src and dst= + * points into the lli_pool, to base if lli_len > 1 or to pre_alloc_lli if + * lli_len equals one. + * @lli_log: Same as above but for logical channels. + * @lli_pool: The pool with two entries pre-allocated. + * @lli_len: Number of llis of current descriptor. + * @lli_current: Number of transferred llis. + * @lcla_alloc: Number of LCLA entries allocated. + * @txd: DMA engine struct. Used for among other things for communication + * during a transfer. + * @node: List entry. + * @is_in_client_list: true if the client owns this descriptor. + * @cyclic: true if this is a cyclic job + * + * This descriptor is used for both logical and physical transfers. + */ +struct d40_desc { + /* LLI physical */ + struct d40_phy_lli_bidir lli_phy; + /* LLI logical */ + struct d40_log_lli_bidir lli_log; + + struct d40_lli_pool lli_pool; + int lli_len; + int lli_current; + int lcla_alloc; + + struct dma_async_tx_descriptor txd; + struct list_head node; + + bool is_in_client_list; + bool cyclic; +}; + +/** + * struct d40_lcla_pool - LCLA pool settings and data. + * + * @base: The virtual address of LCLA. 18 bit aligned. + * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used. + * This pointer is only there for clean-up on error. + * @pages: The number of pages needed for all physical channels. + * Only used later for clean-up on error + * @lock: Lock to protect the content in this struct. + * @alloc_map: big map over which LCLA entry is own by which job. + */ +struct d40_lcla_pool { + void *base; + dma_addr_t dma_addr; + void *base_unaligned; + int pages; + spinlock_t lock; + struct d40_desc **alloc_map; +}; + +/** + * struct d40_phy_res - struct for handling eventlines mapped to physical + * channels. + * + * @lock: A lock protection this entity. + * @reserved: True if used by secure world or otherwise. + * @num: The physical channel number of this entity. + * @allocated_src: Bit mapped to show which src event line's are mapped to + * this physical channel. Can also be free or physically allocated. + * @allocated_dst: Same as for src but is dst. + * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as + * event line number. + */ +struct d40_phy_res { + spinlock_t lock; + bool reserved; + int num; + u32 allocated_src; + u32 allocated_dst; +}; + +struct d40_base; + +/** + * struct d40_chan - Struct that describes a channel. + * + * @lock: A spinlock to protect this struct. + * @log_num: The logical number, if any of this channel. + * @pending_tx: The number of pending transfers. Used between interrupt handler + * and tasklet. + * @busy: Set to true when transfer is ongoing on this channel. + * @phy_chan: Pointer to physical channel which this instance runs on. If this + * point is NULL, then the channel is not allocated. + * @chan: DMA engine handle. + * @tasklet: Tasklet that gets scheduled from interrupt context to complete a + * transfer and call client callback. + * @client: Cliented owned descriptor list. + * @pending_queue: Submitted jobs, to be issued by issue_pending() + * @active: Active descriptor. + * @queue: Queued jobs. + * @prepare_queue: Prepared jobs. + * @dma_cfg: The client configuration of this dma channel. + * @configured: whether the dma_cfg configuration is valid + * @base: Pointer to the device instance struct. + * @src_def_cfg: Default cfg register setting for src. + * @dst_def_cfg: Default cfg register setting for dst. + * @log_def: Default logical channel settings. + * @lcpa: Pointer to dst and src lcpa settings. + * @runtime_addr: runtime configured address. + * @runtime_direction: runtime configured direction. + * + * This struct can either "be" a logical or a physical channel. + */ +struct d40_chan { + spinlock_t lock; + int log_num; + int pending_tx; + bool busy; + struct d40_phy_res *phy_chan; + struct dma_chan chan; + struct tasklet_struct tasklet; + struct list_head client; + struct list_head pending_queue; + struct list_head active; + struct list_head queue; + struct list_head prepare_queue; + struct stedma40_chan_cfg dma_cfg; + bool configured; + struct d40_base *base; + /* Default register configurations */ + u32 src_def_cfg; + u32 dst_def_cfg; + struct d40_def_lcsp log_def; + struct d40_log_lli_full *lcpa; + /* Runtime reconfiguration */ + dma_addr_t runtime_addr; + enum dma_transfer_direction runtime_direction; +}; + +/** + * struct d40_base - The big global struct, one for each probe'd instance. + * + * @interrupt_lock: Lock used to make sure one interrupt is handle a time. + * @execmd_lock: Lock for execute command usage since several channels share + * the same physical register. + * @dev: The device structure. + * @virtbase: The virtual base address of the DMA's register. + * @rev: silicon revision detected. + * @clk: Pointer to the DMA clock structure. + * @phy_start: Physical memory start of the DMA registers. + * @phy_size: Size of the DMA register map. + * @irq: The IRQ number. + * @num_phy_chans: The number of physical channels. Read from HW. This + * is the number of available channels for this driver, not counting "Secure + * mode" allocated physical channels. + * @num_log_chans: The number of logical channels. Calculated from + * num_phy_chans. + * @dma_both: dma_device channels that can do both memcpy and slave transfers. + * @dma_slave: dma_device channels that can do only do slave transfers. + * @dma_memcpy: dma_device channels that can do only do memcpy transfers. + * @phy_chans: Room for all possible physical channels in system. + * @log_chans: Room for all possible logical channels in system. + * @lookup_log_chans: Used to map interrupt number to logical channel. Points + * to log_chans entries. + * @lookup_phy_chans: Used to map interrupt number to physical channel. Points + * to phy_chans entries. + * @plat_data: Pointer to provided platform_data which is the driver + * configuration. + * @lcpa_regulator: Pointer to hold the regulator for the esram bank for lcla. + * @phy_res: Vector containing all physical channels. + * @lcla_pool: lcla pool settings and data. + * @lcpa_base: The virtual mapped address of LCPA. + * @phy_lcpa: The physical address of the LCPA. + * @lcpa_size: The size of the LCPA area. + * @desc_slab: cache for descriptors. + * @reg_val_backup: Here the values of some hardware registers are stored + * before the DMA is powered off. They are restored when the power is back on. + * @reg_val_backup_v3: Backup of registers that only exits on dma40 v3 and + * later. + * @reg_val_backup_chan: Backup data for standard channel parameter registers. + * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off. + * @initialized: true if the dma has been initialized + */ +struct d40_base { + spinlock_t interrupt_lock; + spinlock_t execmd_lock; + struct device *dev; + void __iomem *virtbase; + u8 rev:4; + struct clk *clk; + phys_addr_t phy_start; + resource_size_t phy_size; + int irq; + int num_phy_chans; + int num_log_chans; + struct dma_device dma_both; + struct dma_device dma_slave; + struct dma_device dma_memcpy; + struct d40_chan *phy_chans; + struct d40_chan *log_chans; + struct d40_chan **lookup_log_chans; + struct d40_chan **lookup_phy_chans; + struct stedma40_platform_data *plat_data; + struct regulator *lcpa_regulator; + /* Physical half channels */ + struct d40_phy_res *phy_res; + struct d40_lcla_pool lcla_pool; + void *lcpa_base; + dma_addr_t phy_lcpa; + resource_size_t lcpa_size; + struct kmem_cache *desc_slab; + u32 reg_val_backup[BACKUP_REGS_SZ]; + u32 reg_val_backup_v3[BACKUP_REGS_SZ_V3]; + u32 *reg_val_backup_chan; + u16 gcc_pwr_off_mask; + bool initialized; +}; + +/** + * struct d40_interrupt_lookup - lookup table for interrupt handler + * + * @src: Interrupt mask register. + * @clr: Interrupt clear register. + * @is_error: true if this is an error interrupt. + * @offset: start delta in the lookup_log_chans in d40_base. If equals to + * D40_PHY_CHAN, the lookup_phy_chans shall be used instead. + */ +struct d40_interrupt_lookup { + u32 src; + u32 clr; + bool is_error; + int offset; +}; + +/** + * struct d40_reg_val - simple lookup struct + * + * @reg: The register. + * @val: The value that belongs to the register in reg. + */ +struct d40_reg_val { + unsigned int reg; + unsigned int val; +}; + +static struct device *chan2dev(struct d40_chan *d40c) +{ + return &d40c->chan.dev->device; +} + +static bool chan_is_physical(struct d40_chan *chan) +{ + return chan->log_num == D40_PHY_CHAN; +} + +static bool chan_is_logical(struct d40_chan *chan) +{ + return !chan_is_physical(chan); +} + +static void __iomem *chan_base(struct d40_chan *chan) +{ + return chan->base->virtbase + D40_DREG_PCBASE + + chan->phy_chan->num * D40_DREG_PCDELTA; +} + +#define d40_err(dev, format, arg...) \ + dev_err(dev, "[%s] " format, __func__, ## arg) + +#define chan_err(d40c, format, arg...) \ + d40_err(chan2dev(d40c), format, ## arg) + +static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d, + int lli_len) +{ + bool is_log = chan_is_logical(d40c); + u32 align; + void *base; + + if (is_log) + align = sizeof(struct d40_log_lli); + else + align = sizeof(struct d40_phy_lli); + + if (lli_len == 1) { + base = d40d->lli_pool.pre_alloc_lli; + d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli); + d40d->lli_pool.base = NULL; + } else { + d40d->lli_pool.size = lli_len * 2 * align; + + base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT); + d40d->lli_pool.base = base; + + if (d40d->lli_pool.base == NULL) + return -ENOMEM; + } + + if (is_log) { + d40d->lli_log.src = PTR_ALIGN(base, align); + d40d->lli_log.dst = d40d->lli_log.src + lli_len; + + d40d->lli_pool.dma_addr = 0; + } else { + d40d->lli_phy.src = PTR_ALIGN(base, align); + d40d->lli_phy.dst = d40d->lli_phy.src + lli_len; + + d40d->lli_pool.dma_addr = dma_map_single(d40c->base->dev, + d40d->lli_phy.src, + d40d->lli_pool.size, + DMA_TO_DEVICE); + + if (dma_mapping_error(d40c->base->dev, + d40d->lli_pool.dma_addr)) { + kfree(d40d->lli_pool.base); + d40d->lli_pool.base = NULL; + d40d->lli_pool.dma_addr = 0; + return -ENOMEM; + } + } + + return 0; +} + +static void d40_pool_lli_free(struct d40_chan *d40c, struct d40_desc *d40d) +{ + if (d40d->lli_pool.dma_addr) + dma_unmap_single(d40c->base->dev, d40d->lli_pool.dma_addr, + d40d->lli_pool.size, DMA_TO_DEVICE); + + kfree(d40d->lli_pool.base); + d40d->lli_pool.base = NULL; + d40d->lli_pool.size = 0; + d40d->lli_log.src = NULL; + d40d->lli_log.dst = NULL; + d40d->lli_phy.src = NULL; + d40d->lli_phy.dst = NULL; +} + +static int d40_lcla_alloc_one(struct d40_chan *d40c, + struct d40_desc *d40d) +{ + unsigned long flags; + int i; + int ret = -EINVAL; + int p; + + spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); + + p = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP; + + /* + * Allocate both src and dst at the same time, therefore the half + * start on 1 since 0 can't be used since zero is used as end marker. + */ + for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) { + if (!d40c->base->lcla_pool.alloc_map[p + i]) { + d40c->base->lcla_pool.alloc_map[p + i] = d40d; + d40d->lcla_alloc++; + ret = i; + break; + } + } + + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); + + return ret; +} + +static int d40_lcla_free_all(struct d40_chan *d40c, + struct d40_desc *d40d) +{ + unsigned long flags; + int i; + int ret = -EINVAL; + + if (chan_is_physical(d40c)) + return 0; + + spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); + + for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) { + if (d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num * + D40_LCLA_LINK_PER_EVENT_GRP + i] == d40d) { + d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num * + D40_LCLA_LINK_PER_EVENT_GRP + i] = NULL; + d40d->lcla_alloc--; + if (d40d->lcla_alloc == 0) { + ret = 0; + break; + } + } + } + + spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags); + + return ret; + +} + +static void d40_desc_remove(struct d40_desc *d40d) +{ + list_del(&d40d->node); +} + +static struct d40_desc *d40_desc_get(struct d40_chan *d40c) +{ + struct d40_desc *desc = NULL; + + if (!list_empty(&d40c->client)) { + struct d40_desc *d; + struct d40_desc *_d; + + list_for_each_entry_safe(d, _d, &d40c->client, node) { + if (async_tx_test_ack(&d->txd)) { + d40_desc_remove(d); + desc = d; + memset(desc, 0, sizeof(*desc)); + break; + } + } + } + + if (!desc) + desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT); + + if (desc) + INIT_LIST_HEAD(&desc->node); + + return desc; +} + +static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d) +{ + + d40_pool_lli_free(d40c, d40d); + d40_lcla_free_all(d40c, d40d); + kmem_cache_free(d40c->base->desc_slab, d40d); +} + +static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc) +{ + list_add_tail(&desc->node, &d40c->active); +} + +static void d40_phy_lli_load(struct d40_chan *chan, struct d40_desc *desc) +{ + struct d40_phy_lli *lli_dst = desc->lli_phy.dst; + struct d40_phy_lli *lli_src = desc->lli_phy.src; + void __iomem *base = chan_base(chan); + + writel(lli_src->reg_cfg, base + D40_CHAN_REG_SSCFG); + writel(lli_src->reg_elt, base + D40_CHAN_REG_SSELT); + writel(lli_src->reg_ptr, base + D40_CHAN_REG_SSPTR); + writel(lli_src->reg_lnk, base + D40_CHAN_REG_SSLNK); + + writel(lli_dst->reg_cfg, base + D40_CHAN_REG_SDCFG); + writel(lli_dst->reg_elt, base + D40_CHAN_REG_SDELT); + writel(lli_dst->reg_ptr, base + D40_CHAN_REG_SDPTR); + writel(lli_dst->reg_lnk, base + D40_CHAN_REG_SDLNK); +} + +static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc) +{ + struct d40_lcla_pool *pool = &chan->base->lcla_pool; + struct d40_log_lli_bidir *lli = &desc->lli_log; + int lli_current = desc->lli_current; + int lli_len = desc->lli_len; + bool cyclic = desc->cyclic; + int curr_lcla = -EINVAL; + int first_lcla = 0; + bool use_esram_lcla = chan->base->plat_data->use_esram_lcla; + bool linkback; + + /* + * We may have partially running cyclic transfers, in case we did't get + * enough LCLA entries. + */ + linkback = cyclic && lli_current == 0; + + /* + * For linkback, we need one LCLA even with only one link, because we + * can't link back to the one in LCPA space + */ + if (linkback || (lli_len - lli_current > 1)) { + curr_lcla = d40_lcla_alloc_one(chan, desc); + first_lcla = curr_lcla; + } + + /* + * For linkback, we normally load the LCPA in the loop since we need to + * link it to the second LCLA and not the first. However, if we + * couldn't even get a first LCLA, then we have to run in LCPA and + * reload manually. + */ + if (!linkback || curr_lcla == -EINVAL) { + unsigned int flags = 0; + + if (curr_lcla == -EINVAL) + flags |= LLI_TERM_INT; + + d40_log_lli_lcpa_write(chan->lcpa, + &lli->dst[lli_current], + &lli->src[lli_current], + curr_lcla, + flags); + lli_current++; + } + + if (curr_lcla < 0) + goto out; + + for (; lli_current < lli_len; lli_current++) { + unsigned int lcla_offset = chan->phy_chan->num * 1024 + + 8 * curr_lcla * 2; + struct d40_log_lli *lcla = pool->base + lcla_offset; + unsigned int flags = 0; + int next_lcla; + + if (lli_current + 1 < lli_len) + next_lcla = d40_lcla_alloc_one(chan, desc); + else + next_lcla = linkback ? first_lcla : -EINVAL; + + if (cyclic || next_lcla == -EINVAL) + flags |= LLI_TERM_INT; + + if (linkback && curr_lcla == first_lcla) { + /* First link goes in both LCPA and LCLA */ + d40_log_lli_lcpa_write(chan->lcpa, + &lli->dst[lli_current], + &lli->src[lli_current], + next_lcla, flags); + } + + /* + * One unused LCLA in the cyclic case if the very first + * next_lcla fails... + */ + d40_log_lli_lcla_write(lcla, + &lli->dst[lli_current], + &lli->src[lli_current], + next_lcla, flags); + + /* + * Cache maintenance is not needed if lcla is + * mapped in esram + */ + if (!use_esram_lcla) { + dma_sync_single_range_for_device(chan->base->dev, + pool->dma_addr, lcla_offset, + 2 * sizeof(struct d40_log_lli), + DMA_TO_DEVICE); + } + curr_lcla = next_lcla; + + if (curr_lcla == -EINVAL || curr_lcla == first_lcla) { + lli_current++; + break; + } + } + +out: + desc->lli_current = lli_current; +} + +static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) +{ + if (chan_is_physical(d40c)) { + d40_phy_lli_load(d40c, d40d); + d40d->lli_current = d40d->lli_len; + } else + d40_log_lli_to_lcxa(d40c, d40d); +} + +static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) +{ + struct d40_desc *d; + + if (list_empty(&d40c->active)) + return NULL; + + d = list_first_entry(&d40c->active, + struct d40_desc, + node); + return d; +} + +/* remove desc from current queue and add it to the pending_queue */ +static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) +{ + d40_desc_remove(desc); + desc->is_in_client_list = false; + list_add_tail(&desc->node, &d40c->pending_queue); +} + +static struct d40_desc *d40_first_pending(struct d40_chan *d40c) +{ + struct d40_desc *d; + + if (list_empty(&d40c->pending_queue)) + return NULL; + + d = list_first_entry(&d40c->pending_queue, + struct d40_desc, + node); + return d; +} + +static struct d40_desc *d40_first_queued(struct d40_chan *d40c) +{ + struct d40_desc *d; + + if (list_empty(&d40c->queue)) + return NULL; + + d = list_first_entry(&d40c->queue, + struct d40_desc, + node); + return d; +} + +static int d40_psize_2_burst_size(bool is_log, int psize) +{ + if (is_log) { + if (psize == STEDMA40_PSIZE_LOG_1) + return 1; + } else { + if (psize == STEDMA40_PSIZE_PHY_1) + return 1; + } + + return 2 << psize; +} + +/* + * The dma only supports transmitting packages up to + * STEDMA40_MAX_SEG_SIZE << data_width. Calculate the total number of + * dma elements required to send the entire sg list + */ +static int d40_size_2_dmalen(int size, u32 data_width1, u32 data_width2) +{ + int dmalen; + u32 max_w = max(data_width1, data_width2); + u32 min_w = min(data_width1, data_width2); + u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w); + + if (seg_max > STEDMA40_MAX_SEG_SIZE) + seg_max -= (1 << max_w); + + if (!IS_ALIGNED(size, 1 << max_w)) + return -EINVAL; + + if (size <= seg_max) + dmalen = 1; + else { + dmalen = size / seg_max; + if (dmalen * seg_max < size) + dmalen++; + } + return dmalen; +} + +static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len, + u32 data_width1, u32 data_width2) +{ + struct scatterlist *sg; + int i; + int len = 0; + int ret; + + for_each_sg(sgl, sg, sg_len, i) { + ret = d40_size_2_dmalen(sg_dma_len(sg), + data_width1, data_width2); + if (ret < 0) + return ret; + len += ret; + } + return len; +} + + +#ifdef CONFIG_PM +static void dma40_backup(void __iomem *baseaddr, u32 *backup, + u32 *regaddr, int num, bool save) +{ + int i; + + for (i = 0; i < num; i++) { + void __iomem *addr = baseaddr + regaddr[i]; + + if (save) + backup[i] = readl_relaxed(addr); + else + writel_relaxed(backup[i], addr); + } +} + +static void d40_save_restore_registers(struct d40_base *base, bool save) +{ + int i; + + /* Save/Restore channel specific registers */ + for (i = 0; i < base->num_phy_chans; i++) { + void __iomem *addr; + int idx; + + if (base->phy_res[i].reserved) + continue; + + addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA; + idx = i * ARRAY_SIZE(d40_backup_regs_chan); + + dma40_backup(addr, &base->reg_val_backup_chan[idx], + d40_backup_regs_chan, + ARRAY_SIZE(d40_backup_regs_chan), + save); + } + + /* Save/Restore global registers */ + dma40_backup(base->virtbase, base->reg_val_backup, + d40_backup_regs, ARRAY_SIZE(d40_backup_regs), + save); + + /* Save/Restore registers only existing on dma40 v3 and later */ + if (base->rev >= 3) + dma40_backup(base->virtbase, base->reg_val_backup_v3, + d40_backup_regs_v3, + ARRAY_SIZE(d40_backup_regs_v3), + save); +} +#else +static void d40_save_restore_registers(struct d40_base *base, bool save) +{ +} +#endif + +static int __d40_execute_command_phy(struct d40_chan *d40c, + enum d40_command command) +{ + u32 status; + int i; + void __iomem *active_reg; + int ret = 0; + unsigned long flags; + u32 wmask; + + if (command == D40_DMA_STOP) { + ret = __d40_execute_command_phy(d40c, D40_DMA_SUSPEND_REQ); + if (ret) + return ret; + } + + spin_lock_irqsave(&d40c->base->execmd_lock, flags); + + if (d40c->phy_chan->num % 2 == 0) + active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; + else + active_reg = d40c->base->virtbase + D40_DREG_ACTIVO; + + if (command == D40_DMA_SUSPEND_REQ) { + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + + if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP) + goto done; + } + + wmask = 0xffffffff & ~(D40_CHAN_POS_MASK(d40c->phy_chan->num)); + writel(wmask | (command << D40_CHAN_POS(d40c->phy_chan->num)), + active_reg); + + if (command == D40_DMA_SUSPEND_REQ) { + + for (i = 0 ; i < D40_SUSPEND_MAX_IT; i++) { + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + + cpu_relax(); + /* + * Reduce the number of bus accesses while + * waiting for the DMA to suspend. + */ + udelay(3); + + if (status == D40_DMA_STOP || + status == D40_DMA_SUSPENDED) + break; + } + + if (i == D40_SUSPEND_MAX_IT) { + chan_err(d40c, + "unable to suspend the chl %d (log: %d) status %x\n", + d40c->phy_chan->num, d40c->log_num, + status); + dump_stack(); + ret = -EBUSY; + } + + } +done: + spin_unlock_irqrestore(&d40c->base->execmd_lock, flags); + return ret; +} + +static void d40_term_all(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + struct d40_desc *_d; + + /* Release active descriptors */ + while ((d40d = d40_first_active_get(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release queued descriptors waiting for transfer */ + while ((d40d = d40_first_queued(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release pending descriptors */ + while ((d40d = d40_first_pending(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release client owned descriptors */ + if (!list_empty(&d40c->client)) + list_for_each_entry_safe(d40d, _d, &d40c->client, node) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + /* Release descriptors in prepare queue */ + if (!list_empty(&d40c->prepare_queue)) + list_for_each_entry_safe(d40d, _d, + &d40c->prepare_queue, node) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } + + d40c->pending_tx = 0; +} + +static void __d40_config_set_event(struct d40_chan *d40c, + enum d40_events event_type, u32 event, + int reg) +{ + void __iomem *addr = chan_base(d40c) + reg; + int tries; + u32 status; + + switch (event_type) { + + case D40_DEACTIVATE_EVENTLINE: + + writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event)) + | ~D40_EVENTLINE_MASK(event), addr); + break; + + case D40_SUSPEND_REQ_EVENTLINE: + status = (readl(addr) & D40_EVENTLINE_MASK(event)) >> + D40_EVENTLINE_POS(event); + + if (status == D40_DEACTIVATE_EVENTLINE || + status == D40_SUSPEND_REQ_EVENTLINE) + break; + + writel((D40_SUSPEND_REQ_EVENTLINE << D40_EVENTLINE_POS(event)) + | ~D40_EVENTLINE_MASK(event), addr); + + for (tries = 0 ; tries < D40_SUSPEND_MAX_IT; tries++) { + + status = (readl(addr) & D40_EVENTLINE_MASK(event)) >> + D40_EVENTLINE_POS(event); + + cpu_relax(); + /* + * Reduce the number of bus accesses while + * waiting for the DMA to suspend. + */ + udelay(3); + + if (status == D40_DEACTIVATE_EVENTLINE) + break; + } + + if (tries == D40_SUSPEND_MAX_IT) { + chan_err(d40c, + "unable to stop the event_line chl %d (log: %d)" + "status %x\n", d40c->phy_chan->num, + d40c->log_num, status); + } + break; + + case D40_ACTIVATE_EVENTLINE: + /* + * The hardware sometimes doesn't register the enable when src and dst + * event lines are active on the same logical channel. Retry to ensure + * it does. Usually only one retry is sufficient. + */ + tries = 100; + while (--tries) { + writel((D40_ACTIVATE_EVENTLINE << + D40_EVENTLINE_POS(event)) | + ~D40_EVENTLINE_MASK(event), addr); + + if (readl(addr) & D40_EVENTLINE_MASK(event)) + break; + } + + if (tries != 99) + dev_dbg(chan2dev(d40c), + "[%s] workaround enable S%cLNK (%d tries)\n", + __func__, reg == D40_CHAN_REG_SSLNK ? 'S' : 'D', + 100 - tries); + + WARN_ON(!tries); + break; + + case D40_ROUND_EVENTLINE: + BUG(); + break; + + } +} + +static void d40_config_set_event(struct d40_chan *d40c, + enum d40_events event_type) +{ + /* Enable event line connected to device (or memcpy) */ + if ((d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) || + (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) { + u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); + + __d40_config_set_event(d40c, event_type, event, + D40_CHAN_REG_SSLNK); + } + + if (d40c->dma_cfg.dir != STEDMA40_PERIPH_TO_MEM) { + u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); + + __d40_config_set_event(d40c, event_type, event, + D40_CHAN_REG_SDLNK); + } +} + +static u32 d40_chan_has_events(struct d40_chan *d40c) +{ + void __iomem *chanbase = chan_base(d40c); + u32 val; + + val = readl(chanbase + D40_CHAN_REG_SSLNK); + val |= readl(chanbase + D40_CHAN_REG_SDLNK); + + return val; +} + +static int +__d40_execute_command_log(struct d40_chan *d40c, enum d40_command command) +{ + unsigned long flags; + int ret = 0; + u32 active_status; + void __iomem *active_reg; + + if (d40c->phy_chan->num % 2 == 0) + active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; + else + active_reg = d40c->base->virtbase + D40_DREG_ACTIVO; + + + spin_lock_irqsave(&d40c->phy_chan->lock, flags); + + switch (command) { + case D40_DMA_STOP: + case D40_DMA_SUSPEND_REQ: + + active_status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + + if (active_status == D40_DMA_RUN) + d40_config_set_event(d40c, D40_SUSPEND_REQ_EVENTLINE); + else + d40_config_set_event(d40c, D40_DEACTIVATE_EVENTLINE); + + if (!d40_chan_has_events(d40c) && (command == D40_DMA_STOP)) + ret = __d40_execute_command_phy(d40c, command); + + break; + + case D40_DMA_RUN: + + d40_config_set_event(d40c, D40_ACTIVATE_EVENTLINE); + ret = __d40_execute_command_phy(d40c, command); + break; + + case D40_DMA_SUSPENDED: + BUG(); + break; + } + + spin_unlock_irqrestore(&d40c->phy_chan->lock, flags); + return ret; +} + +static int d40_channel_execute_command(struct d40_chan *d40c, + enum d40_command command) +{ + if (chan_is_logical(d40c)) + return __d40_execute_command_log(d40c, command); + else + return __d40_execute_command_phy(d40c, command); +} + +static u32 d40_get_prmo(struct d40_chan *d40c) +{ + static const unsigned int phy_map[] = { + [STEDMA40_PCHAN_BASIC_MODE] + = D40_DREG_PRMO_PCHAN_BASIC, + [STEDMA40_PCHAN_MODULO_MODE] + = D40_DREG_PRMO_PCHAN_MODULO, + [STEDMA40_PCHAN_DOUBLE_DST_MODE] + = D40_DREG_PRMO_PCHAN_DOUBLE_DST, + }; + static const unsigned int log_map[] = { + [STEDMA40_LCHAN_SRC_PHY_DST_LOG] + = D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG, + [STEDMA40_LCHAN_SRC_LOG_DST_PHY] + = D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY, + [STEDMA40_LCHAN_SRC_LOG_DST_LOG] + = D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG, + }; + + if (chan_is_physical(d40c)) + return phy_map[d40c->dma_cfg.mode_opt]; + else + return log_map[d40c->dma_cfg.mode_opt]; +} + +static void d40_config_write(struct d40_chan *d40c) +{ + u32 addr_base; + u32 var; + + /* Odd addresses are even addresses + 4 */ + addr_base = (d40c->phy_chan->num % 2) * 4; + /* Setup channel mode to logical or physical */ + var = ((u32)(chan_is_logical(d40c)) + 1) << + D40_CHAN_POS(d40c->phy_chan->num); + writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base); + + /* Setup operational mode option register */ + var = d40_get_prmo(d40c) << D40_CHAN_POS(d40c->phy_chan->num); + + writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base); + + if (chan_is_logical(d40c)) { + int lidx = (d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) + & D40_SREG_ELEM_LOG_LIDX_MASK; + void __iomem *chanbase = chan_base(d40c); + + /* Set default config for CFG reg */ + writel(d40c->src_def_cfg, chanbase + D40_CHAN_REG_SSCFG); + writel(d40c->dst_def_cfg, chanbase + D40_CHAN_REG_SDCFG); + + /* Set LIDX for lcla */ + writel(lidx, chanbase + D40_CHAN_REG_SSELT); + writel(lidx, chanbase + D40_CHAN_REG_SDELT); + + /* Clear LNK which will be used by d40_chan_has_events() */ + writel(0, chanbase + D40_CHAN_REG_SSLNK); + writel(0, chanbase + D40_CHAN_REG_SDLNK); + } +} + +static u32 d40_residue(struct d40_chan *d40c) +{ + u32 num_elt; + + if (chan_is_logical(d40c)) + num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) + >> D40_MEM_LCSP2_ECNT_POS; + else { + u32 val = readl(chan_base(d40c) + D40_CHAN_REG_SDELT); + num_elt = (val & D40_SREG_ELEM_PHY_ECNT_MASK) + >> D40_SREG_ELEM_PHY_ECNT_POS; + } + + return num_elt * (1 << d40c->dma_cfg.dst_info.data_width); +} + +static bool d40_tx_is_linked(struct d40_chan *d40c) +{ + bool is_link; + + if (chan_is_logical(d40c)) + is_link = readl(&d40c->lcpa->lcsp3) & D40_MEM_LCSP3_DLOS_MASK; + else + is_link = readl(chan_base(d40c) + D40_CHAN_REG_SDLNK) + & D40_SREG_LNK_PHYS_LNK_MASK; + + return is_link; +} + +static int d40_pause(struct d40_chan *d40c) +{ + int res = 0; + unsigned long flags; + + if (!d40c->busy) + return 0; + + pm_runtime_get_sync(d40c->base->dev); + spin_lock_irqsave(&d40c->lock, flags); + + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + spin_unlock_irqrestore(&d40c->lock, flags); + return res; +} + +static int d40_resume(struct d40_chan *d40c) +{ + int res = 0; + unsigned long flags; + + if (!d40c->busy) + return 0; + + spin_lock_irqsave(&d40c->lock, flags); + pm_runtime_get_sync(d40c->base->dev); + + /* If bytes left to transfer or linked tx resume job */ + if (d40_residue(d40c) || d40_tx_is_linked(d40c)) + res = d40_channel_execute_command(d40c, D40_DMA_RUN); + + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + spin_unlock_irqrestore(&d40c->lock, flags); + return res; +} + +static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct d40_chan *d40c = container_of(tx->chan, + struct d40_chan, + chan); + struct d40_desc *d40d = container_of(tx, struct d40_desc, txd); + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&d40c->lock, flags); + cookie = dma_cookie_assign(tx); + d40_desc_queue(d40c, d40d); + spin_unlock_irqrestore(&d40c->lock, flags); + + return cookie; +} + +static int d40_start(struct d40_chan *d40c) +{ + return d40_channel_execute_command(d40c, D40_DMA_RUN); +} + +static struct d40_desc *d40_queue_start(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + int err; + + /* Start queued jobs, if any */ + d40d = d40_first_queued(d40c); + + if (d40d != NULL) { + if (!d40c->busy) { + d40c->busy = true; + pm_runtime_get_sync(d40c->base->dev); + } + + /* Remove from queue */ + d40_desc_remove(d40d); + + /* Add to active queue */ + d40_desc_submit(d40c, d40d); + + /* Initiate DMA job */ + d40_desc_load(d40c, d40d); + + /* Start dma job */ + err = d40_start(d40c); + + if (err) + return NULL; + } + + return d40d; +} + +/* called from interrupt context */ +static void dma_tc_handle(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + + /* Get first active entry from list */ + d40d = d40_first_active_get(d40c); + + if (d40d == NULL) + return; + + if (d40d->cyclic) { + /* + * If this was a paritially loaded list, we need to reloaded + * it, and only when the list is completed. We need to check + * for done because the interrupt will hit for every link, and + * not just the last one. + */ + if (d40d->lli_current < d40d->lli_len + && !d40_tx_is_linked(d40c) + && !d40_residue(d40c)) { + d40_lcla_free_all(d40c, d40d); + d40_desc_load(d40c, d40d); + (void) d40_start(d40c); + + if (d40d->lli_current == d40d->lli_len) + d40d->lli_current = 0; + } + } else { + d40_lcla_free_all(d40c, d40d); + + if (d40d->lli_current < d40d->lli_len) { + d40_desc_load(d40c, d40d); + /* Start dma job */ + (void) d40_start(d40c); + return; + } + + if (d40_queue_start(d40c) == NULL) + d40c->busy = false; + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + } + + d40c->pending_tx++; + tasklet_schedule(&d40c->tasklet); + +} + +static void dma_tasklet(unsigned long data) +{ + struct d40_chan *d40c = (struct d40_chan *) data; + struct d40_desc *d40d; + unsigned long flags; + dma_async_tx_callback callback; + void *callback_param; + + spin_lock_irqsave(&d40c->lock, flags); + + /* Get first active entry from list */ + d40d = d40_first_active_get(d40c); + if (d40d == NULL) + goto err; + + if (!d40d->cyclic) + dma_cookie_complete(&d40d->txd); + + /* + * If terminating a channel pending_tx is set to zero. + * This prevents any finished active jobs to return to the client. + */ + if (d40c->pending_tx == 0) { + spin_unlock_irqrestore(&d40c->lock, flags); + return; + } + + /* Callback to client */ + callback = d40d->txd.callback; + callback_param = d40d->txd.callback_param; + + if (!d40d->cyclic) { + if (async_tx_test_ack(&d40d->txd)) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } else { + if (!d40d->is_in_client_list) { + d40_desc_remove(d40d); + d40_lcla_free_all(d40c, d40d); + list_add_tail(&d40d->node, &d40c->client); + d40d->is_in_client_list = true; + } + } + } + + d40c->pending_tx--; + + if (d40c->pending_tx) + tasklet_schedule(&d40c->tasklet); + + spin_unlock_irqrestore(&d40c->lock, flags); + + if (callback && (d40d->txd.flags & DMA_PREP_INTERRUPT)) + callback(callback_param); + + return; + +err: + /* Rescue manouver if receiving double interrupts */ + if (d40c->pending_tx > 0) + d40c->pending_tx--; + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static irqreturn_t d40_handle_interrupt(int irq, void *data) +{ + static const struct d40_interrupt_lookup il[] = { + {D40_DREG_LCTIS0, D40_DREG_LCICR0, false, 0}, + {D40_DREG_LCTIS1, D40_DREG_LCICR1, false, 32}, + {D40_DREG_LCTIS2, D40_DREG_LCICR2, false, 64}, + {D40_DREG_LCTIS3, D40_DREG_LCICR3, false, 96}, + {D40_DREG_LCEIS0, D40_DREG_LCICR0, true, 0}, + {D40_DREG_LCEIS1, D40_DREG_LCICR1, true, 32}, + {D40_DREG_LCEIS2, D40_DREG_LCICR2, true, 64}, + {D40_DREG_LCEIS3, D40_DREG_LCICR3, true, 96}, + {D40_DREG_PCTIS, D40_DREG_PCICR, false, D40_PHY_CHAN}, + {D40_DREG_PCEIS, D40_DREG_PCICR, true, D40_PHY_CHAN}, + }; + + int i; + u32 regs[ARRAY_SIZE(il)]; + u32 idx; + u32 row; + long chan = -1; + struct d40_chan *d40c; + unsigned long flags; + struct d40_base *base = data; + + spin_lock_irqsave(&base->interrupt_lock, flags); + + /* Read interrupt status of both logical and physical channels */ + for (i = 0; i < ARRAY_SIZE(il); i++) + regs[i] = readl(base->virtbase + il[i].src); + + for (;;) { + + chan = find_next_bit((unsigned long *)regs, + BITS_PER_LONG * ARRAY_SIZE(il), chan + 1); + + /* No more set bits found? */ + if (chan == BITS_PER_LONG * ARRAY_SIZE(il)) + break; + + row = chan / BITS_PER_LONG; + idx = chan & (BITS_PER_LONG - 1); + + /* ACK interrupt */ + writel(1 << idx, base->virtbase + il[row].clr); + + if (il[row].offset == D40_PHY_CHAN) + d40c = base->lookup_phy_chans[idx]; + else + d40c = base->lookup_log_chans[il[row].offset + idx]; + spin_lock(&d40c->lock); + + if (!il[row].is_error) + dma_tc_handle(d40c); + else + d40_err(base->dev, "IRQ chan: %ld offset %d idx %d\n", + chan, il[row].offset, idx); + + spin_unlock(&d40c->lock); + } + + spin_unlock_irqrestore(&base->interrupt_lock, flags); + + return IRQ_HANDLED; +} + +static int d40_validate_conf(struct d40_chan *d40c, + struct stedma40_chan_cfg *conf) +{ + int res = 0; + u32 dst_event_group = D40_TYPE_TO_GROUP(conf->dst_dev_type); + u32 src_event_group = D40_TYPE_TO_GROUP(conf->src_dev_type); + bool is_log = conf->mode == STEDMA40_MODE_LOGICAL; + + if (!conf->dir) { + chan_err(d40c, "Invalid direction.\n"); + res = -EINVAL; + } + + if (conf->dst_dev_type != STEDMA40_DEV_DST_MEMORY && + d40c->base->plat_data->dev_tx[conf->dst_dev_type] == 0 && + d40c->runtime_addr == 0) { + + chan_err(d40c, "Invalid TX channel address (%d)\n", + conf->dst_dev_type); + res = -EINVAL; + } + + if (conf->src_dev_type != STEDMA40_DEV_SRC_MEMORY && + d40c->base->plat_data->dev_rx[conf->src_dev_type] == 0 && + d40c->runtime_addr == 0) { + chan_err(d40c, "Invalid RX channel address (%d)\n", + conf->src_dev_type); + res = -EINVAL; + } + + if (conf->dir == STEDMA40_MEM_TO_PERIPH && + dst_event_group == STEDMA40_DEV_DST_MEMORY) { + chan_err(d40c, "Invalid dst\n"); + res = -EINVAL; + } + + if (conf->dir == STEDMA40_PERIPH_TO_MEM && + src_event_group == STEDMA40_DEV_SRC_MEMORY) { + chan_err(d40c, "Invalid src\n"); + res = -EINVAL; + } + + if (src_event_group == STEDMA40_DEV_SRC_MEMORY && + dst_event_group == STEDMA40_DEV_DST_MEMORY && is_log) { + chan_err(d40c, "No event line\n"); + res = -EINVAL; + } + + if (conf->dir == STEDMA40_PERIPH_TO_PERIPH && + (src_event_group != dst_event_group)) { + chan_err(d40c, "Invalid event group\n"); + res = -EINVAL; + } + + if (conf->dir == STEDMA40_PERIPH_TO_PERIPH) { + /* + * DMAC HW supports it. Will be added to this driver, + * in case any dma client requires it. + */ + chan_err(d40c, "periph to periph not supported\n"); + res = -EINVAL; + } + + if (d40_psize_2_burst_size(is_log, conf->src_info.psize) * + (1 << conf->src_info.data_width) != + d40_psize_2_burst_size(is_log, conf->dst_info.psize) * + (1 << conf->dst_info.data_width)) { + /* + * The DMAC hardware only supports + * src (burst x width) == dst (burst x width) + */ + + chan_err(d40c, "src (burst x width) != dst (burst x width)\n"); + res = -EINVAL; + } + + return res; +} + +static bool d40_alloc_mask_set(struct d40_phy_res *phy, + bool is_src, int log_event_line, bool is_log, + bool *first_user) +{ + unsigned long flags; + spin_lock_irqsave(&phy->lock, flags); + + *first_user = ((phy->allocated_src | phy->allocated_dst) + == D40_ALLOC_FREE); + + if (!is_log) { + /* Physical interrupts are masked per physical full channel */ + if (phy->allocated_src == D40_ALLOC_FREE && + phy->allocated_dst == D40_ALLOC_FREE) { + phy->allocated_dst = D40_ALLOC_PHY; + phy->allocated_src = D40_ALLOC_PHY; + goto found; + } else + goto not_found; + } + + /* Logical channel */ + if (is_src) { + if (phy->allocated_src == D40_ALLOC_PHY) + goto not_found; + + if (phy->allocated_src == D40_ALLOC_FREE) + phy->allocated_src = D40_ALLOC_LOG_FREE; + + if (!(phy->allocated_src & (1 << log_event_line))) { + phy->allocated_src |= 1 << log_event_line; + goto found; + } else + goto not_found; + } else { + if (phy->allocated_dst == D40_ALLOC_PHY) + goto not_found; + + if (phy->allocated_dst == D40_ALLOC_FREE) + phy->allocated_dst = D40_ALLOC_LOG_FREE; + + if (!(phy->allocated_dst & (1 << log_event_line))) { + phy->allocated_dst |= 1 << log_event_line; + goto found; + } else + goto not_found; + } + +not_found: + spin_unlock_irqrestore(&phy->lock, flags); + return false; +found: + spin_unlock_irqrestore(&phy->lock, flags); + return true; +} + +static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src, + int log_event_line) +{ + unsigned long flags; + bool is_free = false; + + spin_lock_irqsave(&phy->lock, flags); + if (!log_event_line) { + phy->allocated_dst = D40_ALLOC_FREE; + phy->allocated_src = D40_ALLOC_FREE; + is_free = true; + goto out; + } + + /* Logical channel */ + if (is_src) { + phy->allocated_src &= ~(1 << log_event_line); + if (phy->allocated_src == D40_ALLOC_LOG_FREE) + phy->allocated_src = D40_ALLOC_FREE; + } else { + phy->allocated_dst &= ~(1 << log_event_line); + if (phy->allocated_dst == D40_ALLOC_LOG_FREE) + phy->allocated_dst = D40_ALLOC_FREE; + } + + is_free = ((phy->allocated_src | phy->allocated_dst) == + D40_ALLOC_FREE); + +out: + spin_unlock_irqrestore(&phy->lock, flags); + + return is_free; +} + +static int d40_allocate_channel(struct d40_chan *d40c, bool *first_phy_user) +{ + int dev_type; + int event_group; + int event_line; + struct d40_phy_res *phys; + int i; + int j; + int log_num; + bool is_src; + bool is_log = d40c->dma_cfg.mode == STEDMA40_MODE_LOGICAL; + + phys = d40c->base->phy_res; + + if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) { + dev_type = d40c->dma_cfg.src_dev_type; + log_num = 2 * dev_type; + is_src = true; + } else if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || + d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { + /* dst event lines are used for logical memcpy */ + dev_type = d40c->dma_cfg.dst_dev_type; + log_num = 2 * dev_type + 1; + is_src = false; + } else + return -EINVAL; + + event_group = D40_TYPE_TO_GROUP(dev_type); + event_line = D40_TYPE_TO_EVENT(dev_type); + + if (!is_log) { + if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { + /* Find physical half channel */ + for (i = 0; i < d40c->base->num_phy_chans; i++) { + + if (d40_alloc_mask_set(&phys[i], is_src, + 0, is_log, + first_phy_user)) + goto found_phy; + } + } else + for (j = 0; j < d40c->base->num_phy_chans; j += 8) { + int phy_num = j + event_group * 2; + for (i = phy_num; i < phy_num + 2; i++) { + if (d40_alloc_mask_set(&phys[i], + is_src, + 0, + is_log, + first_phy_user)) + goto found_phy; + } + } + return -EINVAL; +found_phy: + d40c->phy_chan = &phys[i]; + d40c->log_num = D40_PHY_CHAN; + goto out; + } + if (dev_type == -1) + return -EINVAL; + + /* Find logical channel */ + for (j = 0; j < d40c->base->num_phy_chans; j += 8) { + int phy_num = j + event_group * 2; + + if (d40c->dma_cfg.use_fixed_channel) { + i = d40c->dma_cfg.phy_channel; + + if ((i != phy_num) && (i != phy_num + 1)) { + dev_err(chan2dev(d40c), + "invalid fixed phy channel %d\n", i); + return -EINVAL; + } + + if (d40_alloc_mask_set(&phys[i], is_src, event_line, + is_log, first_phy_user)) + goto found_log; + + dev_err(chan2dev(d40c), + "could not allocate fixed phy channel %d\n", i); + return -EINVAL; + } + + /* + * Spread logical channels across all available physical rather + * than pack every logical channel at the first available phy + * channels. + */ + if (is_src) { + for (i = phy_num; i < phy_num + 2; i++) { + if (d40_alloc_mask_set(&phys[i], is_src, + event_line, is_log, + first_phy_user)) + goto found_log; + } + } else { + for (i = phy_num + 1; i >= phy_num; i--) { + if (d40_alloc_mask_set(&phys[i], is_src, + event_line, is_log, + first_phy_user)) + goto found_log; + } + } + } + return -EINVAL; + +found_log: + d40c->phy_chan = &phys[i]; + d40c->log_num = log_num; +out: + + if (is_log) + d40c->base->lookup_log_chans[d40c->log_num] = d40c; + else + d40c->base->lookup_phy_chans[d40c->phy_chan->num] = d40c; + + return 0; + +} + +static int d40_config_memcpy(struct d40_chan *d40c) +{ + dma_cap_mask_t cap = d40c->chan.device->cap_mask; + + if (dma_has_cap(DMA_MEMCPY, cap) && !dma_has_cap(DMA_SLAVE, cap)) { + d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_log; + d40c->dma_cfg.src_dev_type = STEDMA40_DEV_SRC_MEMORY; + d40c->dma_cfg.dst_dev_type = d40c->base->plat_data-> + memcpy[d40c->chan.chan_id]; + + } else if (dma_has_cap(DMA_MEMCPY, cap) && + dma_has_cap(DMA_SLAVE, cap)) { + d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_phy; + } else { + chan_err(d40c, "No memcpy\n"); + return -EINVAL; + } + + return 0; +} + +static int d40_free_dma(struct d40_chan *d40c) +{ + + int res = 0; + u32 event; + struct d40_phy_res *phy = d40c->phy_chan; + bool is_src; + + /* Terminate all queued and active transfers */ + d40_term_all(d40c); + + if (phy == NULL) { + chan_err(d40c, "phy == null\n"); + return -EINVAL; + } + + if (phy->allocated_src == D40_ALLOC_FREE && + phy->allocated_dst == D40_ALLOC_FREE) { + chan_err(d40c, "channel already free\n"); + return -EINVAL; + } + + if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || + d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { + event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); + is_src = false; + } else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) { + event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); + is_src = true; + } else { + chan_err(d40c, "Unknown direction\n"); + return -EINVAL; + } + + pm_runtime_get_sync(d40c->base->dev); + res = d40_channel_execute_command(d40c, D40_DMA_STOP); + if (res) { + chan_err(d40c, "stop failed\n"); + goto out; + } + + d40_alloc_mask_free(phy, is_src, chan_is_logical(d40c) ? event : 0); + + if (chan_is_logical(d40c)) + d40c->base->lookup_log_chans[d40c->log_num] = NULL; + else + d40c->base->lookup_phy_chans[phy->num] = NULL; + + if (d40c->busy) { + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + } + + d40c->busy = false; + d40c->phy_chan = NULL; + d40c->configured = false; +out: + + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + return res; +} + +static bool d40_is_paused(struct d40_chan *d40c) +{ + void __iomem *chanbase = chan_base(d40c); + bool is_paused = false; + unsigned long flags; + void __iomem *active_reg; + u32 status; + u32 event; + + spin_lock_irqsave(&d40c->lock, flags); + + if (chan_is_physical(d40c)) { + if (d40c->phy_chan->num % 2 == 0) + active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; + else + active_reg = d40c->base->virtbase + D40_DREG_ACTIVO; + + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP) + is_paused = true; + + goto _exit; + } + + if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || + d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { + event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); + status = readl(chanbase + D40_CHAN_REG_SDLNK); + } else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) { + event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); + status = readl(chanbase + D40_CHAN_REG_SSLNK); + } else { + chan_err(d40c, "Unknown direction\n"); + goto _exit; + } + + status = (status & D40_EVENTLINE_MASK(event)) >> + D40_EVENTLINE_POS(event); + + if (status != D40_DMA_RUN) + is_paused = true; +_exit: + spin_unlock_irqrestore(&d40c->lock, flags); + return is_paused; + +} + + +static u32 stedma40_residue(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + u32 bytes_left; + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + bytes_left = d40_residue(d40c); + spin_unlock_irqrestore(&d40c->lock, flags); + + return bytes_left; +} + +static int +d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc, + struct scatterlist *sg_src, struct scatterlist *sg_dst, + unsigned int sg_len, dma_addr_t src_dev_addr, + dma_addr_t dst_dev_addr) +{ + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + struct stedma40_half_channel_info *src_info = &cfg->src_info; + struct stedma40_half_channel_info *dst_info = &cfg->dst_info; + int ret; + + ret = d40_log_sg_to_lli(sg_src, sg_len, + src_dev_addr, + desc->lli_log.src, + chan->log_def.lcsp1, + src_info->data_width, + dst_info->data_width); + + ret = d40_log_sg_to_lli(sg_dst, sg_len, + dst_dev_addr, + desc->lli_log.dst, + chan->log_def.lcsp3, + dst_info->data_width, + src_info->data_width); + + return ret < 0 ? ret : 0; +} + +static int +d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc, + struct scatterlist *sg_src, struct scatterlist *sg_dst, + unsigned int sg_len, dma_addr_t src_dev_addr, + dma_addr_t dst_dev_addr) +{ + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + struct stedma40_half_channel_info *src_info = &cfg->src_info; + struct stedma40_half_channel_info *dst_info = &cfg->dst_info; + unsigned long flags = 0; + int ret; + + if (desc->cyclic) + flags |= LLI_CYCLIC | LLI_TERM_INT; + + ret = d40_phy_sg_to_lli(sg_src, sg_len, src_dev_addr, + desc->lli_phy.src, + virt_to_phys(desc->lli_phy.src), + chan->src_def_cfg, + src_info, dst_info, flags); + + ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr, + desc->lli_phy.dst, + virt_to_phys(desc->lli_phy.dst), + chan->dst_def_cfg, + dst_info, src_info, flags); + + dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr, + desc->lli_pool.size, DMA_TO_DEVICE); + + return ret < 0 ? ret : 0; +} + + +static struct d40_desc * +d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg, + unsigned int sg_len, unsigned long dma_flags) +{ + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + struct d40_desc *desc; + int ret; + + desc = d40_desc_get(chan); + if (!desc) + return NULL; + + desc->lli_len = d40_sg_2_dmalen(sg, sg_len, cfg->src_info.data_width, + cfg->dst_info.data_width); + if (desc->lli_len < 0) { + chan_err(chan, "Unaligned size\n"); + goto err; + } + + ret = d40_pool_lli_alloc(chan, desc, desc->lli_len); + if (ret < 0) { + chan_err(chan, "Could not allocate lli\n"); + goto err; + } + + + desc->lli_current = 0; + desc->txd.flags = dma_flags; + desc->txd.tx_submit = d40_tx_submit; + + dma_async_tx_descriptor_init(&desc->txd, &chan->chan); + + return desc; + +err: + d40_desc_free(chan, desc); + return NULL; +} + +static dma_addr_t +d40_get_dev_addr(struct d40_chan *chan, enum dma_transfer_direction direction) +{ + struct stedma40_platform_data *plat = chan->base->plat_data; + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + dma_addr_t addr = 0; + + if (chan->runtime_addr) + return chan->runtime_addr; + + if (direction == DMA_DEV_TO_MEM) + addr = plat->dev_rx[cfg->src_dev_type]; + else if (direction == DMA_MEM_TO_DEV) + addr = plat->dev_tx[cfg->dst_dev_type]; + + return addr; +} + +static struct dma_async_tx_descriptor * +d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src, + struct scatterlist *sg_dst, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long dma_flags) +{ + struct d40_chan *chan = container_of(dchan, struct d40_chan, chan); + dma_addr_t src_dev_addr = 0; + dma_addr_t dst_dev_addr = 0; + struct d40_desc *desc; + unsigned long flags; + int ret; + + if (!chan->phy_chan) { + chan_err(chan, "Cannot prepare unallocated channel\n"); + return NULL; + } + + + spin_lock_irqsave(&chan->lock, flags); + + desc = d40_prep_desc(chan, sg_src, sg_len, dma_flags); + if (desc == NULL) + goto err; + + if (sg_next(&sg_src[sg_len - 1]) == sg_src) + desc->cyclic = true; + + if (direction != DMA_TRANS_NONE) { + dma_addr_t dev_addr = d40_get_dev_addr(chan, direction); + + if (direction == DMA_DEV_TO_MEM) + src_dev_addr = dev_addr; + else if (direction == DMA_MEM_TO_DEV) + dst_dev_addr = dev_addr; + } + + if (chan_is_logical(chan)) + ret = d40_prep_sg_log(chan, desc, sg_src, sg_dst, + sg_len, src_dev_addr, dst_dev_addr); + else + ret = d40_prep_sg_phy(chan, desc, sg_src, sg_dst, + sg_len, src_dev_addr, dst_dev_addr); + + if (ret) { + chan_err(chan, "Failed to prepare %s sg job: %d\n", + chan_is_logical(chan) ? "log" : "phy", ret); + goto err; + } + + /* + * add descriptor to the prepare queue in order to be able + * to free them later in terminate_all + */ + list_add_tail(&desc->node, &chan->prepare_queue); + + spin_unlock_irqrestore(&chan->lock, flags); + + return &desc->txd; + +err: + if (desc) + d40_desc_free(chan, desc); + spin_unlock_irqrestore(&chan->lock, flags); + return NULL; +} + +bool stedma40_filter(struct dma_chan *chan, void *data) +{ + struct stedma40_chan_cfg *info = data; + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int err; + + if (data) { + err = d40_validate_conf(d40c, info); + if (!err) + d40c->dma_cfg = *info; + } else + err = d40_config_memcpy(d40c); + + if (!err) + d40c->configured = true; + + return err == 0; +} +EXPORT_SYMBOL(stedma40_filter); + +static void __d40_set_prio_rt(struct d40_chan *d40c, int dev_type, bool src) +{ + bool realtime = d40c->dma_cfg.realtime; + bool highprio = d40c->dma_cfg.high_priority; + u32 prioreg = highprio ? D40_DREG_PSEG1 : D40_DREG_PCEG1; + u32 rtreg = realtime ? D40_DREG_RSEG1 : D40_DREG_RCEG1; + u32 event = D40_TYPE_TO_EVENT(dev_type); + u32 group = D40_TYPE_TO_GROUP(dev_type); + u32 bit = 1 << event; + + /* Destination event lines are stored in the upper halfword */ + if (!src) + bit <<= 16; + + writel(bit, d40c->base->virtbase + prioreg + group * 4); + writel(bit, d40c->base->virtbase + rtreg + group * 4); +} + +static void d40_set_prio_realtime(struct d40_chan *d40c) +{ + if (d40c->base->rev < 3) + return; + + if ((d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) || + (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) + __d40_set_prio_rt(d40c, d40c->dma_cfg.src_dev_type, true); + + if ((d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH) || + (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) + __d40_set_prio_rt(d40c, d40c->dma_cfg.dst_dev_type, false); +} + +/* DMA ENGINE functions */ +static int d40_alloc_chan_resources(struct dma_chan *chan) +{ + int err; + unsigned long flags; + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + bool is_free_phy; + spin_lock_irqsave(&d40c->lock, flags); + + dma_cookie_init(chan); + + /* If no dma configuration is set use default configuration (memcpy) */ + if (!d40c->configured) { + err = d40_config_memcpy(d40c); + if (err) { + chan_err(d40c, "Failed to configure memcpy channel\n"); + goto fail; + } + } + + err = d40_allocate_channel(d40c, &is_free_phy); + if (err) { + chan_err(d40c, "Failed to allocate channel\n"); + d40c->configured = false; + goto fail; + } + + pm_runtime_get_sync(d40c->base->dev); + /* Fill in basic CFG register values */ + d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg, + &d40c->dst_def_cfg, chan_is_logical(d40c)); + + d40_set_prio_realtime(d40c); + + if (chan_is_logical(d40c)) { + d40_log_cfg(&d40c->dma_cfg, + &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); + + if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) + d40c->lcpa = d40c->base->lcpa_base + + d40c->dma_cfg.src_dev_type * D40_LCPA_CHAN_SIZE; + else + d40c->lcpa = d40c->base->lcpa_base + + d40c->dma_cfg.dst_dev_type * + D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA; + } + + dev_dbg(chan2dev(d40c), "allocated %s channel (phy %d%s)\n", + chan_is_logical(d40c) ? "logical" : "physical", + d40c->phy_chan->num, + d40c->dma_cfg.use_fixed_channel ? ", fixed" : ""); + + + /* + * Only write channel configuration to the DMA if the physical + * resource is free. In case of multiple logical channels + * on the same physical resource, only the first write is necessary. + */ + if (is_free_phy) + d40_config_write(d40c); +fail: + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + spin_unlock_irqrestore(&d40c->lock, flags); + return err; +} + +static void d40_free_chan_resources(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int err; + unsigned long flags; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Cannot free unallocated channel\n"); + return; + } + + + spin_lock_irqsave(&d40c->lock, flags); + + err = d40_free_dma(d40c); + + if (err) + chan_err(d40c, "Failed to free channel\n"); + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, + dma_addr_t dst, + dma_addr_t src, + size_t size, + unsigned long dma_flags) +{ + struct scatterlist dst_sg; + struct scatterlist src_sg; + + sg_init_table(&dst_sg, 1); + sg_init_table(&src_sg, 1); + + sg_dma_address(&dst_sg) = dst; + sg_dma_address(&src_sg) = src; + + sg_dma_len(&dst_sg) = size; + sg_dma_len(&src_sg) = size; + + return d40_prep_sg(chan, &src_sg, &dst_sg, 1, DMA_NONE, dma_flags); +} + +static struct dma_async_tx_descriptor * +d40_prep_memcpy_sg(struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long dma_flags) +{ + if (dst_nents != src_nents) + return NULL; + + return d40_prep_sg(chan, src_sg, dst_sg, src_nents, DMA_NONE, dma_flags); +} + +static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, + unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long dma_flags, + void *context) +{ + if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV) + return NULL; + + return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags); +} + +static struct dma_async_tx_descriptor * +dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, + size_t buf_len, size_t period_len, + enum dma_transfer_direction direction, void *context) +{ + unsigned int periods = buf_len / period_len; + struct dma_async_tx_descriptor *txd; + struct scatterlist *sg; + int i; + + sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT); + for (i = 0; i < periods; i++) { + sg_dma_address(&sg[i]) = dma_addr; + sg_dma_len(&sg[i]) = period_len; + dma_addr += period_len; + } + + sg[periods].offset = 0; + sg[periods].length = 0; + sg[periods].page_link = + ((unsigned long)sg | 0x01) & ~0x02; + + txd = d40_prep_sg(chan, sg, sg, periods, direction, + DMA_PREP_INTERRUPT); + + kfree(sg); + + return txd; +} + +static enum dma_status d40_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + enum dma_status ret; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Cannot read status of unallocated channel\n"); + return -EINVAL; + } + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret != DMA_SUCCESS) + dma_set_residue(txstate, stedma40_residue(chan)); + + if (d40_is_paused(d40c)) + ret = DMA_PAUSED; + + return ret; +} + +static void d40_issue_pending(struct dma_chan *chan) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + unsigned long flags; + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Channel is not allocated!\n"); + return; + } + + spin_lock_irqsave(&d40c->lock, flags); + + list_splice_tail_init(&d40c->pending_queue, &d40c->queue); + + /* Busy means that queued jobs are already being processed */ + if (!d40c->busy) + (void) d40_queue_start(d40c); + + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static void d40_terminate_all(struct dma_chan *chan) +{ + unsigned long flags; + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + int ret; + + spin_lock_irqsave(&d40c->lock, flags); + + pm_runtime_get_sync(d40c->base->dev); + ret = d40_channel_execute_command(d40c, D40_DMA_STOP); + if (ret) + chan_err(d40c, "Failed to stop channel\n"); + + d40_term_all(d40c); + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + if (d40c->busy) { + pm_runtime_mark_last_busy(d40c->base->dev); + pm_runtime_put_autosuspend(d40c->base->dev); + } + d40c->busy = false; + + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static int +dma40_config_to_halfchannel(struct d40_chan *d40c, + struct stedma40_half_channel_info *info, + enum dma_slave_buswidth width, + u32 maxburst) +{ + enum stedma40_periph_data_width addr_width; + int psize; + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + addr_width = STEDMA40_BYTE_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + addr_width = STEDMA40_HALFWORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + addr_width = STEDMA40_WORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_8_BYTES: + addr_width = STEDMA40_DOUBLEWORD_WIDTH; + break; + default: + dev_err(d40c->base->dev, + "illegal peripheral address width " + "requested (%d)\n", + width); + return -EINVAL; + } + + if (chan_is_logical(d40c)) { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_LOG_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_LOG_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_LOG_4; + else + psize = STEDMA40_PSIZE_LOG_1; + } else { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_PHY_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_PHY_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_PHY_4; + else + psize = STEDMA40_PSIZE_PHY_1; + } + + info->data_width = addr_width; + info->psize = psize; + info->flow_ctrl = STEDMA40_NO_FLOW_CTRL; + + return 0; +} + +/* Runtime reconfiguration extension */ +static int d40_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + struct stedma40_chan_cfg *cfg = &d40c->dma_cfg; + enum dma_slave_buswidth src_addr_width, dst_addr_width; + dma_addr_t config_addr; + u32 src_maxburst, dst_maxburst; + int ret; + + src_addr_width = config->src_addr_width; + src_maxburst = config->src_maxburst; + dst_addr_width = config->dst_addr_width; + dst_maxburst = config->dst_maxburst; + + if (config->direction == DMA_DEV_TO_MEM) { + dma_addr_t dev_addr_rx = + d40c->base->plat_data->dev_rx[cfg->src_dev_type]; + + config_addr = config->src_addr; + if (dev_addr_rx) + dev_dbg(d40c->base->dev, + "channel has a pre-wired RX address %08x " + "overriding with %08x\n", + dev_addr_rx, config_addr); + if (cfg->dir != STEDMA40_PERIPH_TO_MEM) + dev_dbg(d40c->base->dev, + "channel was not configured for peripheral " + "to memory transfer (%d) overriding\n", + cfg->dir); + cfg->dir = STEDMA40_PERIPH_TO_MEM; + + /* Configure the memory side */ + if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + dst_addr_width = src_addr_width; + if (dst_maxburst == 0) + dst_maxburst = src_maxburst; + + } else if (config->direction == DMA_MEM_TO_DEV) { + dma_addr_t dev_addr_tx = + d40c->base->plat_data->dev_tx[cfg->dst_dev_type]; + + config_addr = config->dst_addr; + if (dev_addr_tx) + dev_dbg(d40c->base->dev, + "channel has a pre-wired TX address %08x " + "overriding with %08x\n", + dev_addr_tx, config_addr); + if (cfg->dir != STEDMA40_MEM_TO_PERIPH) + dev_dbg(d40c->base->dev, + "channel was not configured for memory " + "to peripheral transfer (%d) overriding\n", + cfg->dir); + cfg->dir = STEDMA40_MEM_TO_PERIPH; + + /* Configure the memory side */ + if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + src_addr_width = dst_addr_width; + if (src_maxburst == 0) + src_maxburst = dst_maxburst; + } else { + dev_err(d40c->base->dev, + "unrecognized channel direction %d\n", + config->direction); + return -EINVAL; + } + + if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) { + dev_err(d40c->base->dev, + "src/dst width/maxburst mismatch: %d*%d != %d*%d\n", + src_maxburst, + src_addr_width, + dst_maxburst, + dst_addr_width); + return -EINVAL; + } + + ret = dma40_config_to_halfchannel(d40c, &cfg->src_info, + src_addr_width, + src_maxburst); + if (ret) + return ret; + + ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info, + dst_addr_width, + dst_maxburst); + if (ret) + return ret; + + /* Fill in register values */ + if (chan_is_logical(d40c)) + d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); + else + d40_phy_cfg(cfg, &d40c->src_def_cfg, + &d40c->dst_def_cfg, false); + + /* These settings will take precedence later */ + d40c->runtime_addr = config_addr; + d40c->runtime_direction = config->direction; + dev_dbg(d40c->base->dev, + "configured channel %s for %s, data width %d/%d, " + "maxburst %d/%d elements, LE, no flow control\n", + dma_chan_name(chan), + (config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX", + src_addr_width, dst_addr_width, + src_maxburst, dst_maxburst); + + return 0; +} + +static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + + if (d40c->phy_chan == NULL) { + chan_err(d40c, "Channel is not allocated!\n"); + return -EINVAL; + } + + switch (cmd) { + case DMA_TERMINATE_ALL: + d40_terminate_all(chan); + return 0; + case DMA_PAUSE: + return d40_pause(d40c); + case DMA_RESUME: + return d40_resume(d40c); + case DMA_SLAVE_CONFIG: + return d40_set_runtime_config(chan, + (struct dma_slave_config *) arg); + default: + break; + } + + /* Other commands are unimplemented */ + return -ENXIO; +} + +/* Initialization functions */ + +static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, + struct d40_chan *chans, int offset, + int num_chans) +{ + int i = 0; + struct d40_chan *d40c; + + INIT_LIST_HEAD(&dma->channels); + + for (i = offset; i < offset + num_chans; i++) { + d40c = &chans[i]; + d40c->base = base; + d40c->chan.device = dma; + + spin_lock_init(&d40c->lock); + + d40c->log_num = D40_PHY_CHAN; + + INIT_LIST_HEAD(&d40c->active); + INIT_LIST_HEAD(&d40c->queue); + INIT_LIST_HEAD(&d40c->pending_queue); + INIT_LIST_HEAD(&d40c->client); + INIT_LIST_HEAD(&d40c->prepare_queue); + + tasklet_init(&d40c->tasklet, dma_tasklet, + (unsigned long) d40c); + + list_add_tail(&d40c->chan.device_node, + &dma->channels); + } +} + +static void d40_ops_init(struct d40_base *base, struct dma_device *dev) +{ + if (dma_has_cap(DMA_SLAVE, dev->cap_mask)) + dev->device_prep_slave_sg = d40_prep_slave_sg; + + if (dma_has_cap(DMA_MEMCPY, dev->cap_mask)) { + dev->device_prep_dma_memcpy = d40_prep_memcpy; + + /* + * This controller can only access address at even + * 32bit boundaries, i.e. 2^2 + */ + dev->copy_align = 2; + } + + if (dma_has_cap(DMA_SG, dev->cap_mask)) + dev->device_prep_dma_sg = d40_prep_memcpy_sg; + + if (dma_has_cap(DMA_CYCLIC, dev->cap_mask)) + dev->device_prep_dma_cyclic = dma40_prep_dma_cyclic; + + dev->device_alloc_chan_resources = d40_alloc_chan_resources; + dev->device_free_chan_resources = d40_free_chan_resources; + dev->device_issue_pending = d40_issue_pending; + dev->device_tx_status = d40_tx_status; + dev->device_control = d40_control; + dev->dev = base->dev; +} + +static int __init d40_dmaengine_init(struct d40_base *base, + int num_reserved_chans) +{ + int err ; + + d40_chan_init(base, &base->dma_slave, base->log_chans, + 0, base->num_log_chans); + + dma_cap_zero(base->dma_slave.cap_mask); + dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask); + dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask); + + d40_ops_init(base, &base->dma_slave); + + err = dma_async_device_register(&base->dma_slave); + + if (err) { + d40_err(base->dev, "Failed to register slave channels\n"); + goto failure1; + } + + d40_chan_init(base, &base->dma_memcpy, base->log_chans, + base->num_log_chans, base->plat_data->memcpy_len); + + dma_cap_zero(base->dma_memcpy.cap_mask); + dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask); + dma_cap_set(DMA_SG, base->dma_memcpy.cap_mask); + + d40_ops_init(base, &base->dma_memcpy); + + err = dma_async_device_register(&base->dma_memcpy); + + if (err) { + d40_err(base->dev, + "Failed to regsiter memcpy only channels\n"); + goto failure2; + } + + d40_chan_init(base, &base->dma_both, base->phy_chans, + 0, num_reserved_chans); + + dma_cap_zero(base->dma_both.cap_mask); + dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask); + dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask); + dma_cap_set(DMA_SG, base->dma_both.cap_mask); + dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask); + + d40_ops_init(base, &base->dma_both); + err = dma_async_device_register(&base->dma_both); + + if (err) { + d40_err(base->dev, + "Failed to register logical and physical capable channels\n"); + goto failure3; + } + return 0; +failure3: + dma_async_device_unregister(&base->dma_memcpy); +failure2: + dma_async_device_unregister(&base->dma_slave); +failure1: + return err; +} + +/* Suspend resume functionality */ +#ifdef CONFIG_PM +static int dma40_pm_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct d40_base *base = platform_get_drvdata(pdev); + int ret = 0; + if (!pm_runtime_suspended(dev)) + return -EBUSY; + + if (base->lcpa_regulator) + ret = regulator_disable(base->lcpa_regulator); + return ret; +} + +static int dma40_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct d40_base *base = platform_get_drvdata(pdev); + + d40_save_restore_registers(base, true); + + /* Don't disable/enable clocks for v1 due to HW bugs */ + if (base->rev != 1) + writel_relaxed(base->gcc_pwr_off_mask, + base->virtbase + D40_DREG_GCC); + + return 0; +} + +static int dma40_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct d40_base *base = platform_get_drvdata(pdev); + + if (base->initialized) + d40_save_restore_registers(base, false); + + writel_relaxed(D40_DREG_GCC_ENABLE_ALL, + base->virtbase + D40_DREG_GCC); + return 0; +} + +static int dma40_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct d40_base *base = platform_get_drvdata(pdev); + int ret = 0; + + if (base->lcpa_regulator) + ret = regulator_enable(base->lcpa_regulator); + + return ret; +} + +static const struct dev_pm_ops dma40_pm_ops = { + .suspend = dma40_pm_suspend, + .runtime_suspend = dma40_runtime_suspend, + .runtime_resume = dma40_runtime_resume, + .resume = dma40_resume, +}; +#define DMA40_PM_OPS (&dma40_pm_ops) +#else +#define DMA40_PM_OPS NULL +#endif + +/* Initialization functions. */ + +static int __init d40_phy_res_init(struct d40_base *base) +{ + int i; + int num_phy_chans_avail = 0; + u32 val[2]; + int odd_even_bit = -2; + int gcc = D40_DREG_GCC_ENA; + + val[0] = readl(base->virtbase + D40_DREG_PRSME); + val[1] = readl(base->virtbase + D40_DREG_PRSMO); + + for (i = 0; i < base->num_phy_chans; i++) { + base->phy_res[i].num = i; + odd_even_bit += 2 * ((i % 2) == 0); + if (((val[i % 2] >> odd_even_bit) & 3) == 1) { + /* Mark security only channels as occupied */ + base->phy_res[i].allocated_src = D40_ALLOC_PHY; + base->phy_res[i].allocated_dst = D40_ALLOC_PHY; + base->phy_res[i].reserved = true; + gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i), + D40_DREG_GCC_SRC); + gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i), + D40_DREG_GCC_DST); + + + } else { + base->phy_res[i].allocated_src = D40_ALLOC_FREE; + base->phy_res[i].allocated_dst = D40_ALLOC_FREE; + base->phy_res[i].reserved = false; + num_phy_chans_avail++; + } + spin_lock_init(&base->phy_res[i].lock); + } + + /* Mark disabled channels as occupied */ + for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) { + int chan = base->plat_data->disabled_channels[i]; + + base->phy_res[chan].allocated_src = D40_ALLOC_PHY; + base->phy_res[chan].allocated_dst = D40_ALLOC_PHY; + base->phy_res[chan].reserved = true; + gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan), + D40_DREG_GCC_SRC); + gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan), + D40_DREG_GCC_DST); + num_phy_chans_avail--; + } + + dev_info(base->dev, "%d of %d physical DMA channels available\n", + num_phy_chans_avail, base->num_phy_chans); + + /* Verify settings extended vs standard */ + val[0] = readl(base->virtbase + D40_DREG_PRTYP); + + for (i = 0; i < base->num_phy_chans; i++) { + + if (base->phy_res[i].allocated_src == D40_ALLOC_FREE && + (val[0] & 0x3) != 1) + dev_info(base->dev, + "[%s] INFO: channel %d is misconfigured (%d)\n", + __func__, i, val[0] & 0x3); + + val[0] = val[0] >> 2; + } + + /* + * To keep things simple, Enable all clocks initially. + * The clocks will get managed later post channel allocation. + * The clocks for the event lines on which reserved channels exists + * are not managed here. + */ + writel(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC); + base->gcc_pwr_off_mask = gcc; + + return num_phy_chans_avail; +} + +static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) +{ + struct stedma40_platform_data *plat_data; + struct clk *clk = NULL; + void __iomem *virtbase = NULL; + struct resource *res = NULL; + struct d40_base *base = NULL; + int num_log_chans = 0; + int num_phy_chans; + int i; + u32 pid; + u32 cid; + u8 rev; + + clk = clk_get(&pdev->dev, NULL); + + if (IS_ERR(clk)) { + d40_err(&pdev->dev, "No matching clock found\n"); + goto failure; + } + + clk_enable(clk); + + /* Get IO for DMAC base address */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base"); + if (!res) + goto failure; + + if (request_mem_region(res->start, resource_size(res), + D40_NAME " I/O base") == NULL) + goto failure; + + virtbase = ioremap(res->start, resource_size(res)); + if (!virtbase) + goto failure; + + /* This is just a regular AMBA PrimeCell ID actually */ + for (pid = 0, i = 0; i < 4; i++) + pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i) + & 255) << (i * 8); + for (cid = 0, i = 0; i < 4; i++) + cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i) + & 255) << (i * 8); + + if (cid != AMBA_CID) { + d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n"); + goto failure; + } + if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) { + d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n", + AMBA_MANF_BITS(pid), + AMBA_VENDOR_ST); + goto failure; + } + /* + * HW revision: + * DB8500ed has revision 0 + * ? has revision 1 + * DB8500v1 has revision 2 + * DB8500v2 has revision 3 + */ + rev = AMBA_REV_BITS(pid); + + /* The number of physical channels on this HW */ + num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; + + dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n", + rev, res->start); + + if (rev < 2) { + d40_err(&pdev->dev, "hardware revision: %d is not supported", + rev); + goto failure; + } + + plat_data = pdev->dev.platform_data; + + /* Count the number of logical channels in use */ + for (i = 0; i < plat_data->dev_len; i++) + if (plat_data->dev_rx[i] != 0) + num_log_chans++; + + for (i = 0; i < plat_data->dev_len; i++) + if (plat_data->dev_tx[i] != 0) + num_log_chans++; + + base = kzalloc(ALIGN(sizeof(struct d40_base), 4) + + (num_phy_chans + num_log_chans + plat_data->memcpy_len) * + sizeof(struct d40_chan), GFP_KERNEL); + + if (base == NULL) { + d40_err(&pdev->dev, "Out of memory\n"); + goto failure; + } + + base->rev = rev; + base->clk = clk; + base->num_phy_chans = num_phy_chans; + base->num_log_chans = num_log_chans; + base->phy_start = res->start; + base->phy_size = resource_size(res); + base->virtbase = virtbase; + base->plat_data = plat_data; + base->dev = &pdev->dev; + base->phy_chans = ((void *)base) + ALIGN(sizeof(struct d40_base), 4); + base->log_chans = &base->phy_chans[num_phy_chans]; + + base->phy_res = kzalloc(num_phy_chans * sizeof(struct d40_phy_res), + GFP_KERNEL); + if (!base->phy_res) + goto failure; + + base->lookup_phy_chans = kzalloc(num_phy_chans * + sizeof(struct d40_chan *), + GFP_KERNEL); + if (!base->lookup_phy_chans) + goto failure; + + if (num_log_chans + plat_data->memcpy_len) { + /* + * The max number of logical channels are event lines for all + * src devices and dst devices + */ + base->lookup_log_chans = kzalloc(plat_data->dev_len * 2 * + sizeof(struct d40_chan *), + GFP_KERNEL); + if (!base->lookup_log_chans) + goto failure; + } + + base->reg_val_backup_chan = kmalloc(base->num_phy_chans * + sizeof(d40_backup_regs_chan), + GFP_KERNEL); + if (!base->reg_val_backup_chan) + goto failure; + + base->lcla_pool.alloc_map = + kzalloc(num_phy_chans * sizeof(struct d40_desc *) + * D40_LCLA_LINK_PER_EVENT_GRP, GFP_KERNEL); + if (!base->lcla_pool.alloc_map) + goto failure; + + base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (base->desc_slab == NULL) + goto failure; + + return base; + +failure: + if (!IS_ERR(clk)) { + clk_disable(clk); + clk_put(clk); + } + if (virtbase) + iounmap(virtbase); + if (res) + release_mem_region(res->start, + resource_size(res)); + if (virtbase) + iounmap(virtbase); + + if (base) { + kfree(base->lcla_pool.alloc_map); + kfree(base->reg_val_backup_chan); + kfree(base->lookup_log_chans); + kfree(base->lookup_phy_chans); + kfree(base->phy_res); + kfree(base); + } + + return NULL; +} + +static void __init d40_hw_init(struct d40_base *base) +{ + + static struct d40_reg_val dma_init_reg[] = { + /* Clock every part of the DMA block from start */ + { .reg = D40_DREG_GCC, .val = D40_DREG_GCC_ENABLE_ALL}, + + /* Interrupts on all logical channels */ + { .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS3, .val = 0xFFFFFFFF} + }; + int i; + u32 prmseo[2] = {0, 0}; + u32 activeo[2] = {0xFFFFFFFF, 0xFFFFFFFF}; + u32 pcmis = 0; + u32 pcicr = 0; + + for (i = 0; i < ARRAY_SIZE(dma_init_reg); i++) + writel(dma_init_reg[i].val, + base->virtbase + dma_init_reg[i].reg); + + /* Configure all our dma channels to default settings */ + for (i = 0; i < base->num_phy_chans; i++) { + + activeo[i % 2] = activeo[i % 2] << 2; + + if (base->phy_res[base->num_phy_chans - i - 1].allocated_src + == D40_ALLOC_PHY) { + activeo[i % 2] |= 3; + continue; + } + + /* Enable interrupt # */ + pcmis = (pcmis << 1) | 1; + + /* Clear interrupt # */ + pcicr = (pcicr << 1) | 1; + + /* Set channel to physical mode */ + prmseo[i % 2] = prmseo[i % 2] << 2; + prmseo[i % 2] |= 1; + + } + + writel(prmseo[1], base->virtbase + D40_DREG_PRMSE); + writel(prmseo[0], base->virtbase + D40_DREG_PRMSO); + writel(activeo[1], base->virtbase + D40_DREG_ACTIVE); + writel(activeo[0], base->virtbase + D40_DREG_ACTIVO); + + /* Write which interrupt to enable */ + writel(pcmis, base->virtbase + D40_DREG_PCMIS); + + /* Write which interrupt to clear */ + writel(pcicr, base->virtbase + D40_DREG_PCICR); + +} + +static int __init d40_lcla_allocate(struct d40_base *base) +{ + struct d40_lcla_pool *pool = &base->lcla_pool; + unsigned long *page_list; + int i, j; + int ret = 0; + + /* + * This is somewhat ugly. We need 8192 bytes that are 18 bit aligned, + * To full fill this hardware requirement without wasting 256 kb + * we allocate pages until we get an aligned one. + */ + page_list = kmalloc(sizeof(unsigned long) * MAX_LCLA_ALLOC_ATTEMPTS, + GFP_KERNEL); + + if (!page_list) { + ret = -ENOMEM; + goto failure; + } + + /* Calculating how many pages that are required */ + base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE; + + for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) { + page_list[i] = __get_free_pages(GFP_KERNEL, + base->lcla_pool.pages); + if (!page_list[i]) { + + d40_err(base->dev, "Failed to allocate %d pages.\n", + base->lcla_pool.pages); + + for (j = 0; j < i; j++) + free_pages(page_list[j], base->lcla_pool.pages); + goto failure; + } + + if ((virt_to_phys((void *)page_list[i]) & + (LCLA_ALIGNMENT - 1)) == 0) + break; + } + + for (j = 0; j < i; j++) + free_pages(page_list[j], base->lcla_pool.pages); + + if (i < MAX_LCLA_ALLOC_ATTEMPTS) { + base->lcla_pool.base = (void *)page_list[i]; + } else { + /* + * After many attempts and no succees with finding the correct + * alignment, try with allocating a big buffer. + */ + dev_warn(base->dev, + "[%s] Failed to get %d pages @ 18 bit align.\n", + __func__, base->lcla_pool.pages); + base->lcla_pool.base_unaligned = kmalloc(SZ_1K * + base->num_phy_chans + + LCLA_ALIGNMENT, + GFP_KERNEL); + if (!base->lcla_pool.base_unaligned) { + ret = -ENOMEM; + goto failure; + } + + base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned, + LCLA_ALIGNMENT); + } + + pool->dma_addr = dma_map_single(base->dev, pool->base, + SZ_1K * base->num_phy_chans, + DMA_TO_DEVICE); + if (dma_mapping_error(base->dev, pool->dma_addr)) { + pool->dma_addr = 0; + ret = -ENOMEM; + goto failure; + } + + writel(virt_to_phys(base->lcla_pool.base), + base->virtbase + D40_DREG_LCLA); +failure: + kfree(page_list); + return ret; +} + +static int __init d40_probe(struct platform_device *pdev) +{ + int err; + int ret = -ENOENT; + struct d40_base *base; + struct resource *res = NULL; + int num_reserved_chans; + u32 val; + + base = d40_hw_detect_init(pdev); + + if (!base) + goto failure; + + num_reserved_chans = d40_phy_res_init(base); + + platform_set_drvdata(pdev, base); + + spin_lock_init(&base->interrupt_lock); + spin_lock_init(&base->execmd_lock); + + /* Get IO for logical channel parameter address */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa"); + if (!res) { + ret = -ENOENT; + d40_err(&pdev->dev, "No \"lcpa\" memory resource\n"); + goto failure; + } + base->lcpa_size = resource_size(res); + base->phy_lcpa = res->start; + + if (request_mem_region(res->start, resource_size(res), + D40_NAME " I/O lcpa") == NULL) { + ret = -EBUSY; + d40_err(&pdev->dev, + "Failed to request LCPA region 0x%x-0x%x\n", + res->start, res->end); + goto failure; + } + + /* We make use of ESRAM memory for this. */ + val = readl(base->virtbase + D40_DREG_LCPA); + if (res->start != val && val != 0) { + dev_warn(&pdev->dev, + "[%s] Mismatch LCPA dma 0x%x, def 0x%x\n", + __func__, val, res->start); + } else + writel(res->start, base->virtbase + D40_DREG_LCPA); + + base->lcpa_base = ioremap(res->start, resource_size(res)); + if (!base->lcpa_base) { + ret = -ENOMEM; + d40_err(&pdev->dev, "Failed to ioremap LCPA region\n"); + goto failure; + } + /* If lcla has to be located in ESRAM we don't need to allocate */ + if (base->plat_data->use_esram_lcla) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "lcla_esram"); + if (!res) { + ret = -ENOENT; + d40_err(&pdev->dev, + "No \"lcla_esram\" memory resource\n"); + goto failure; + } + base->lcla_pool.base = ioremap(res->start, + resource_size(res)); + if (!base->lcla_pool.base) { + ret = -ENOMEM; + d40_err(&pdev->dev, "Failed to ioremap LCLA region\n"); + goto failure; + } + writel(res->start, base->virtbase + D40_DREG_LCLA); + + } else { + ret = d40_lcla_allocate(base); + if (ret) { + d40_err(&pdev->dev, "Failed to allocate LCLA area\n"); + goto failure; + } + } + + spin_lock_init(&base->lcla_pool.lock); + + base->irq = platform_get_irq(pdev, 0); + + ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base); + if (ret) { + d40_err(&pdev->dev, "No IRQ defined\n"); + goto failure; + } + + pm_runtime_irq_safe(base->dev); + pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(base->dev); + pm_runtime_enable(base->dev); + pm_runtime_resume(base->dev); + + if (base->plat_data->use_esram_lcla) { + + base->lcpa_regulator = regulator_get(base->dev, "lcla_esram"); + if (IS_ERR(base->lcpa_regulator)) { + d40_err(&pdev->dev, "Failed to get lcpa_regulator\n"); + base->lcpa_regulator = NULL; + goto failure; + } + + ret = regulator_enable(base->lcpa_regulator); + if (ret) { + d40_err(&pdev->dev, + "Failed to enable lcpa_regulator\n"); + regulator_put(base->lcpa_regulator); + base->lcpa_regulator = NULL; + goto failure; + } + } + + base->initialized = true; + err = d40_dmaengine_init(base, num_reserved_chans); + if (err) + goto failure; + + d40_hw_init(base); + + dev_info(base->dev, "initialized\n"); + return 0; + +failure: + if (base) { + if (base->desc_slab) + kmem_cache_destroy(base->desc_slab); + if (base->virtbase) + iounmap(base->virtbase); + + if (base->lcla_pool.base && base->plat_data->use_esram_lcla) { + iounmap(base->lcla_pool.base); + base->lcla_pool.base = NULL; + } + + if (base->lcla_pool.dma_addr) + dma_unmap_single(base->dev, base->lcla_pool.dma_addr, + SZ_1K * base->num_phy_chans, + DMA_TO_DEVICE); + + if (!base->lcla_pool.base_unaligned && base->lcla_pool.base) + free_pages((unsigned long)base->lcla_pool.base, + base->lcla_pool.pages); + + kfree(base->lcla_pool.base_unaligned); + + if (base->phy_lcpa) + release_mem_region(base->phy_lcpa, + base->lcpa_size); + if (base->phy_start) + release_mem_region(base->phy_start, + base->phy_size); + if (base->clk) { + clk_disable(base->clk); + clk_put(base->clk); + } + + if (base->lcpa_regulator) { + regulator_disable(base->lcpa_regulator); + regulator_put(base->lcpa_regulator); + } + + kfree(base->lcla_pool.alloc_map); + kfree(base->lookup_log_chans); + kfree(base->lookup_phy_chans); + kfree(base->phy_res); + kfree(base); + } + + d40_err(&pdev->dev, "probe failed\n"); + return ret; +} + +static struct platform_driver d40_driver = { + .driver = { + .owner = THIS_MODULE, + .name = D40_NAME, + .pm = DMA40_PM_OPS, + }, +}; + +static int __init stedma40_init(void) +{ + return platform_driver_probe(&d40_driver, d40_probe); +} +subsys_initcall(stedma40_init); diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c new file mode 100644 index 00000000..cad9e1da --- /dev/null +++ b/drivers/dma/ste_dma40_ll.c @@ -0,0 +1,440 @@ +/* + * Copyright (C) ST-Ericsson SA 2007-2010 + * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson + * License terms: GNU General Public License (GPL) version 2 + */ + +#include <linux/kernel.h> +#include <plat/ste_dma40.h> + +#include "ste_dma40_ll.h" + +/* Sets up proper LCSP1 and LCSP3 register for a logical channel */ +void d40_log_cfg(struct stedma40_chan_cfg *cfg, + u32 *lcsp1, u32 *lcsp3) +{ + u32 l3 = 0; /* dst */ + u32 l1 = 0; /* src */ + + /* src is mem? -> increase address pos */ + if (cfg->dir == STEDMA40_MEM_TO_PERIPH || + cfg->dir == STEDMA40_MEM_TO_MEM) + l1 |= 1 << D40_MEM_LCSP1_SCFG_INCR_POS; + + /* dst is mem? -> increase address pos */ + if (cfg->dir == STEDMA40_PERIPH_TO_MEM || + cfg->dir == STEDMA40_MEM_TO_MEM) + l3 |= 1 << D40_MEM_LCSP3_DCFG_INCR_POS; + + /* src is hw? -> master port 1 */ + if (cfg->dir == STEDMA40_PERIPH_TO_MEM || + cfg->dir == STEDMA40_PERIPH_TO_PERIPH) + l1 |= 1 << D40_MEM_LCSP1_SCFG_MST_POS; + + /* dst is hw? -> master port 1 */ + if (cfg->dir == STEDMA40_MEM_TO_PERIPH || + cfg->dir == STEDMA40_PERIPH_TO_PERIPH) + l3 |= 1 << D40_MEM_LCSP3_DCFG_MST_POS; + + l3 |= 1 << D40_MEM_LCSP3_DCFG_EIM_POS; + l3 |= cfg->dst_info.psize << D40_MEM_LCSP3_DCFG_PSIZE_POS; + l3 |= cfg->dst_info.data_width << D40_MEM_LCSP3_DCFG_ESIZE_POS; + + l1 |= 1 << D40_MEM_LCSP1_SCFG_EIM_POS; + l1 |= cfg->src_info.psize << D40_MEM_LCSP1_SCFG_PSIZE_POS; + l1 |= cfg->src_info.data_width << D40_MEM_LCSP1_SCFG_ESIZE_POS; + + *lcsp1 = l1; + *lcsp3 = l3; + +} + +/* Sets up SRC and DST CFG register for both logical and physical channels */ +void d40_phy_cfg(struct stedma40_chan_cfg *cfg, + u32 *src_cfg, u32 *dst_cfg, bool is_log) +{ + u32 src = 0; + u32 dst = 0; + + if (!is_log) { + /* Physical channel */ + if ((cfg->dir == STEDMA40_PERIPH_TO_MEM) || + (cfg->dir == STEDMA40_PERIPH_TO_PERIPH)) { + /* Set master port to 1 */ + src |= 1 << D40_SREG_CFG_MST_POS; + src |= D40_TYPE_TO_EVENT(cfg->src_dev_type); + + if (cfg->src_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL) + src |= 1 << D40_SREG_CFG_PHY_TM_POS; + else + src |= 3 << D40_SREG_CFG_PHY_TM_POS; + } + if ((cfg->dir == STEDMA40_MEM_TO_PERIPH) || + (cfg->dir == STEDMA40_PERIPH_TO_PERIPH)) { + /* Set master port to 1 */ + dst |= 1 << D40_SREG_CFG_MST_POS; + dst |= D40_TYPE_TO_EVENT(cfg->dst_dev_type); + + if (cfg->dst_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL) + dst |= 1 << D40_SREG_CFG_PHY_TM_POS; + else + dst |= 3 << D40_SREG_CFG_PHY_TM_POS; + } + /* Interrupt on end of transfer for destination */ + dst |= 1 << D40_SREG_CFG_TIM_POS; + + /* Generate interrupt on error */ + src |= 1 << D40_SREG_CFG_EIM_POS; + dst |= 1 << D40_SREG_CFG_EIM_POS; + + /* PSIZE */ + if (cfg->src_info.psize != STEDMA40_PSIZE_PHY_1) { + src |= 1 << D40_SREG_CFG_PHY_PEN_POS; + src |= cfg->src_info.psize << D40_SREG_CFG_PSIZE_POS; + } + if (cfg->dst_info.psize != STEDMA40_PSIZE_PHY_1) { + dst |= 1 << D40_SREG_CFG_PHY_PEN_POS; + dst |= cfg->dst_info.psize << D40_SREG_CFG_PSIZE_POS; + } + + /* Element size */ + src |= cfg->src_info.data_width << D40_SREG_CFG_ESIZE_POS; + dst |= cfg->dst_info.data_width << D40_SREG_CFG_ESIZE_POS; + + } else { + /* Logical channel */ + dst |= 1 << D40_SREG_CFG_LOG_GIM_POS; + src |= 1 << D40_SREG_CFG_LOG_GIM_POS; + } + + if (cfg->high_priority) { + src |= 1 << D40_SREG_CFG_PRI_POS; + dst |= 1 << D40_SREG_CFG_PRI_POS; + } + + if (cfg->src_info.big_endian) + src |= 1 << D40_SREG_CFG_LBE_POS; + if (cfg->dst_info.big_endian) + dst |= 1 << D40_SREG_CFG_LBE_POS; + + *src_cfg = src; + *dst_cfg = dst; +} + +static int d40_phy_fill_lli(struct d40_phy_lli *lli, + dma_addr_t data, + u32 data_size, + dma_addr_t next_lli, + u32 reg_cfg, + struct stedma40_half_channel_info *info, + unsigned int flags) +{ + bool addr_inc = flags & LLI_ADDR_INC; + bool term_int = flags & LLI_TERM_INT; + unsigned int data_width = info->data_width; + int psize = info->psize; + int num_elems; + + if (psize == STEDMA40_PSIZE_PHY_1) + num_elems = 1; + else + num_elems = 2 << psize; + + /* Must be aligned */ + if (!IS_ALIGNED(data, 0x1 << data_width)) + return -EINVAL; + + /* Transfer size can't be smaller than (num_elms * elem_size) */ + if (data_size < num_elems * (0x1 << data_width)) + return -EINVAL; + + /* The number of elements. IE now many chunks */ + lli->reg_elt = (data_size >> data_width) << D40_SREG_ELEM_PHY_ECNT_POS; + + /* + * Distance to next element sized entry. + * Usually the size of the element unless you want gaps. + */ + if (addr_inc) + lli->reg_elt |= (0x1 << data_width) << + D40_SREG_ELEM_PHY_EIDX_POS; + + /* Where the data is */ + lli->reg_ptr = data; + lli->reg_cfg = reg_cfg; + + /* If this scatter list entry is the last one, no next link */ + if (next_lli == 0) + lli->reg_lnk = 0x1 << D40_SREG_LNK_PHY_TCP_POS; + else + lli->reg_lnk = next_lli; + + /* Set/clear interrupt generation on this link item.*/ + if (term_int) + lli->reg_cfg |= 0x1 << D40_SREG_CFG_TIM_POS; + else + lli->reg_cfg &= ~(0x1 << D40_SREG_CFG_TIM_POS); + + /* Post link */ + lli->reg_lnk |= 0 << D40_SREG_LNK_PHY_PRE_POS; + + return 0; +} + +static int d40_seg_size(int size, int data_width1, int data_width2) +{ + u32 max_w = max(data_width1, data_width2); + u32 min_w = min(data_width1, data_width2); + u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w); + + if (seg_max > STEDMA40_MAX_SEG_SIZE) + seg_max -= (1 << max_w); + + if (size <= seg_max) + return size; + + if (size <= 2 * seg_max) + return ALIGN(size / 2, 1 << max_w); + + return seg_max; +} + +static struct d40_phy_lli * +d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size, + dma_addr_t lli_phys, dma_addr_t first_phys, u32 reg_cfg, + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo, + unsigned long flags) +{ + bool lastlink = flags & LLI_LAST_LINK; + bool addr_inc = flags & LLI_ADDR_INC; + bool term_int = flags & LLI_TERM_INT; + bool cyclic = flags & LLI_CYCLIC; + int err; + dma_addr_t next = lli_phys; + int size_rest = size; + int size_seg = 0; + + /* + * This piece may be split up based on d40_seg_size(); we only want the + * term int on the last part. + */ + if (term_int) + flags &= ~LLI_TERM_INT; + + do { + size_seg = d40_seg_size(size_rest, info->data_width, + otherinfo->data_width); + size_rest -= size_seg; + + if (size_rest == 0 && term_int) + flags |= LLI_TERM_INT; + + if (size_rest == 0 && lastlink) + next = cyclic ? first_phys : 0; + else + next = ALIGN(next + sizeof(struct d40_phy_lli), + D40_LLI_ALIGN); + + err = d40_phy_fill_lli(lli, addr, size_seg, next, + reg_cfg, info, flags); + + if (err) + goto err; + + lli++; + if (addr_inc) + addr += size_seg; + } while (size_rest); + + return lli; + + err: + return NULL; +} + +int d40_phy_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t target, + struct d40_phy_lli *lli_sg, + dma_addr_t lli_phys, + u32 reg_cfg, + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo, + unsigned long flags) +{ + int total_size = 0; + int i; + struct scatterlist *current_sg = sg; + struct d40_phy_lli *lli = lli_sg; + dma_addr_t l_phys = lli_phys; + + if (!target) + flags |= LLI_ADDR_INC; + + for_each_sg(sg, current_sg, sg_len, i) { + dma_addr_t sg_addr = sg_dma_address(current_sg); + unsigned int len = sg_dma_len(current_sg); + dma_addr_t dst = target ?: sg_addr; + + total_size += sg_dma_len(current_sg); + + if (i == sg_len - 1) + flags |= LLI_TERM_INT | LLI_LAST_LINK; + + l_phys = ALIGN(lli_phys + (lli - lli_sg) * + sizeof(struct d40_phy_lli), D40_LLI_ALIGN); + + lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, lli_phys, + reg_cfg, info, otherinfo, flags); + + if (lli == NULL) + return -EINVAL; + } + + return total_size; +} + + +/* DMA logical lli operations */ + +static void d40_log_lli_link(struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags) +{ + bool interrupt = flags & LLI_TERM_INT; + u32 slos = 0; + u32 dlos = 0; + + if (next != -EINVAL) { + slos = next * 2; + dlos = next * 2 + 1; + } + + if (interrupt) { + lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK; + lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK; + } + + lli_src->lcsp13 = (lli_src->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) | + (slos << D40_MEM_LCSP1_SLOS_POS); + + lli_dst->lcsp13 = (lli_dst->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) | + (dlos << D40_MEM_LCSP1_SLOS_POS); +} + +void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags) +{ + d40_log_lli_link(lli_dst, lli_src, next, flags); + + writel(lli_src->lcsp02, &lcpa[0].lcsp0); + writel(lli_src->lcsp13, &lcpa[0].lcsp1); + writel(lli_dst->lcsp02, &lcpa[0].lcsp2); + writel(lli_dst->lcsp13, &lcpa[0].lcsp3); +} + +void d40_log_lli_lcla_write(struct d40_log_lli *lcla, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags) +{ + d40_log_lli_link(lli_dst, lli_src, next, flags); + + writel(lli_src->lcsp02, &lcla[0].lcsp02); + writel(lli_src->lcsp13, &lcla[0].lcsp13); + writel(lli_dst->lcsp02, &lcla[1].lcsp02); + writel(lli_dst->lcsp13, &lcla[1].lcsp13); +} + +static void d40_log_fill_lli(struct d40_log_lli *lli, + dma_addr_t data, u32 data_size, + u32 reg_cfg, + u32 data_width, + unsigned int flags) +{ + bool addr_inc = flags & LLI_ADDR_INC; + + lli->lcsp13 = reg_cfg; + + /* The number of elements to transfer */ + lli->lcsp02 = ((data_size >> data_width) << + D40_MEM_LCSP0_ECNT_POS) & D40_MEM_LCSP0_ECNT_MASK; + + BUG_ON((data_size >> data_width) > STEDMA40_MAX_SEG_SIZE); + + /* 16 LSBs address of the current element */ + lli->lcsp02 |= data & D40_MEM_LCSP0_SPTR_MASK; + /* 16 MSBs address of the current element */ + lli->lcsp13 |= data & D40_MEM_LCSP1_SPTR_MASK; + + if (addr_inc) + lli->lcsp13 |= D40_MEM_LCSP1_SCFG_INCR_MASK; + +} + +static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, + dma_addr_t addr, + int size, + u32 lcsp13, /* src or dst*/ + u32 data_width1, + u32 data_width2, + unsigned int flags) +{ + bool addr_inc = flags & LLI_ADDR_INC; + struct d40_log_lli *lli = lli_sg; + int size_rest = size; + int size_seg = 0; + + do { + size_seg = d40_seg_size(size_rest, data_width1, data_width2); + size_rest -= size_seg; + + d40_log_fill_lli(lli, + addr, + size_seg, + lcsp13, data_width1, + flags); + if (addr_inc) + addr += size_seg; + lli++; + } while (size_rest); + + return lli; +} + +int d40_log_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t dev_addr, + struct d40_log_lli *lli_sg, + u32 lcsp13, /* src or dst*/ + u32 data_width1, u32 data_width2) +{ + int total_size = 0; + struct scatterlist *current_sg = sg; + int i; + struct d40_log_lli *lli = lli_sg; + unsigned long flags = 0; + + if (!dev_addr) + flags |= LLI_ADDR_INC; + + for_each_sg(sg, current_sg, sg_len, i) { + dma_addr_t sg_addr = sg_dma_address(current_sg); + unsigned int len = sg_dma_len(current_sg); + dma_addr_t addr = dev_addr ?: sg_addr; + + total_size += sg_dma_len(current_sg); + + lli = d40_log_buf_to_lli(lli, addr, len, + lcsp13, + data_width1, + data_width2, + flags); + } + + return total_size; +} diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h new file mode 100644 index 00000000..51e8e539 --- /dev/null +++ b/drivers/dma/ste_dma40_ll.h @@ -0,0 +1,347 @@ +/* + * Copyright (C) ST-Ericsson SA 2007-2010 + * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson SA + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson SA + * License terms: GNU General Public License (GPL) version 2 + */ +#ifndef STE_DMA40_LL_H +#define STE_DMA40_LL_H + +#define D40_DREG_PCBASE 0x400 +#define D40_DREG_PCDELTA (8 * 4) +#define D40_LLI_ALIGN 16 /* LLI alignment must be 16 bytes. */ + +#define D40_LCPA_CHAN_SIZE 32 +#define D40_LCPA_CHAN_DST_DELTA 16 + +#define D40_TYPE_TO_GROUP(type) (type / 16) +#define D40_TYPE_TO_EVENT(type) (type % 16) +#define D40_GROUP_SIZE 8 +#define D40_PHYS_TO_GROUP(phys) ((phys & (D40_GROUP_SIZE - 1)) / 2) + +/* Most bits of the CFG register are the same in log as in phy mode */ +#define D40_SREG_CFG_MST_POS 15 +#define D40_SREG_CFG_TIM_POS 14 +#define D40_SREG_CFG_EIM_POS 13 +#define D40_SREG_CFG_LOG_INCR_POS 12 +#define D40_SREG_CFG_PHY_PEN_POS 12 +#define D40_SREG_CFG_PSIZE_POS 10 +#define D40_SREG_CFG_ESIZE_POS 8 +#define D40_SREG_CFG_PRI_POS 7 +#define D40_SREG_CFG_LBE_POS 6 +#define D40_SREG_CFG_LOG_GIM_POS 5 +#define D40_SREG_CFG_LOG_MFU_POS 4 +#define D40_SREG_CFG_PHY_TM_POS 4 +#define D40_SREG_CFG_PHY_EVTL_POS 0 + + +/* Standard channel parameters - basic mode (element register) */ +#define D40_SREG_ELEM_PHY_ECNT_POS 16 +#define D40_SREG_ELEM_PHY_EIDX_POS 0 + +#define D40_SREG_ELEM_PHY_ECNT_MASK (0xFFFF << D40_SREG_ELEM_PHY_ECNT_POS) + +/* Standard channel parameters - basic mode (Link register) */ +#define D40_SREG_LNK_PHY_TCP_POS 0 +#define D40_SREG_LNK_PHY_LMP_POS 1 +#define D40_SREG_LNK_PHY_PRE_POS 2 +/* + * Source destination link address. Contains the + * 29-bit byte word aligned address of the reload area. + */ +#define D40_SREG_LNK_PHYS_LNK_MASK 0xFFFFFFF8UL + +/* Standard basic channel logical mode */ + +/* Element register */ +#define D40_SREG_ELEM_LOG_ECNT_POS 16 +#define D40_SREG_ELEM_LOG_LIDX_POS 8 +#define D40_SREG_ELEM_LOG_LOS_POS 1 +#define D40_SREG_ELEM_LOG_TCP_POS 0 + +#define D40_SREG_ELEM_LOG_LIDX_MASK (0xFF << D40_SREG_ELEM_LOG_LIDX_POS) + +/* Link register */ +#define D40_EVENTLINE_POS(i) (2 * i) +#define D40_EVENTLINE_MASK(i) (0x3 << D40_EVENTLINE_POS(i)) + +/* Standard basic channel logical params in memory */ + +/* LCSP0 */ +#define D40_MEM_LCSP0_ECNT_POS 16 +#define D40_MEM_LCSP0_SPTR_POS 0 + +#define D40_MEM_LCSP0_ECNT_MASK (0xFFFF << D40_MEM_LCSP0_ECNT_POS) +#define D40_MEM_LCSP0_SPTR_MASK (0xFFFF << D40_MEM_LCSP0_SPTR_POS) + +/* LCSP1 */ +#define D40_MEM_LCSP1_SPTR_POS 16 +#define D40_MEM_LCSP1_SCFG_MST_POS 15 +#define D40_MEM_LCSP1_SCFG_TIM_POS 14 +#define D40_MEM_LCSP1_SCFG_EIM_POS 13 +#define D40_MEM_LCSP1_SCFG_INCR_POS 12 +#define D40_MEM_LCSP1_SCFG_PSIZE_POS 10 +#define D40_MEM_LCSP1_SCFG_ESIZE_POS 8 +#define D40_MEM_LCSP1_SLOS_POS 1 +#define D40_MEM_LCSP1_STCP_POS 0 + +#define D40_MEM_LCSP1_SPTR_MASK (0xFFFF << D40_MEM_LCSP1_SPTR_POS) +#define D40_MEM_LCSP1_SCFG_TIM_MASK (0x1 << D40_MEM_LCSP1_SCFG_TIM_POS) +#define D40_MEM_LCSP1_SCFG_INCR_MASK (0x1 << D40_MEM_LCSP1_SCFG_INCR_POS) +#define D40_MEM_LCSP1_SCFG_PSIZE_MASK (0x3 << D40_MEM_LCSP1_SCFG_PSIZE_POS) +#define D40_MEM_LCSP1_SLOS_MASK (0x7F << D40_MEM_LCSP1_SLOS_POS) +#define D40_MEM_LCSP1_STCP_MASK (0x1 << D40_MEM_LCSP1_STCP_POS) + +/* LCSP2 */ +#define D40_MEM_LCSP2_ECNT_POS 16 + +#define D40_MEM_LCSP2_ECNT_MASK (0xFFFF << D40_MEM_LCSP2_ECNT_POS) + +/* LCSP3 */ +#define D40_MEM_LCSP3_DCFG_MST_POS 15 +#define D40_MEM_LCSP3_DCFG_TIM_POS 14 +#define D40_MEM_LCSP3_DCFG_EIM_POS 13 +#define D40_MEM_LCSP3_DCFG_INCR_POS 12 +#define D40_MEM_LCSP3_DCFG_PSIZE_POS 10 +#define D40_MEM_LCSP3_DCFG_ESIZE_POS 8 +#define D40_MEM_LCSP3_DLOS_POS 1 +#define D40_MEM_LCSP3_DTCP_POS 0 + +#define D40_MEM_LCSP3_DLOS_MASK (0x7F << D40_MEM_LCSP3_DLOS_POS) +#define D40_MEM_LCSP3_DTCP_MASK (0x1 << D40_MEM_LCSP3_DTCP_POS) + + +/* Standard channel parameter register offsets */ +#define D40_CHAN_REG_SSCFG 0x00 +#define D40_CHAN_REG_SSELT 0x04 +#define D40_CHAN_REG_SSPTR 0x08 +#define D40_CHAN_REG_SSLNK 0x0C +#define D40_CHAN_REG_SDCFG 0x10 +#define D40_CHAN_REG_SDELT 0x14 +#define D40_CHAN_REG_SDPTR 0x18 +#define D40_CHAN_REG_SDLNK 0x1C + +/* DMA Register Offsets */ +#define D40_DREG_GCC 0x000 +#define D40_DREG_GCC_ENA 0x1 +/* This assumes that there are only 4 event groups */ +#define D40_DREG_GCC_ENABLE_ALL 0xff01 +#define D40_DREG_GCC_EVTGRP_POS 8 +#define D40_DREG_GCC_SRC 0 +#define D40_DREG_GCC_DST 1 +#define D40_DREG_GCC_EVTGRP_ENA(x, y) \ + (1 << (D40_DREG_GCC_EVTGRP_POS + 2 * x + y)) + +#define D40_DREG_PRTYP 0x004 +#define D40_DREG_PRSME 0x008 +#define D40_DREG_PRSMO 0x00C +#define D40_DREG_PRMSE 0x010 +#define D40_DREG_PRMSO 0x014 +#define D40_DREG_PRMOE 0x018 +#define D40_DREG_PRMOO 0x01C +#define D40_DREG_PRMO_PCHAN_BASIC 0x1 +#define D40_DREG_PRMO_PCHAN_MODULO 0x2 +#define D40_DREG_PRMO_PCHAN_DOUBLE_DST 0x3 +#define D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG 0x1 +#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY 0x2 +#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG 0x3 + +#define D40_DREG_LCPA 0x020 +#define D40_DREG_LCLA 0x024 +#define D40_DREG_ACTIVE 0x050 +#define D40_DREG_ACTIVO 0x054 +#define D40_DREG_FSEB1 0x058 +#define D40_DREG_FSEB2 0x05C +#define D40_DREG_PCMIS 0x060 +#define D40_DREG_PCICR 0x064 +#define D40_DREG_PCTIS 0x068 +#define D40_DREG_PCEIS 0x06C +#define D40_DREG_LCMIS0 0x080 +#define D40_DREG_LCMIS1 0x084 +#define D40_DREG_LCMIS2 0x088 +#define D40_DREG_LCMIS3 0x08C +#define D40_DREG_LCICR0 0x090 +#define D40_DREG_LCICR1 0x094 +#define D40_DREG_LCICR2 0x098 +#define D40_DREG_LCICR3 0x09C +#define D40_DREG_LCTIS0 0x0A0 +#define D40_DREG_LCTIS1 0x0A4 +#define D40_DREG_LCTIS2 0x0A8 +#define D40_DREG_LCTIS3 0x0AC +#define D40_DREG_LCEIS0 0x0B0 +#define D40_DREG_LCEIS1 0x0B4 +#define D40_DREG_LCEIS2 0x0B8 +#define D40_DREG_LCEIS3 0x0BC +#define D40_DREG_PSEG1 0x110 +#define D40_DREG_PSEG2 0x114 +#define D40_DREG_PSEG3 0x118 +#define D40_DREG_PSEG4 0x11C +#define D40_DREG_PCEG1 0x120 +#define D40_DREG_PCEG2 0x124 +#define D40_DREG_PCEG3 0x128 +#define D40_DREG_PCEG4 0x12C +#define D40_DREG_RSEG1 0x130 +#define D40_DREG_RSEG2 0x134 +#define D40_DREG_RSEG3 0x138 +#define D40_DREG_RSEG4 0x13C +#define D40_DREG_RCEG1 0x140 +#define D40_DREG_RCEG2 0x144 +#define D40_DREG_RCEG3 0x148 +#define D40_DREG_RCEG4 0x14C +#define D40_DREG_STFU 0xFC8 +#define D40_DREG_ICFG 0xFCC +#define D40_DREG_PERIPHID0 0xFE0 +#define D40_DREG_PERIPHID1 0xFE4 +#define D40_DREG_PERIPHID2 0xFE8 +#define D40_DREG_PERIPHID3 0xFEC +#define D40_DREG_CELLID0 0xFF0 +#define D40_DREG_CELLID1 0xFF4 +#define D40_DREG_CELLID2 0xFF8 +#define D40_DREG_CELLID3 0xFFC + +/* LLI related structures */ + +/** + * struct d40_phy_lli - The basic configration register for each physical + * channel. + * + * @reg_cfg: The configuration register. + * @reg_elt: The element register. + * @reg_ptr: The pointer register. + * @reg_lnk: The link register. + * + * These registers are set up for both physical and logical transfers + * Note that the bit in each register means differently in logical and + * physical(standard) mode. + * + * This struct must be 16 bytes aligned, and only contain physical registers + * since it will be directly accessed by the DMA. + */ +struct d40_phy_lli { + u32 reg_cfg; + u32 reg_elt; + u32 reg_ptr; + u32 reg_lnk; +}; + +/** + * struct d40_phy_lli_bidir - struct for a transfer. + * + * @src: Register settings for src channel. + * @dst: Register settings for dst channel. + * + * All DMA transfers have a source and a destination. + */ + +struct d40_phy_lli_bidir { + struct d40_phy_lli *src; + struct d40_phy_lli *dst; +}; + + +/** + * struct d40_log_lli - logical lli configuration + * + * @lcsp02: Either maps to register lcsp0 if src or lcsp2 if dst. + * @lcsp13: Either maps to register lcsp1 if src or lcsp3 if dst. + * + * This struct must be 8 bytes aligned since it will be accessed directy by + * the DMA. Never add any none hw mapped registers to this struct. + */ + +struct d40_log_lli { + u32 lcsp02; + u32 lcsp13; +}; + +/** + * struct d40_log_lli_bidir - For both src and dst + * + * @src: pointer to src lli configuration. + * @dst: pointer to dst lli configuration. + * + * You always have a src and a dst when doing DMA transfers. + */ + +struct d40_log_lli_bidir { + struct d40_log_lli *src; + struct d40_log_lli *dst; +}; + +/** + * struct d40_log_lli_full - LCPA layout + * + * @lcsp0: Logical Channel Standard Param 0 - Src. + * @lcsp1: Logical Channel Standard Param 1 - Src. + * @lcsp2: Logical Channel Standard Param 2 - Dst. + * @lcsp3: Logical Channel Standard Param 3 - Dst. + * + * This struct maps to LCPA physical memory layout. Must map to + * the hw. + */ +struct d40_log_lli_full { + u32 lcsp0; + u32 lcsp1; + u32 lcsp2; + u32 lcsp3; +}; + +/** + * struct d40_def_lcsp - Default LCSP1 and LCSP3 settings + * + * @lcsp3: The default configuration for dst. + * @lcsp1: The default configuration for src. + */ +struct d40_def_lcsp { + u32 lcsp3; + u32 lcsp1; +}; + +/* Physical channels */ + +enum d40_lli_flags { + LLI_ADDR_INC = 1 << 0, + LLI_TERM_INT = 1 << 1, + LLI_CYCLIC = 1 << 2, + LLI_LAST_LINK = 1 << 3, +}; + +void d40_phy_cfg(struct stedma40_chan_cfg *cfg, + u32 *src_cfg, + u32 *dst_cfg, + bool is_log); + +void d40_log_cfg(struct stedma40_chan_cfg *cfg, + u32 *lcsp1, + u32 *lcsp2); + +int d40_phy_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t target, + struct d40_phy_lli *lli, + dma_addr_t lli_phys, + u32 reg_cfg, + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo, + unsigned long flags); + +/* Logical channels */ + +int d40_log_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t dev_addr, + struct d40_log_lli *lli_sg, + u32 lcsp13, /* src or dst*/ + u32 data_width1, u32 data_width2); + +void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags); + +void d40_log_lli_lcla_write(struct d40_log_lli *lcla, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int next, unsigned int flags); + +#endif /* STE_DMA40_LLI_H */ diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c new file mode 100644 index 00000000..4e0dff59 --- /dev/null +++ b/drivers/dma/timb_dma.c @@ -0,0 +1,834 @@ +/* + * timb_dma.c timberdale FPGA DMA driver + * Copyright (c) 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Supports: + * Timberdale FPGA DMA engine + */ + +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <linux/timb_dma.h> + +#include "dmaengine.h" + +#define DRIVER_NAME "timb-dma" + +/* Global DMA registers */ +#define TIMBDMA_ACR 0x34 +#define TIMBDMA_32BIT_ADDR 0x01 + +#define TIMBDMA_ISR 0x080000 +#define TIMBDMA_IPR 0x080004 +#define TIMBDMA_IER 0x080008 + +/* Channel specific registers */ +/* RX instances base addresses are 0x00, 0x40, 0x80 ... + * TX instances base addresses are 0x18, 0x58, 0x98 ... + */ +#define TIMBDMA_INSTANCE_OFFSET 0x40 +#define TIMBDMA_INSTANCE_TX_OFFSET 0x18 + +/* RX registers, relative the instance base */ +#define TIMBDMA_OFFS_RX_DHAR 0x00 +#define TIMBDMA_OFFS_RX_DLAR 0x04 +#define TIMBDMA_OFFS_RX_LR 0x0C +#define TIMBDMA_OFFS_RX_BLR 0x10 +#define TIMBDMA_OFFS_RX_ER 0x14 +#define TIMBDMA_RX_EN 0x01 +/* bytes per Row, video specific register + * which is placed after the TX registers... + */ +#define TIMBDMA_OFFS_RX_BPRR 0x30 + +/* TX registers, relative the instance base */ +#define TIMBDMA_OFFS_TX_DHAR 0x00 +#define TIMBDMA_OFFS_TX_DLAR 0x04 +#define TIMBDMA_OFFS_TX_BLR 0x0C +#define TIMBDMA_OFFS_TX_LR 0x14 + + +#define TIMB_DMA_DESC_SIZE 8 + +struct timb_dma_desc { + struct list_head desc_node; + struct dma_async_tx_descriptor txd; + u8 *desc_list; + unsigned int desc_list_len; + bool interrupt; +}; + +struct timb_dma_chan { + struct dma_chan chan; + void __iomem *membase; + spinlock_t lock; /* Used to protect data structures, + especially the lists and descriptors, + from races between the tasklet and calls + from above */ + bool ongoing; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int bytes_per_line; + enum dma_transfer_direction direction; + unsigned int descs; /* Descriptors to allocate */ + unsigned int desc_elems; /* number of elems per descriptor */ +}; + +struct timb_dma { + struct dma_device dma; + void __iomem *membase; + struct tasklet_struct tasklet; + struct timb_dma_chan channels[0]; +}; + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} +static struct device *chan2dmadev(struct dma_chan *chan) +{ + return chan2dev(chan)->parent->parent; +} + +static struct timb_dma *tdchantotd(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + return (struct timb_dma *)((u8 *)td_chan - + id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma)); +} + +/* Must be called with the spinlock held */ +static void __td_enable_chan_irq(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + struct timb_dma *td = tdchantotd(td_chan); + u32 ier; + + /* enable interrupt for this channel */ + ier = ioread32(td->membase + TIMBDMA_IER); + ier |= 1 << id; + dev_dbg(chan2dev(&td_chan->chan), "Enabling irq: %d, IER: 0x%x\n", id, + ier); + iowrite32(ier, td->membase + TIMBDMA_IER); +} + +/* Should be called with the spinlock held */ +static bool __td_dma_done_ack(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + struct timb_dma *td = (struct timb_dma *)((u8 *)td_chan - + id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma)); + u32 isr; + bool done = false; + + dev_dbg(chan2dev(&td_chan->chan), "Checking irq: %d, td: %p\n", id, td); + + isr = ioread32(td->membase + TIMBDMA_ISR) & (1 << id); + if (isr) { + iowrite32(isr, td->membase + TIMBDMA_ISR); + done = true; + } + + return done; +} + +static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc, + bool single) +{ + dma_addr_t addr; + int len; + + addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) | + dma_desc[4]; + + len = (dma_desc[3] << 8) | dma_desc[2]; + + if (single) + dma_unmap_single(chan2dev(&td_chan->chan), addr, len, + DMA_TO_DEVICE); + else + dma_unmap_page(chan2dev(&td_chan->chan), addr, len, + DMA_TO_DEVICE); +} + +static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single) +{ + struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan, + struct timb_dma_chan, chan); + u8 *descs; + + for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) { + __td_unmap_desc(td_chan, descs, single); + if (descs[0] & 0x02) + break; + } +} + +static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc, + struct scatterlist *sg, bool last) +{ + if (sg_dma_len(sg) > USHRT_MAX) { + dev_err(chan2dev(&td_chan->chan), "Too big sg element\n"); + return -EINVAL; + } + + /* length must be word aligned */ + if (sg_dma_len(sg) % sizeof(u32)) { + dev_err(chan2dev(&td_chan->chan), "Incorrect length: %d\n", + sg_dma_len(sg)); + return -EINVAL; + } + + dev_dbg(chan2dev(&td_chan->chan), "desc: %p, addr: 0x%llx\n", + dma_desc, (unsigned long long)sg_dma_address(sg)); + + dma_desc[7] = (sg_dma_address(sg) >> 24) & 0xff; + dma_desc[6] = (sg_dma_address(sg) >> 16) & 0xff; + dma_desc[5] = (sg_dma_address(sg) >> 8) & 0xff; + dma_desc[4] = (sg_dma_address(sg) >> 0) & 0xff; + + dma_desc[3] = (sg_dma_len(sg) >> 8) & 0xff; + dma_desc[2] = (sg_dma_len(sg) >> 0) & 0xff; + + dma_desc[1] = 0x00; + dma_desc[0] = 0x21 | (last ? 0x02 : 0); /* tran, valid */ + + return 0; +} + +/* Must be called with the spinlock held */ +static void __td_start_dma(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc; + + if (td_chan->ongoing) { + dev_err(chan2dev(&td_chan->chan), + "Transfer already ongoing\n"); + return; + } + + td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc, + desc_node); + + dev_dbg(chan2dev(&td_chan->chan), + "td_chan: %p, chan: %d, membase: %p\n", + td_chan, td_chan->chan.chan_id, td_chan->membase); + + if (td_chan->direction == DMA_DEV_TO_MEM) { + + /* descriptor address */ + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR); + iowrite32(td_desc->txd.phys, td_chan->membase + + TIMBDMA_OFFS_RX_DLAR); + /* Bytes per line */ + iowrite32(td_chan->bytes_per_line, td_chan->membase + + TIMBDMA_OFFS_RX_BPRR); + /* enable RX */ + iowrite32(TIMBDMA_RX_EN, td_chan->membase + TIMBDMA_OFFS_RX_ER); + } else { + /* address high */ + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DHAR); + iowrite32(td_desc->txd.phys, td_chan->membase + + TIMBDMA_OFFS_TX_DLAR); + } + + td_chan->ongoing = true; + + if (td_desc->interrupt) + __td_enable_chan_irq(td_chan); +} + +static void __td_finish(struct timb_dma_chan *td_chan) +{ + dma_async_tx_callback callback; + void *param; + struct dma_async_tx_descriptor *txd; + struct timb_dma_desc *td_desc; + + /* can happen if the descriptor is canceled */ + if (list_empty(&td_chan->active_list)) + return; + + td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc, + desc_node); + txd = &td_desc->txd; + + dev_dbg(chan2dev(&td_chan->chan), "descriptor %u complete\n", + txd->cookie); + + /* make sure to stop the transfer */ + if (td_chan->direction == DMA_DEV_TO_MEM) + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER); +/* Currently no support for stopping DMA transfers + else + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DLAR); +*/ + dma_cookie_complete(txd); + td_chan->ongoing = false; + + callback = txd->callback; + param = txd->callback_param; + + list_move(&td_desc->desc_node, &td_chan->free_list); + + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) + __td_unmap_descs(td_desc, + txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE); + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + callback(param); +} + +static u32 __td_ier_mask(struct timb_dma *td) +{ + int i; + u32 ret = 0; + + for (i = 0; i < td->dma.chancnt; i++) { + struct timb_dma_chan *td_chan = td->channels + i; + if (td_chan->ongoing) { + struct timb_dma_desc *td_desc = + list_entry(td_chan->active_list.next, + struct timb_dma_desc, desc_node); + if (td_desc->interrupt) + ret |= 1 << i; + } + } + + return ret; +} + +static void __td_start_next(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc; + + BUG_ON(list_empty(&td_chan->queue)); + BUG_ON(td_chan->ongoing); + + td_desc = list_entry(td_chan->queue.next, struct timb_dma_desc, + desc_node); + + dev_dbg(chan2dev(&td_chan->chan), "%s: started %u\n", + __func__, td_desc->txd.cookie); + + list_move(&td_desc->desc_node, &td_chan->active_list); + __td_start_dma(td_chan); +} + +static dma_cookie_t td_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct timb_dma_desc *td_desc = container_of(txd, struct timb_dma_desc, + txd); + struct timb_dma_chan *td_chan = container_of(txd->chan, + struct timb_dma_chan, chan); + dma_cookie_t cookie; + + spin_lock_bh(&td_chan->lock); + cookie = dma_cookie_assign(txd); + + if (list_empty(&td_chan->active_list)) { + dev_dbg(chan2dev(txd->chan), "%s: started %u\n", __func__, + txd->cookie); + list_add_tail(&td_desc->desc_node, &td_chan->active_list); + __td_start_dma(td_chan); + } else { + dev_dbg(chan2dev(txd->chan), "tx_submit: queued %u\n", + txd->cookie); + + list_add_tail(&td_desc->desc_node, &td_chan->queue); + } + + spin_unlock_bh(&td_chan->lock); + + return cookie; +} + +static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan) +{ + struct dma_chan *chan = &td_chan->chan; + struct timb_dma_desc *td_desc; + int err; + + td_desc = kzalloc(sizeof(struct timb_dma_desc), GFP_KERNEL); + if (!td_desc) { + dev_err(chan2dev(chan), "Failed to alloc descriptor\n"); + goto out; + } + + td_desc->desc_list_len = td_chan->desc_elems * TIMB_DMA_DESC_SIZE; + + td_desc->desc_list = kzalloc(td_desc->desc_list_len, GFP_KERNEL); + if (!td_desc->desc_list) { + dev_err(chan2dev(chan), "Failed to alloc descriptor\n"); + goto err; + } + + dma_async_tx_descriptor_init(&td_desc->txd, chan); + td_desc->txd.tx_submit = td_tx_submit; + td_desc->txd.flags = DMA_CTRL_ACK; + + td_desc->txd.phys = dma_map_single(chan2dmadev(chan), + td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE); + + err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys); + if (err) { + dev_err(chan2dev(chan), "DMA mapping error: %d\n", err); + goto err; + } + + return td_desc; +err: + kfree(td_desc->desc_list); + kfree(td_desc); +out: + return NULL; + +} + +static void td_free_desc(struct timb_dma_desc *td_desc) +{ + dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc); + dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys, + td_desc->desc_list_len, DMA_TO_DEVICE); + + kfree(td_desc->desc_list); + kfree(td_desc); +} + +static void td_desc_put(struct timb_dma_chan *td_chan, + struct timb_dma_desc *td_desc) +{ + dev_dbg(chan2dev(&td_chan->chan), "Putting desc: %p\n", td_desc); + + spin_lock_bh(&td_chan->lock); + list_add(&td_desc->desc_node, &td_chan->free_list); + spin_unlock_bh(&td_chan->lock); +} + +static struct timb_dma_desc *td_desc_get(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc, *_td_desc; + struct timb_dma_desc *ret = NULL; + + spin_lock_bh(&td_chan->lock); + list_for_each_entry_safe(td_desc, _td_desc, &td_chan->free_list, + desc_node) { + if (async_tx_test_ack(&td_desc->txd)) { + list_del(&td_desc->desc_node); + ret = td_desc; + break; + } + dev_dbg(chan2dev(&td_chan->chan), "desc %p not ACKed\n", + td_desc); + } + spin_unlock_bh(&td_chan->lock); + + return ret; +} + +static int td_alloc_chan_resources(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + int i; + + dev_dbg(chan2dev(chan), "%s: entry\n", __func__); + + BUG_ON(!list_empty(&td_chan->free_list)); + for (i = 0; i < td_chan->descs; i++) { + struct timb_dma_desc *td_desc = td_alloc_init_desc(td_chan); + if (!td_desc) { + if (i) + break; + else { + dev_err(chan2dev(chan), + "Couldnt allocate any descriptors\n"); + return -ENOMEM; + } + } + + td_desc_put(td_chan, td_desc); + } + + spin_lock_bh(&td_chan->lock); + dma_cookie_init(chan); + spin_unlock_bh(&td_chan->lock); + + return 0; +} + +static void td_free_chan_resources(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc, *_td_desc; + LIST_HEAD(list); + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + /* check that all descriptors are free */ + BUG_ON(!list_empty(&td_chan->active_list)); + BUG_ON(!list_empty(&td_chan->queue)); + + spin_lock_bh(&td_chan->lock); + list_splice_init(&td_chan->free_list, &list); + spin_unlock_bh(&td_chan->lock); + + list_for_each_entry_safe(td_desc, _td_desc, &list, desc_node) { + dev_dbg(chan2dev(chan), "%s: Freeing desc: %p\n", __func__, + td_desc); + td_free_desc(td_desc); + } +} + +static enum dma_status td_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + enum dma_status ret; + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + ret = dma_cookie_status(chan, cookie, txstate); + + dev_dbg(chan2dev(chan), "%s: exit, ret: %d\n", __func__, ret); + + return ret; +} + +static void td_issue_pending(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + spin_lock_bh(&td_chan->lock); + + if (!list_empty(&td_chan->active_list)) + /* transfer ongoing */ + if (__td_dma_done_ack(td_chan)) + __td_finish(td_chan); + + if (list_empty(&td_chan->active_list) && !list_empty(&td_chan->queue)) + __td_start_next(td_chan); + + spin_unlock_bh(&td_chan->lock); +} + +static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc; + struct scatterlist *sg; + unsigned int i; + unsigned int desc_usage = 0; + + if (!sgl || !sg_len) { + dev_err(chan2dev(chan), "%s: No SG list\n", __func__); + return NULL; + } + + /* even channels are for RX, odd for TX */ + if (td_chan->direction != direction) { + dev_err(chan2dev(chan), + "Requesting channel in wrong direction\n"); + return NULL; + } + + td_desc = td_desc_get(td_chan); + if (!td_desc) { + dev_err(chan2dev(chan), "Not enough descriptors available\n"); + return NULL; + } + + td_desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0; + + for_each_sg(sgl, sg, sg_len, i) { + int err; + if (desc_usage > td_desc->desc_list_len) { + dev_err(chan2dev(chan), "No descriptor space\n"); + return NULL; + } + + err = td_fill_desc(td_chan, td_desc->desc_list + desc_usage, sg, + i == (sg_len - 1)); + if (err) { + dev_err(chan2dev(chan), "Failed to update desc: %d\n", + err); + td_desc_put(td_chan, td_desc); + return NULL; + } + desc_usage += TIMB_DMA_DESC_SIZE; + } + + dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys, + td_desc->desc_list_len, DMA_MEM_TO_DEV); + + return &td_desc->txd; +} + +static int td_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc, *_td_desc; + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + /* first the easy part, put the queue into the free list */ + spin_lock_bh(&td_chan->lock); + list_for_each_entry_safe(td_desc, _td_desc, &td_chan->queue, + desc_node) + list_move(&td_desc->desc_node, &td_chan->free_list); + + /* now tear down the running */ + __td_finish(td_chan); + spin_unlock_bh(&td_chan->lock); + + return 0; +} + +static void td_tasklet(unsigned long data) +{ + struct timb_dma *td = (struct timb_dma *)data; + u32 isr; + u32 ipr; + u32 ier; + int i; + + isr = ioread32(td->membase + TIMBDMA_ISR); + ipr = isr & __td_ier_mask(td); + + /* ack the interrupts */ + iowrite32(ipr, td->membase + TIMBDMA_ISR); + + for (i = 0; i < td->dma.chancnt; i++) + if (ipr & (1 << i)) { + struct timb_dma_chan *td_chan = td->channels + i; + spin_lock(&td_chan->lock); + __td_finish(td_chan); + if (!list_empty(&td_chan->queue)) + __td_start_next(td_chan); + spin_unlock(&td_chan->lock); + } + + ier = __td_ier_mask(td); + iowrite32(ier, td->membase + TIMBDMA_IER); +} + + +static irqreturn_t td_irq(int irq, void *devid) +{ + struct timb_dma *td = devid; + u32 ipr = ioread32(td->membase + TIMBDMA_IPR); + + if (ipr) { + /* disable interrupts, will be re-enabled in tasklet */ + iowrite32(0, td->membase + TIMBDMA_IER); + + tasklet_schedule(&td->tasklet); + + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + + +static int __devinit td_probe(struct platform_device *pdev) +{ + struct timb_dma_platform_data *pdata = pdev->dev.platform_data; + struct timb_dma *td; + struct resource *iomem; + int irq; + int err; + int i; + + if (!pdata) { + dev_err(&pdev->dev, "No platform data\n"); + return -EINVAL; + } + + iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!iomem) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + if (!request_mem_region(iomem->start, resource_size(iomem), + DRIVER_NAME)) + return -EBUSY; + + td = kzalloc(sizeof(struct timb_dma) + + sizeof(struct timb_dma_chan) * pdata->nr_channels, GFP_KERNEL); + if (!td) { + err = -ENOMEM; + goto err_release_region; + } + + dev_dbg(&pdev->dev, "Allocated TD: %p\n", td); + + td->membase = ioremap(iomem->start, resource_size(iomem)); + if (!td->membase) { + dev_err(&pdev->dev, "Failed to remap I/O memory\n"); + err = -ENOMEM; + goto err_free_mem; + } + + /* 32bit addressing */ + iowrite32(TIMBDMA_32BIT_ADDR, td->membase + TIMBDMA_ACR); + + /* disable and clear any interrupts */ + iowrite32(0x0, td->membase + TIMBDMA_IER); + iowrite32(0xFFFFFFFF, td->membase + TIMBDMA_ISR); + + tasklet_init(&td->tasklet, td_tasklet, (unsigned long)td); + + err = request_irq(irq, td_irq, IRQF_SHARED, DRIVER_NAME, td); + if (err) { + dev_err(&pdev->dev, "Failed to request IRQ\n"); + goto err_tasklet_kill; + } + + td->dma.device_alloc_chan_resources = td_alloc_chan_resources; + td->dma.device_free_chan_resources = td_free_chan_resources; + td->dma.device_tx_status = td_tx_status; + td->dma.device_issue_pending = td_issue_pending; + + dma_cap_set(DMA_SLAVE, td->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, td->dma.cap_mask); + td->dma.device_prep_slave_sg = td_prep_slave_sg; + td->dma.device_control = td_control; + + td->dma.dev = &pdev->dev; + + INIT_LIST_HEAD(&td->dma.channels); + + for (i = 0; i < pdata->nr_channels; i++) { + struct timb_dma_chan *td_chan = &td->channels[i]; + struct timb_dma_platform_data_channel *pchan = + pdata->channels + i; + + /* even channels are RX, odd are TX */ + if ((i % 2) == pchan->rx) { + dev_err(&pdev->dev, "Wrong channel configuration\n"); + err = -EINVAL; + goto err_free_irq; + } + + td_chan->chan.device = &td->dma; + dma_cookie_init(&td_chan->chan); + spin_lock_init(&td_chan->lock); + INIT_LIST_HEAD(&td_chan->active_list); + INIT_LIST_HEAD(&td_chan->queue); + INIT_LIST_HEAD(&td_chan->free_list); + + td_chan->descs = pchan->descriptors; + td_chan->desc_elems = pchan->descriptor_elements; + td_chan->bytes_per_line = pchan->bytes_per_line; + td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM : + DMA_MEM_TO_DEV; + + td_chan->membase = td->membase + + (i / 2) * TIMBDMA_INSTANCE_OFFSET + + (pchan->rx ? 0 : TIMBDMA_INSTANCE_TX_OFFSET); + + dev_dbg(&pdev->dev, "Chan: %d, membase: %p\n", + i, td_chan->membase); + + list_add_tail(&td_chan->chan.device_node, &td->dma.channels); + } + + err = dma_async_device_register(&td->dma); + if (err) { + dev_err(&pdev->dev, "Failed to register async device\n"); + goto err_free_irq; + } + + platform_set_drvdata(pdev, td); + + dev_dbg(&pdev->dev, "Probe result: %d\n", err); + return err; + +err_free_irq: + free_irq(irq, td); +err_tasklet_kill: + tasklet_kill(&td->tasklet); + iounmap(td->membase); +err_free_mem: + kfree(td); +err_release_region: + release_mem_region(iomem->start, resource_size(iomem)); + + return err; + +} + +static int __devexit td_remove(struct platform_device *pdev) +{ + struct timb_dma *td = platform_get_drvdata(pdev); + struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int irq = platform_get_irq(pdev, 0); + + dma_async_device_unregister(&td->dma); + free_irq(irq, td); + tasklet_kill(&td->tasklet); + iounmap(td->membase); + kfree(td); + release_mem_region(iomem->start, resource_size(iomem)); + + platform_set_drvdata(pdev, NULL); + + dev_dbg(&pdev->dev, "Removed...\n"); + return 0; +} + +static struct platform_driver td_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = td_probe, + .remove = __exit_p(td_remove), +}; + +module_platform_driver(td_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Timberdale DMA controller driver"); +MODULE_AUTHOR("Pelagicore AB <info@pelagicore.com>"); +MODULE_ALIAS("platform:"DRIVER_NAME); diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c new file mode 100644 index 00000000..913f55c7 --- /dev/null +++ b/drivers/dma/txx9dmac.c @@ -0,0 +1,1343 @@ +/* + * Driver for the TXx9 SoC DMA Controller + * + * Copyright (C) 2009 Atsushi Nemoto + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/scatterlist.h> + +#include "dmaengine.h" +#include "txx9dmac.h" + +static struct txx9dmac_chan *to_txx9dmac_chan(struct dma_chan *chan) +{ + return container_of(chan, struct txx9dmac_chan, chan); +} + +static struct txx9dmac_cregs __iomem *__dma_regs(const struct txx9dmac_chan *dc) +{ + return dc->ch_regs; +} + +static struct txx9dmac_cregs32 __iomem *__dma_regs32( + const struct txx9dmac_chan *dc) +{ + return dc->ch_regs; +} + +#define channel64_readq(dc, name) \ + __raw_readq(&(__dma_regs(dc)->name)) +#define channel64_writeq(dc, name, val) \ + __raw_writeq((val), &(__dma_regs(dc)->name)) +#define channel64_readl(dc, name) \ + __raw_readl(&(__dma_regs(dc)->name)) +#define channel64_writel(dc, name, val) \ + __raw_writel((val), &(__dma_regs(dc)->name)) + +#define channel32_readl(dc, name) \ + __raw_readl(&(__dma_regs32(dc)->name)) +#define channel32_writel(dc, name, val) \ + __raw_writel((val), &(__dma_regs32(dc)->name)) + +#define channel_readq(dc, name) channel64_readq(dc, name) +#define channel_writeq(dc, name, val) channel64_writeq(dc, name, val) +#define channel_readl(dc, name) \ + (is_dmac64(dc) ? \ + channel64_readl(dc, name) : channel32_readl(dc, name)) +#define channel_writel(dc, name, val) \ + (is_dmac64(dc) ? \ + channel64_writel(dc, name, val) : channel32_writel(dc, name, val)) + +static dma_addr_t channel64_read_CHAR(const struct txx9dmac_chan *dc) +{ + if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64)) + return channel64_readq(dc, CHAR); + else + return channel64_readl(dc, CHAR); +} + +static void channel64_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val) +{ + if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64)) + channel64_writeq(dc, CHAR, val); + else + channel64_writel(dc, CHAR, val); +} + +static void channel64_clear_CHAR(const struct txx9dmac_chan *dc) +{ +#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR) + channel64_writel(dc, CHAR, 0); + channel64_writel(dc, __pad_CHAR, 0); +#else + channel64_writeq(dc, CHAR, 0); +#endif +} + +static dma_addr_t channel_read_CHAR(const struct txx9dmac_chan *dc) +{ + if (is_dmac64(dc)) + return channel64_read_CHAR(dc); + else + return channel32_readl(dc, CHAR); +} + +static void channel_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val) +{ + if (is_dmac64(dc)) + channel64_write_CHAR(dc, val); + else + channel32_writel(dc, CHAR, val); +} + +static struct txx9dmac_regs __iomem *__txx9dmac_regs( + const struct txx9dmac_dev *ddev) +{ + return ddev->regs; +} + +static struct txx9dmac_regs32 __iomem *__txx9dmac_regs32( + const struct txx9dmac_dev *ddev) +{ + return ddev->regs; +} + +#define dma64_readl(ddev, name) \ + __raw_readl(&(__txx9dmac_regs(ddev)->name)) +#define dma64_writel(ddev, name, val) \ + __raw_writel((val), &(__txx9dmac_regs(ddev)->name)) + +#define dma32_readl(ddev, name) \ + __raw_readl(&(__txx9dmac_regs32(ddev)->name)) +#define dma32_writel(ddev, name, val) \ + __raw_writel((val), &(__txx9dmac_regs32(ddev)->name)) + +#define dma_readl(ddev, name) \ + (__is_dmac64(ddev) ? \ + dma64_readl(ddev, name) : dma32_readl(ddev, name)) +#define dma_writel(ddev, name, val) \ + (__is_dmac64(ddev) ? \ + dma64_writel(ddev, name, val) : dma32_writel(ddev, name, val)) + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} +static struct device *chan2parent(struct dma_chan *chan) +{ + return chan->dev->device.parent; +} + +static struct txx9dmac_desc * +txd_to_txx9dmac_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct txx9dmac_desc, txd); +} + +static dma_addr_t desc_read_CHAR(const struct txx9dmac_chan *dc, + const struct txx9dmac_desc *desc) +{ + return is_dmac64(dc) ? desc->hwdesc.CHAR : desc->hwdesc32.CHAR; +} + +static void desc_write_CHAR(const struct txx9dmac_chan *dc, + struct txx9dmac_desc *desc, dma_addr_t val) +{ + if (is_dmac64(dc)) + desc->hwdesc.CHAR = val; + else + desc->hwdesc32.CHAR = val; +} + +#define TXX9_DMA_MAX_COUNT 0x04000000 + +#define TXX9_DMA_INITIAL_DESC_COUNT 64 + +static struct txx9dmac_desc *txx9dmac_first_active(struct txx9dmac_chan *dc) +{ + return list_entry(dc->active_list.next, + struct txx9dmac_desc, desc_node); +} + +static struct txx9dmac_desc *txx9dmac_last_active(struct txx9dmac_chan *dc) +{ + return list_entry(dc->active_list.prev, + struct txx9dmac_desc, desc_node); +} + +static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc) +{ + return list_entry(dc->queue.next, struct txx9dmac_desc, desc_node); +} + +static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) +{ + if (!list_empty(&desc->tx_list)) + desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node); + return desc; +} + +static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx); + +static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc, + gfp_t flags) +{ + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc; + + desc = kzalloc(sizeof(*desc), flags); + if (!desc) + return NULL; + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, &dc->chan); + desc->txd.tx_submit = txx9dmac_tx_submit; + /* txd.flags will be overwritten in prep funcs */ + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = dma_map_single(chan2parent(&dc->chan), &desc->hwdesc, + ddev->descsize, DMA_TO_DEVICE); + return desc; +} + +static struct txx9dmac_desc *txx9dmac_desc_get(struct txx9dmac_chan *dc) +{ + struct txx9dmac_desc *desc, *_desc; + struct txx9dmac_desc *ret = NULL; + unsigned int i = 0; + + spin_lock_bh(&dc->lock); + list_for_each_entry_safe(desc, _desc, &dc->free_list, desc_node) { + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(chan2dev(&dc->chan), "desc %p not ACKed\n", desc); + i++; + } + spin_unlock_bh(&dc->lock); + + dev_vdbg(chan2dev(&dc->chan), "scanned %u descriptors on freelist\n", + i); + if (!ret) { + ret = txx9dmac_desc_alloc(dc, GFP_ATOMIC); + if (ret) { + spin_lock_bh(&dc->lock); + dc->descs_allocated++; + spin_unlock_bh(&dc->lock); + } else + dev_err(chan2dev(&dc->chan), + "not enough descriptors available\n"); + } + return ret; +} + +static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc, + struct txx9dmac_desc *desc) +{ + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *child; + + list_for_each_entry(child, &desc->tx_list, desc_node) + dma_sync_single_for_cpu(chan2parent(&dc->chan), + child->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + dma_sync_single_for_cpu(chan2parent(&dc->chan), + desc->txd.phys, ddev->descsize, + DMA_TO_DEVICE); +} + +/* + * Move a descriptor, including any children, to the free list. + * `desc' must not be on any lists. + */ +static void txx9dmac_desc_put(struct txx9dmac_chan *dc, + struct txx9dmac_desc *desc) +{ + if (desc) { + struct txx9dmac_desc *child; + + txx9dmac_sync_desc_for_cpu(dc, desc); + + spin_lock_bh(&dc->lock); + list_for_each_entry(child, &desc->tx_list, desc_node) + dev_vdbg(chan2dev(&dc->chan), + "moving child desc %p to freelist\n", + child); + list_splice_init(&desc->tx_list, &dc->free_list); + dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", + desc); + list_add(&desc->desc_node, &dc->free_list); + spin_unlock_bh(&dc->lock); + } +} + +/*----------------------------------------------------------------------*/ + +static void txx9dmac_dump_regs(struct txx9dmac_chan *dc) +{ + if (is_dmac64(dc)) + dev_err(chan2dev(&dc->chan), + " CHAR: %#llx SAR: %#llx DAR: %#llx CNTR: %#x" + " SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n", + (u64)channel64_read_CHAR(dc), + channel64_readq(dc, SAR), + channel64_readq(dc, DAR), + channel64_readl(dc, CNTR), + channel64_readl(dc, SAIR), + channel64_readl(dc, DAIR), + channel64_readl(dc, CCR), + channel64_readl(dc, CSR)); + else + dev_err(chan2dev(&dc->chan), + " CHAR: %#x SAR: %#x DAR: %#x CNTR: %#x" + " SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n", + channel32_readl(dc, CHAR), + channel32_readl(dc, SAR), + channel32_readl(dc, DAR), + channel32_readl(dc, CNTR), + channel32_readl(dc, SAIR), + channel32_readl(dc, DAIR), + channel32_readl(dc, CCR), + channel32_readl(dc, CSR)); +} + +static void txx9dmac_reset_chan(struct txx9dmac_chan *dc) +{ + channel_writel(dc, CCR, TXX9_DMA_CCR_CHRST); + if (is_dmac64(dc)) { + channel64_clear_CHAR(dc); + channel_writeq(dc, SAR, 0); + channel_writeq(dc, DAR, 0); + } else { + channel_writel(dc, CHAR, 0); + channel_writel(dc, SAR, 0); + channel_writel(dc, DAR, 0); + } + channel_writel(dc, CNTR, 0); + channel_writel(dc, SAIR, 0); + channel_writel(dc, DAIR, 0); + channel_writel(dc, CCR, 0); + mmiowb(); +} + +/* Called with dc->lock held and bh disabled */ +static void txx9dmac_dostart(struct txx9dmac_chan *dc, + struct txx9dmac_desc *first) +{ + struct txx9dmac_slave *ds = dc->chan.private; + u32 sai, dai; + + dev_vdbg(chan2dev(&dc->chan), "dostart %u %p\n", + first->txd.cookie, first); + /* ASSERT: channel is idle */ + if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) { + dev_err(chan2dev(&dc->chan), + "BUG: Attempted to start non-idle channel\n"); + txx9dmac_dump_regs(dc); + /* The tasklet will hopefully advance the queue... */ + return; + } + + if (is_dmac64(dc)) { + channel64_writel(dc, CNTR, 0); + channel64_writel(dc, CSR, 0xffffffff); + if (ds) { + if (ds->tx_reg) { + sai = ds->reg_width; + dai = 0; + } else { + sai = 0; + dai = ds->reg_width; + } + } else { + sai = 8; + dai = 8; + } + channel64_writel(dc, SAIR, sai); + channel64_writel(dc, DAIR, dai); + /* All 64-bit DMAC supports SMPCHN */ + channel64_writel(dc, CCR, dc->ccr); + /* Writing a non zero value to CHAR will assert XFACT */ + channel64_write_CHAR(dc, first->txd.phys); + } else { + channel32_writel(dc, CNTR, 0); + channel32_writel(dc, CSR, 0xffffffff); + if (ds) { + if (ds->tx_reg) { + sai = ds->reg_width; + dai = 0; + } else { + sai = 0; + dai = ds->reg_width; + } + } else { + sai = 4; + dai = 4; + } + channel32_writel(dc, SAIR, sai); + channel32_writel(dc, DAIR, dai); + if (txx9_dma_have_SMPCHN()) { + channel32_writel(dc, CCR, dc->ccr); + /* Writing a non zero value to CHAR will assert XFACT */ + channel32_writel(dc, CHAR, first->txd.phys); + } else { + channel32_writel(dc, CHAR, first->txd.phys); + channel32_writel(dc, CCR, dc->ccr); + } + } +} + +/*----------------------------------------------------------------------*/ + +static void +txx9dmac_descriptor_complete(struct txx9dmac_chan *dc, + struct txx9dmac_desc *desc) +{ + dma_async_tx_callback callback; + void *param; + struct dma_async_tx_descriptor *txd = &desc->txd; + struct txx9dmac_slave *ds = dc->chan.private; + + dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n", + txd->cookie, desc); + + dma_cookie_complete(txd); + callback = txd->callback; + param = txd->callback_param; + + txx9dmac_sync_desc_for_cpu(dc, desc); + list_splice_init(&desc->tx_list, &dc->free_list); + list_move(&desc->desc_node, &dc->free_list); + + if (!ds) { + dma_addr_t dmaaddr; + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + dmaaddr = is_dmac64(dc) ? + desc->hwdesc.DAR : desc->hwdesc32.DAR; + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(chan2parent(&dc->chan), + dmaaddr, desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(chan2parent(&dc->chan), + dmaaddr, desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + dmaaddr = is_dmac64(dc) ? + desc->hwdesc.SAR : desc->hwdesc32.SAR; + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(chan2parent(&dc->chan), + dmaaddr, desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(chan2parent(&dc->chan), + dmaaddr, desc->len, DMA_TO_DEVICE); + } + } + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + callback(param); + dma_run_dependencies(txd); +} + +static void txx9dmac_dequeue(struct txx9dmac_chan *dc, struct list_head *list) +{ + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc; + struct txx9dmac_desc *prev = NULL; + + BUG_ON(!list_empty(list)); + do { + desc = txx9dmac_first_queued(dc); + if (prev) { + desc_write_CHAR(dc, prev, desc->txd.phys); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + } + prev = txx9dmac_last_child(desc); + list_move_tail(&desc->desc_node, list); + /* Make chain-completion interrupt happen */ + if ((desc->txd.flags & DMA_PREP_INTERRUPT) && + !txx9dmac_chan_INTENT(dc)) + break; + } while (!list_empty(&dc->queue)); +} + +static void txx9dmac_complete_all(struct txx9dmac_chan *dc) +{ + struct txx9dmac_desc *desc, *_desc; + LIST_HEAD(list); + + /* + * Submit queued descriptors ASAP, i.e. before we go through + * the completed ones. + */ + list_splice_init(&dc->active_list, &list); + if (!list_empty(&dc->queue)) { + txx9dmac_dequeue(dc, &dc->active_list); + txx9dmac_dostart(dc, txx9dmac_first_active(dc)); + } + + list_for_each_entry_safe(desc, _desc, &list, desc_node) + txx9dmac_descriptor_complete(dc, desc); +} + +static void txx9dmac_dump_desc(struct txx9dmac_chan *dc, + struct txx9dmac_hwdesc *desc) +{ + if (is_dmac64(dc)) { +#ifdef TXX9_DMA_USE_SIMPLE_CHAIN + dev_crit(chan2dev(&dc->chan), + " desc: ch%#llx s%#llx d%#llx c%#x\n", + (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR); +#else + dev_crit(chan2dev(&dc->chan), + " desc: ch%#llx s%#llx d%#llx c%#x" + " si%#x di%#x cc%#x cs%#x\n", + (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR, + desc->SAIR, desc->DAIR, desc->CCR, desc->CSR); +#endif + } else { + struct txx9dmac_hwdesc32 *d = (struct txx9dmac_hwdesc32 *)desc; +#ifdef TXX9_DMA_USE_SIMPLE_CHAIN + dev_crit(chan2dev(&dc->chan), + " desc: ch%#x s%#x d%#x c%#x\n", + d->CHAR, d->SAR, d->DAR, d->CNTR); +#else + dev_crit(chan2dev(&dc->chan), + " desc: ch%#x s%#x d%#x c%#x" + " si%#x di%#x cc%#x cs%#x\n", + d->CHAR, d->SAR, d->DAR, d->CNTR, + d->SAIR, d->DAIR, d->CCR, d->CSR); +#endif + } +} + +static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr) +{ + struct txx9dmac_desc *bad_desc; + struct txx9dmac_desc *child; + u32 errors; + + /* + * The descriptor currently at the head of the active list is + * borked. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to carry on. + */ + dev_crit(chan2dev(&dc->chan), "Abnormal Chain Completion\n"); + txx9dmac_dump_regs(dc); + + bad_desc = txx9dmac_first_active(dc); + list_del_init(&bad_desc->desc_node); + + /* Clear all error flags and try to restart the controller */ + errors = csr & (TXX9_DMA_CSR_ABCHC | + TXX9_DMA_CSR_CFERR | TXX9_DMA_CSR_CHERR | + TXX9_DMA_CSR_DESERR | TXX9_DMA_CSR_SORERR); + channel_writel(dc, CSR, errors); + + if (list_empty(&dc->active_list) && !list_empty(&dc->queue)) + txx9dmac_dequeue(dc, &dc->active_list); + if (!list_empty(&dc->active_list)) + txx9dmac_dostart(dc, txx9dmac_first_active(dc)); + + dev_crit(chan2dev(&dc->chan), + "Bad descriptor submitted for DMA! (cookie: %d)\n", + bad_desc->txd.cookie); + txx9dmac_dump_desc(dc, &bad_desc->hwdesc); + list_for_each_entry(child, &bad_desc->tx_list, desc_node) + txx9dmac_dump_desc(dc, &child->hwdesc); + /* Pretend the descriptor completed successfully */ + txx9dmac_descriptor_complete(dc, bad_desc); +} + +static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc) +{ + dma_addr_t chain; + struct txx9dmac_desc *desc, *_desc; + struct txx9dmac_desc *child; + u32 csr; + + if (is_dmac64(dc)) { + chain = channel64_read_CHAR(dc); + csr = channel64_readl(dc, CSR); + channel64_writel(dc, CSR, csr); + } else { + chain = channel32_readl(dc, CHAR); + csr = channel32_readl(dc, CSR); + channel32_writel(dc, CSR, csr); + } + /* For dynamic chain, we should look at XFACT instead of NCHNC */ + if (!(csr & (TXX9_DMA_CSR_XFACT | TXX9_DMA_CSR_ABCHC))) { + /* Everything we've submitted is done */ + txx9dmac_complete_all(dc); + return; + } + if (!(csr & TXX9_DMA_CSR_CHNEN)) + chain = 0; /* last descriptor of this chain */ + + dev_vdbg(chan2dev(&dc->chan), "scan_descriptors: char=%#llx\n", + (u64)chain); + + list_for_each_entry_safe(desc, _desc, &dc->active_list, desc_node) { + if (desc_read_CHAR(dc, desc) == chain) { + /* This one is currently in progress */ + if (csr & TXX9_DMA_CSR_ABCHC) + goto scan_done; + return; + } + + list_for_each_entry(child, &desc->tx_list, desc_node) + if (desc_read_CHAR(dc, child) == chain) { + /* Currently in progress */ + if (csr & TXX9_DMA_CSR_ABCHC) + goto scan_done; + return; + } + + /* + * No descriptors so far seem to be in progress, i.e. + * this one must be done. + */ + txx9dmac_descriptor_complete(dc, desc); + } +scan_done: + if (csr & TXX9_DMA_CSR_ABCHC) { + txx9dmac_handle_error(dc, csr); + return; + } + + dev_err(chan2dev(&dc->chan), + "BUG: All descriptors done, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + txx9dmac_reset_chan(dc); + + if (!list_empty(&dc->queue)) { + txx9dmac_dequeue(dc, &dc->active_list); + txx9dmac_dostart(dc, txx9dmac_first_active(dc)); + } +} + +static void txx9dmac_chan_tasklet(unsigned long data) +{ + int irq; + u32 csr; + struct txx9dmac_chan *dc; + + dc = (struct txx9dmac_chan *)data; + csr = channel_readl(dc, CSR); + dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n", csr); + + spin_lock(&dc->lock); + if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC | + TXX9_DMA_CSR_NTRNFC)) + txx9dmac_scan_descriptors(dc); + spin_unlock(&dc->lock); + irq = dc->irq; + + enable_irq(irq); +} + +static irqreturn_t txx9dmac_chan_interrupt(int irq, void *dev_id) +{ + struct txx9dmac_chan *dc = dev_id; + + dev_vdbg(chan2dev(&dc->chan), "interrupt: status=%#x\n", + channel_readl(dc, CSR)); + + tasklet_schedule(&dc->tasklet); + /* + * Just disable the interrupts. We'll turn them back on in the + * softirq handler. + */ + disable_irq_nosync(irq); + + return IRQ_HANDLED; +} + +static void txx9dmac_tasklet(unsigned long data) +{ + int irq; + u32 csr; + struct txx9dmac_chan *dc; + + struct txx9dmac_dev *ddev = (struct txx9dmac_dev *)data; + u32 mcr; + int i; + + mcr = dma_readl(ddev, MCR); + dev_vdbg(ddev->chan[0]->dma.dev, "tasklet: mcr=%x\n", mcr); + for (i = 0; i < TXX9_DMA_MAX_NR_CHANNELS; i++) { + if ((mcr >> (24 + i)) & 0x11) { + dc = ddev->chan[i]; + csr = channel_readl(dc, CSR); + dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n", + csr); + spin_lock(&dc->lock); + if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC | + TXX9_DMA_CSR_NTRNFC)) + txx9dmac_scan_descriptors(dc); + spin_unlock(&dc->lock); + } + } + irq = ddev->irq; + + enable_irq(irq); +} + +static irqreturn_t txx9dmac_interrupt(int irq, void *dev_id) +{ + struct txx9dmac_dev *ddev = dev_id; + + dev_vdbg(ddev->chan[0]->dma.dev, "interrupt: status=%#x\n", + dma_readl(ddev, MCR)); + + tasklet_schedule(&ddev->tasklet); + /* + * Just disable the interrupts. We'll turn them back on in the + * softirq handler. + */ + disable_irq_nosync(irq); + + return IRQ_HANDLED; +} + +/*----------------------------------------------------------------------*/ + +static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct txx9dmac_desc *desc = txd_to_txx9dmac_desc(tx); + struct txx9dmac_chan *dc = to_txx9dmac_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&dc->lock); + cookie = dma_cookie_assign(tx); + + dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u %p\n", + desc->txd.cookie, desc); + + list_add_tail(&desc->desc_node, &dc->queue); + spin_unlock_bh(&dc->lock); + + return cookie; +} + +static struct dma_async_tx_descriptor * +txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc; + struct txx9dmac_desc *first; + struct txx9dmac_desc *prev; + size_t xfer_count; + size_t offset; + + dev_vdbg(chan2dev(chan), "prep_dma_memcpy d%#llx s%#llx l%#zx f%#lx\n", + (u64)dest, (u64)src, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + prev = first = NULL; + + for (offset = 0; offset < len; offset += xfer_count) { + xfer_count = min_t(size_t, len - offset, TXX9_DMA_MAX_COUNT); + /* + * Workaround for ERT-TX49H2-033, ERT-TX49H3-020, + * ERT-TX49H4-016 (slightly conservative) + */ + if (__is_dmac64(ddev)) { + if (xfer_count > 0x100 && + (xfer_count & 0xff) >= 0xfa && + (xfer_count & 0xff) <= 0xff) + xfer_count -= 0x20; + } else { + if (xfer_count > 0x80 && + (xfer_count & 0x7f) >= 0x7e && + (xfer_count & 0x7f) <= 0x7f) + xfer_count -= 0x20; + } + + desc = txx9dmac_desc_get(dc); + if (!desc) { + txx9dmac_desc_put(dc, first); + return NULL; + } + + if (__is_dmac64(ddev)) { + desc->hwdesc.SAR = src + offset; + desc->hwdesc.DAR = dest + offset; + desc->hwdesc.CNTR = xfer_count; + txx9dmac_desc_set_nosimple(ddev, desc, 8, 8, + dc->ccr | TXX9_DMA_CCR_XFACT); + } else { + desc->hwdesc32.SAR = src + offset; + desc->hwdesc32.DAR = dest + offset; + desc->hwdesc32.CNTR = xfer_count; + txx9dmac_desc_set_nosimple(ddev, desc, 4, 4, + dc->ccr | TXX9_DMA_CCR_XFACT); + } + + /* + * The descriptors on tx_list are not reachable from + * the dc->queue list or dc->active_list after a + * submit. If we put all descriptors on active_list, + * calling of callback on the completion will be more + * complex. + */ + if (!first) { + first = desc; + } else { + desc_write_CHAR(dc, prev, desc->txd.phys); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + } + + /* Trigger interrupt after last block */ + if (flags & DMA_PREP_INTERRUPT) + txx9dmac_desc_set_INTENT(ddev, prev); + + desc_write_CHAR(dc, prev, 0); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + + first->txd.flags = flags; + first->len = len; + + return &first->txd; +} + +static struct dma_async_tx_descriptor * +txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_slave *ds = chan->private; + struct txx9dmac_desc *prev; + struct txx9dmac_desc *first; + unsigned int i; + struct scatterlist *sg; + + dev_vdbg(chan2dev(chan), "prep_dma_slave\n"); + + BUG_ON(!ds || !ds->reg_width); + if (ds->tx_reg) + BUG_ON(direction != DMA_MEM_TO_DEV); + else + BUG_ON(direction != DMA_DEV_TO_MEM); + if (unlikely(!sg_len)) + return NULL; + + prev = first = NULL; + + for_each_sg(sgl, sg, sg_len, i) { + struct txx9dmac_desc *desc; + dma_addr_t mem; + u32 sai, dai; + + desc = txx9dmac_desc_get(dc); + if (!desc) { + txx9dmac_desc_put(dc, first); + return NULL; + } + + mem = sg_dma_address(sg); + + if (__is_dmac64(ddev)) { + if (direction == DMA_MEM_TO_DEV) { + desc->hwdesc.SAR = mem; + desc->hwdesc.DAR = ds->tx_reg; + } else { + desc->hwdesc.SAR = ds->rx_reg; + desc->hwdesc.DAR = mem; + } + desc->hwdesc.CNTR = sg_dma_len(sg); + } else { + if (direction == DMA_MEM_TO_DEV) { + desc->hwdesc32.SAR = mem; + desc->hwdesc32.DAR = ds->tx_reg; + } else { + desc->hwdesc32.SAR = ds->rx_reg; + desc->hwdesc32.DAR = mem; + } + desc->hwdesc32.CNTR = sg_dma_len(sg); + } + if (direction == DMA_MEM_TO_DEV) { + sai = ds->reg_width; + dai = 0; + } else { + sai = 0; + dai = ds->reg_width; + } + txx9dmac_desc_set_nosimple(ddev, desc, sai, dai, + dc->ccr | TXX9_DMA_CCR_XFACT); + + if (!first) { + first = desc; + } else { + desc_write_CHAR(dc, prev, desc->txd.phys); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, + ddev->descsize, + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + } + + /* Trigger interrupt after last block */ + if (flags & DMA_PREP_INTERRUPT) + txx9dmac_desc_set_INTENT(ddev, prev); + + desc_write_CHAR(dc, prev, 0); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + + first->txd.flags = flags; + first->len = 0; + + return &first->txd; +} + +static int txx9dmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_desc *desc, *_desc; + LIST_HEAD(list); + + /* Only supports DMA_TERMINATE_ALL */ + if (cmd != DMA_TERMINATE_ALL) + return -EINVAL; + + dev_vdbg(chan2dev(chan), "terminate_all\n"); + spin_lock_bh(&dc->lock); + + txx9dmac_reset_chan(dc); + + /* active_list entries will end up before queued entries */ + list_splice_init(&dc->queue, &list); + list_splice_init(&dc->active_list, &list); + + spin_unlock_bh(&dc->lock); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + txx9dmac_descriptor_complete(dc, desc); + + return 0; +} + +static enum dma_status +txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret != DMA_SUCCESS) { + spin_lock_bh(&dc->lock); + txx9dmac_scan_descriptors(dc); + spin_unlock_bh(&dc->lock); + + ret = dma_cookie_status(chan, cookie, txstate); + } + + return ret; +} + +static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc, + struct txx9dmac_desc *prev) +{ + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc; + LIST_HEAD(list); + + prev = txx9dmac_last_child(prev); + txx9dmac_dequeue(dc, &list); + desc = list_entry(list.next, struct txx9dmac_desc, desc_node); + desc_write_CHAR(dc, prev, desc->txd.phys); + dma_sync_single_for_device(chan2parent(&dc->chan), + prev->txd.phys, ddev->descsize, + DMA_TO_DEVICE); + mmiowb(); + if (!(channel_readl(dc, CSR) & TXX9_DMA_CSR_CHNEN) && + channel_read_CHAR(dc) == prev->txd.phys) + /* Restart chain DMA */ + channel_write_CHAR(dc, desc->txd.phys); + list_splice_tail(&list, &dc->active_list); +} + +static void txx9dmac_issue_pending(struct dma_chan *chan) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + + spin_lock_bh(&dc->lock); + + if (!list_empty(&dc->active_list)) + txx9dmac_scan_descriptors(dc); + if (!list_empty(&dc->queue)) { + if (list_empty(&dc->active_list)) { + txx9dmac_dequeue(dc, &dc->active_list); + txx9dmac_dostart(dc, txx9dmac_first_active(dc)); + } else if (txx9_dma_have_SMPCHN()) { + struct txx9dmac_desc *prev = txx9dmac_last_active(dc); + + if (!(prev->txd.flags & DMA_PREP_INTERRUPT) || + txx9dmac_chan_INTENT(dc)) + txx9dmac_chain_dynamic(dc, prev); + } + } + + spin_unlock_bh(&dc->lock); +} + +static int txx9dmac_alloc_chan_resources(struct dma_chan *chan) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_slave *ds = chan->private; + struct txx9dmac_desc *desc; + int i; + + dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); + + /* ASSERT: channel is idle */ + if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) { + dev_dbg(chan2dev(chan), "DMA channel not idle?\n"); + return -EIO; + } + + dma_cookie_init(chan); + + dc->ccr = TXX9_DMA_CCR_IMMCHN | TXX9_DMA_CCR_INTENE | CCR_LE; + txx9dmac_chan_set_SMPCHN(dc); + if (!txx9_dma_have_SMPCHN() || (dc->ccr & TXX9_DMA_CCR_SMPCHN)) + dc->ccr |= TXX9_DMA_CCR_INTENC; + if (chan->device->device_prep_dma_memcpy) { + if (ds) + return -EINVAL; + dc->ccr |= TXX9_DMA_CCR_XFSZ_X8; + } else { + if (!ds || + (ds->tx_reg && ds->rx_reg) || (!ds->tx_reg && !ds->rx_reg)) + return -EINVAL; + dc->ccr |= TXX9_DMA_CCR_EXTRQ | + TXX9_DMA_CCR_XFSZ(__ffs(ds->reg_width)); + txx9dmac_chan_set_INTENT(dc); + } + + spin_lock_bh(&dc->lock); + i = dc->descs_allocated; + while (dc->descs_allocated < TXX9_DMA_INITIAL_DESC_COUNT) { + spin_unlock_bh(&dc->lock); + + desc = txx9dmac_desc_alloc(dc, GFP_KERNEL); + if (!desc) { + dev_info(chan2dev(chan), + "only allocated %d descriptors\n", i); + spin_lock_bh(&dc->lock); + break; + } + txx9dmac_desc_put(dc, desc); + + spin_lock_bh(&dc->lock); + i = ++dc->descs_allocated; + } + spin_unlock_bh(&dc->lock); + + dev_dbg(chan2dev(chan), + "alloc_chan_resources allocated %d descriptors\n", i); + + return i; +} + +static void txx9dmac_free_chan_resources(struct dma_chan *chan) +{ + struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); + struct txx9dmac_dev *ddev = dc->ddev; + struct txx9dmac_desc *desc, *_desc; + LIST_HEAD(list); + + dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n", + dc->descs_allocated); + + /* ASSERT: channel is idle */ + BUG_ON(!list_empty(&dc->active_list)); + BUG_ON(!list_empty(&dc->queue)); + BUG_ON(channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT); + + spin_lock_bh(&dc->lock); + list_splice_init(&dc->free_list, &list); + dc->descs_allocated = 0; + spin_unlock_bh(&dc->lock); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) { + dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); + dma_unmap_single(chan2parent(chan), desc->txd.phys, + ddev->descsize, DMA_TO_DEVICE); + kfree(desc); + } + + dev_vdbg(chan2dev(chan), "free_chan_resources done\n"); +} + +/*----------------------------------------------------------------------*/ + +static void txx9dmac_off(struct txx9dmac_dev *ddev) +{ + dma_writel(ddev, MCR, 0); + mmiowb(); +} + +static int __init txx9dmac_chan_probe(struct platform_device *pdev) +{ + struct txx9dmac_chan_platform_data *cpdata = pdev->dev.platform_data; + struct platform_device *dmac_dev = cpdata->dmac_dev; + struct txx9dmac_platform_data *pdata = dmac_dev->dev.platform_data; + struct txx9dmac_chan *dc; + int err; + int ch = pdev->id % TXX9_DMA_MAX_NR_CHANNELS; + int irq; + + dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL); + if (!dc) + return -ENOMEM; + + dc->dma.dev = &pdev->dev; + dc->dma.device_alloc_chan_resources = txx9dmac_alloc_chan_resources; + dc->dma.device_free_chan_resources = txx9dmac_free_chan_resources; + dc->dma.device_control = txx9dmac_control; + dc->dma.device_tx_status = txx9dmac_tx_status; + dc->dma.device_issue_pending = txx9dmac_issue_pending; + if (pdata && pdata->memcpy_chan == ch) { + dc->dma.device_prep_dma_memcpy = txx9dmac_prep_dma_memcpy; + dma_cap_set(DMA_MEMCPY, dc->dma.cap_mask); + } else { + dc->dma.device_prep_slave_sg = txx9dmac_prep_slave_sg; + dma_cap_set(DMA_SLAVE, dc->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, dc->dma.cap_mask); + } + + INIT_LIST_HEAD(&dc->dma.channels); + dc->ddev = platform_get_drvdata(dmac_dev); + if (dc->ddev->irq < 0) { + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + tasklet_init(&dc->tasklet, txx9dmac_chan_tasklet, + (unsigned long)dc); + dc->irq = irq; + err = devm_request_irq(&pdev->dev, dc->irq, + txx9dmac_chan_interrupt, 0, dev_name(&pdev->dev), dc); + if (err) + return err; + } else + dc->irq = -1; + dc->ddev->chan[ch] = dc; + dc->chan.device = &dc->dma; + list_add_tail(&dc->chan.device_node, &dc->chan.device->channels); + dma_cookie_init(&dc->chan); + + if (is_dmac64(dc)) + dc->ch_regs = &__txx9dmac_regs(dc->ddev)->CHAN[ch]; + else + dc->ch_regs = &__txx9dmac_regs32(dc->ddev)->CHAN[ch]; + spin_lock_init(&dc->lock); + + INIT_LIST_HEAD(&dc->active_list); + INIT_LIST_HEAD(&dc->queue); + INIT_LIST_HEAD(&dc->free_list); + + txx9dmac_reset_chan(dc); + + platform_set_drvdata(pdev, dc); + + err = dma_async_device_register(&dc->dma); + if (err) + return err; + dev_dbg(&pdev->dev, "TXx9 DMA Channel (dma%d%s%s)\n", + dc->dma.dev_id, + dma_has_cap(DMA_MEMCPY, dc->dma.cap_mask) ? " memcpy" : "", + dma_has_cap(DMA_SLAVE, dc->dma.cap_mask) ? " slave" : ""); + + return 0; +} + +static int __exit txx9dmac_chan_remove(struct platform_device *pdev) +{ + struct txx9dmac_chan *dc = platform_get_drvdata(pdev); + + dma_async_device_unregister(&dc->dma); + if (dc->irq >= 0) + tasklet_kill(&dc->tasklet); + dc->ddev->chan[pdev->id % TXX9_DMA_MAX_NR_CHANNELS] = NULL; + return 0; +} + +static int __init txx9dmac_probe(struct platform_device *pdev) +{ + struct txx9dmac_platform_data *pdata = pdev->dev.platform_data; + struct resource *io; + struct txx9dmac_dev *ddev; + u32 mcr; + int err; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + return -EINVAL; + + ddev = devm_kzalloc(&pdev->dev, sizeof(*ddev), GFP_KERNEL); + if (!ddev) + return -ENOMEM; + + if (!devm_request_mem_region(&pdev->dev, io->start, resource_size(io), + dev_name(&pdev->dev))) + return -EBUSY; + + ddev->regs = devm_ioremap(&pdev->dev, io->start, resource_size(io)); + if (!ddev->regs) + return -ENOMEM; + ddev->have_64bit_regs = pdata->have_64bit_regs; + if (__is_dmac64(ddev)) + ddev->descsize = sizeof(struct txx9dmac_hwdesc); + else + ddev->descsize = sizeof(struct txx9dmac_hwdesc32); + + /* force dma off, just in case */ + txx9dmac_off(ddev); + + ddev->irq = platform_get_irq(pdev, 0); + if (ddev->irq >= 0) { + tasklet_init(&ddev->tasklet, txx9dmac_tasklet, + (unsigned long)ddev); + err = devm_request_irq(&pdev->dev, ddev->irq, + txx9dmac_interrupt, 0, dev_name(&pdev->dev), ddev); + if (err) + return err; + } + + mcr = TXX9_DMA_MCR_MSTEN | MCR_LE; + if (pdata && pdata->memcpy_chan >= 0) + mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan); + dma_writel(ddev, MCR, mcr); + + platform_set_drvdata(pdev, ddev); + return 0; +} + +static int __exit txx9dmac_remove(struct platform_device *pdev) +{ + struct txx9dmac_dev *ddev = platform_get_drvdata(pdev); + + txx9dmac_off(ddev); + if (ddev->irq >= 0) + tasklet_kill(&ddev->tasklet); + return 0; +} + +static void txx9dmac_shutdown(struct platform_device *pdev) +{ + struct txx9dmac_dev *ddev = platform_get_drvdata(pdev); + + txx9dmac_off(ddev); +} + +static int txx9dmac_suspend_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct txx9dmac_dev *ddev = platform_get_drvdata(pdev); + + txx9dmac_off(ddev); + return 0; +} + +static int txx9dmac_resume_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct txx9dmac_dev *ddev = platform_get_drvdata(pdev); + struct txx9dmac_platform_data *pdata = pdev->dev.platform_data; + u32 mcr; + + mcr = TXX9_DMA_MCR_MSTEN | MCR_LE; + if (pdata && pdata->memcpy_chan >= 0) + mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan); + dma_writel(ddev, MCR, mcr); + return 0; + +} + +static const struct dev_pm_ops txx9dmac_dev_pm_ops = { + .suspend_noirq = txx9dmac_suspend_noirq, + .resume_noirq = txx9dmac_resume_noirq, +}; + +static struct platform_driver txx9dmac_chan_driver = { + .remove = __exit_p(txx9dmac_chan_remove), + .driver = { + .name = "txx9dmac-chan", + }, +}; + +static struct platform_driver txx9dmac_driver = { + .remove = __exit_p(txx9dmac_remove), + .shutdown = txx9dmac_shutdown, + .driver = { + .name = "txx9dmac", + .pm = &txx9dmac_dev_pm_ops, + }, +}; + +static int __init txx9dmac_init(void) +{ + int rc; + + rc = platform_driver_probe(&txx9dmac_driver, txx9dmac_probe); + if (!rc) { + rc = platform_driver_probe(&txx9dmac_chan_driver, + txx9dmac_chan_probe); + if (rc) + platform_driver_unregister(&txx9dmac_driver); + } + return rc; +} +module_init(txx9dmac_init); + +static void __exit txx9dmac_exit(void) +{ + platform_driver_unregister(&txx9dmac_chan_driver); + platform_driver_unregister(&txx9dmac_driver); +} +module_exit(txx9dmac_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TXx9 DMA Controller driver"); +MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>"); +MODULE_ALIAS("platform:txx9dmac"); +MODULE_ALIAS("platform:txx9dmac-chan"); diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h new file mode 100644 index 00000000..f5a76059 --- /dev/null +++ b/drivers/dma/txx9dmac.h @@ -0,0 +1,307 @@ +/* + * Driver for the TXx9 SoC DMA Controller + * + * Copyright (C) 2009 Atsushi Nemoto + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef TXX9DMAC_H +#define TXX9DMAC_H + +#include <linux/dmaengine.h> +#include <asm/txx9/dmac.h> + +/* + * Design Notes: + * + * This DMAC have four channels and one FIFO buffer. Each channel can + * be configured for memory-memory or device-memory transfer, but only + * one channel can do alignment-free memory-memory transfer at a time + * while the channel should occupy the FIFO buffer for effective + * transfers. + * + * Instead of dynamically assign the FIFO buffer to channels, I chose + * make one dedicated channel for memory-memory transfer. The + * dedicated channel is public. Other channels are private and used + * for slave transfer. Some devices in the SoC are wired to certain + * DMA channel. + */ + +#ifdef CONFIG_MACH_TX49XX +static inline bool txx9_dma_have_SMPCHN(void) +{ + return true; +} +#define TXX9_DMA_USE_SIMPLE_CHAIN +#else +static inline bool txx9_dma_have_SMPCHN(void) +{ + return false; +} +#endif + +#ifdef __LITTLE_ENDIAN +#ifdef CONFIG_MACH_TX49XX +#define CCR_LE TXX9_DMA_CCR_LE +#define MCR_LE 0 +#else +#define CCR_LE 0 +#define MCR_LE TXX9_DMA_MCR_LE +#endif +#else +#define CCR_LE 0 +#define MCR_LE 0 +#endif + +/* + * Redefine this macro to handle differences between 32- and 64-bit + * addressing, big vs. little endian, etc. + */ +#ifdef __BIG_ENDIAN +#define TXX9_DMA_REG32(name) u32 __pad_##name; u32 name +#else +#define TXX9_DMA_REG32(name) u32 name; u32 __pad_##name +#endif + +/* Hardware register definitions. */ +struct txx9dmac_cregs { +#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR) + TXX9_DMA_REG32(CHAR); /* Chain Address Register */ +#else + u64 CHAR; /* Chain Address Register */ +#endif + u64 SAR; /* Source Address Register */ + u64 DAR; /* Destination Address Register */ + TXX9_DMA_REG32(CNTR); /* Count Register */ + TXX9_DMA_REG32(SAIR); /* Source Address Increment Register */ + TXX9_DMA_REG32(DAIR); /* Destination Address Increment Register */ + TXX9_DMA_REG32(CCR); /* Channel Control Register */ + TXX9_DMA_REG32(CSR); /* Channel Status Register */ +}; +struct txx9dmac_cregs32 { + u32 CHAR; + u32 SAR; + u32 DAR; + u32 CNTR; + u32 SAIR; + u32 DAIR; + u32 CCR; + u32 CSR; +}; + +struct txx9dmac_regs { + /* per-channel registers */ + struct txx9dmac_cregs CHAN[TXX9_DMA_MAX_NR_CHANNELS]; + u64 __pad[9]; + u64 MFDR; /* Memory Fill Data Register */ + TXX9_DMA_REG32(MCR); /* Master Control Register */ +}; +struct txx9dmac_regs32 { + struct txx9dmac_cregs32 CHAN[TXX9_DMA_MAX_NR_CHANNELS]; + u32 __pad[9]; + u32 MFDR; + u32 MCR; +}; + +/* bits for MCR */ +#define TXX9_DMA_MCR_EIS(ch) (0x10000000<<(ch)) +#define TXX9_DMA_MCR_DIS(ch) (0x01000000<<(ch)) +#define TXX9_DMA_MCR_RSFIF 0x00000080 +#define TXX9_DMA_MCR_FIFUM(ch) (0x00000008<<(ch)) +#define TXX9_DMA_MCR_LE 0x00000004 +#define TXX9_DMA_MCR_RPRT 0x00000002 +#define TXX9_DMA_MCR_MSTEN 0x00000001 + +/* bits for CCRn */ +#define TXX9_DMA_CCR_IMMCHN 0x20000000 +#define TXX9_DMA_CCR_USEXFSZ 0x10000000 +#define TXX9_DMA_CCR_LE 0x08000000 +#define TXX9_DMA_CCR_DBINH 0x04000000 +#define TXX9_DMA_CCR_SBINH 0x02000000 +#define TXX9_DMA_CCR_CHRST 0x01000000 +#define TXX9_DMA_CCR_RVBYTE 0x00800000 +#define TXX9_DMA_CCR_ACKPOL 0x00400000 +#define TXX9_DMA_CCR_REQPL 0x00200000 +#define TXX9_DMA_CCR_EGREQ 0x00100000 +#define TXX9_DMA_CCR_CHDN 0x00080000 +#define TXX9_DMA_CCR_DNCTL 0x00060000 +#define TXX9_DMA_CCR_EXTRQ 0x00010000 +#define TXX9_DMA_CCR_INTRQD 0x0000e000 +#define TXX9_DMA_CCR_INTENE 0x00001000 +#define TXX9_DMA_CCR_INTENC 0x00000800 +#define TXX9_DMA_CCR_INTENT 0x00000400 +#define TXX9_DMA_CCR_CHNEN 0x00000200 +#define TXX9_DMA_CCR_XFACT 0x00000100 +#define TXX9_DMA_CCR_SMPCHN 0x00000020 +#define TXX9_DMA_CCR_XFSZ(order) (((order) << 2) & 0x0000001c) +#define TXX9_DMA_CCR_XFSZ_1 TXX9_DMA_CCR_XFSZ(0) +#define TXX9_DMA_CCR_XFSZ_2 TXX9_DMA_CCR_XFSZ(1) +#define TXX9_DMA_CCR_XFSZ_4 TXX9_DMA_CCR_XFSZ(2) +#define TXX9_DMA_CCR_XFSZ_8 TXX9_DMA_CCR_XFSZ(3) +#define TXX9_DMA_CCR_XFSZ_X4 TXX9_DMA_CCR_XFSZ(4) +#define TXX9_DMA_CCR_XFSZ_X8 TXX9_DMA_CCR_XFSZ(5) +#define TXX9_DMA_CCR_XFSZ_X16 TXX9_DMA_CCR_XFSZ(6) +#define TXX9_DMA_CCR_XFSZ_X32 TXX9_DMA_CCR_XFSZ(7) +#define TXX9_DMA_CCR_MEMIO 0x00000002 +#define TXX9_DMA_CCR_SNGAD 0x00000001 + +/* bits for CSRn */ +#define TXX9_DMA_CSR_CHNEN 0x00000400 +#define TXX9_DMA_CSR_STLXFER 0x00000200 +#define TXX9_DMA_CSR_XFACT 0x00000100 +#define TXX9_DMA_CSR_ABCHC 0x00000080 +#define TXX9_DMA_CSR_NCHNC 0x00000040 +#define TXX9_DMA_CSR_NTRNFC 0x00000020 +#define TXX9_DMA_CSR_EXTDN 0x00000010 +#define TXX9_DMA_CSR_CFERR 0x00000008 +#define TXX9_DMA_CSR_CHERR 0x00000004 +#define TXX9_DMA_CSR_DESERR 0x00000002 +#define TXX9_DMA_CSR_SORERR 0x00000001 + +struct txx9dmac_chan { + struct dma_chan chan; + struct dma_device dma; + struct txx9dmac_dev *ddev; + void __iomem *ch_regs; + struct tasklet_struct tasklet; + int irq; + u32 ccr; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + + unsigned int descs_allocated; +}; + +struct txx9dmac_dev { + void __iomem *regs; + struct tasklet_struct tasklet; + int irq; + struct txx9dmac_chan *chan[TXX9_DMA_MAX_NR_CHANNELS]; + bool have_64bit_regs; + unsigned int descsize; +}; + +static inline bool __is_dmac64(const struct txx9dmac_dev *ddev) +{ + return ddev->have_64bit_regs; +} + +static inline bool is_dmac64(const struct txx9dmac_chan *dc) +{ + return __is_dmac64(dc->ddev); +} + +#ifdef TXX9_DMA_USE_SIMPLE_CHAIN +/* Hardware descriptor definition. (for simple-chain) */ +struct txx9dmac_hwdesc { +#if defined(CONFIG_32BIT) && !defined(CONFIG_64BIT_PHYS_ADDR) + TXX9_DMA_REG32(CHAR); +#else + u64 CHAR; +#endif + u64 SAR; + u64 DAR; + TXX9_DMA_REG32(CNTR); +}; +struct txx9dmac_hwdesc32 { + u32 CHAR; + u32 SAR; + u32 DAR; + u32 CNTR; +}; +#else +#define txx9dmac_hwdesc txx9dmac_cregs +#define txx9dmac_hwdesc32 txx9dmac_cregs32 +#endif + +struct txx9dmac_desc { + /* FIRST values the hardware uses */ + union { + struct txx9dmac_hwdesc hwdesc; + struct txx9dmac_hwdesc32 hwdesc32; + }; + + /* THEN values for driver housekeeping */ + struct list_head desc_node ____cacheline_aligned; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + size_t len; +}; + +#ifdef TXX9_DMA_USE_SIMPLE_CHAIN + +static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc) +{ + return (dc->ccr & TXX9_DMA_CCR_INTENT) != 0; +} + +static inline void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc) +{ + dc->ccr |= TXX9_DMA_CCR_INTENT; +} + +static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev, + struct txx9dmac_desc *desc) +{ +} + +static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc) +{ + dc->ccr |= TXX9_DMA_CCR_SMPCHN; +} + +static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev, + struct txx9dmac_desc *desc, + u32 sair, u32 dair, u32 ccr) +{ +} + +#else /* TXX9_DMA_USE_SIMPLE_CHAIN */ + +static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc) +{ + return true; +} + +static void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc) +{ +} + +static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev, + struct txx9dmac_desc *desc) +{ + if (__is_dmac64(ddev)) + desc->hwdesc.CCR |= TXX9_DMA_CCR_INTENT; + else + desc->hwdesc32.CCR |= TXX9_DMA_CCR_INTENT; +} + +static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc) +{ +} + +static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev, + struct txx9dmac_desc *desc, + u32 sai, u32 dai, u32 ccr) +{ + if (__is_dmac64(ddev)) { + desc->hwdesc.SAIR = sai; + desc->hwdesc.DAIR = dai; + desc->hwdesc.CCR = ccr; + } else { + desc->hwdesc32.SAIR = sai; + desc->hwdesc32.DAIR = dai; + desc->hwdesc32.CCR = ccr; + } +} + +#endif /* TXX9_DMA_USE_SIMPLE_CHAIN */ + +#endif /* TXX9DMAC_H */ |